library(fscaret)
# Create data sets and labels data frame.
# Base predictors: 150 observations x 120 columns drawn from
# N(mean = 10, sd = 1).
trainMatrix <- matrix(rnorm(150 * 120, mean = 10, sd = 1), 150, 120)
# Adding some near-zero variance attributes.
# Only 150 values are drawn here, so matrix() recycles them: all 12 columns
# are identical copies of one low-variance vector in [0.0001, 0.0005].
temp1 <- matrix(runif(150, 0.0001, 0.0005), 150, 12)
# Adding some highly correlated attributes: 10 randomly chosen base
# columns, each multiplied by 2.
sampleColIndex <- sample(ncol(trainMatrix), size = 10)
temp2 <- matrix(trainMatrix[, sampleColIndex] * 2, 150, 10)
# Output variable (bound as the last column below)
output <- matrix(rnorm(150, mean = 10, sd = 1), 150, 1)
trainMatrix <- cbind(trainMatrix, temp1, temp2, output)
colnames(trainMatrix) <- paste0("X", seq_len(ncol(trainMatrix)))
# Subset test data set: draw 10% of the row indices from ALL rows.
# (sample(k) alone would only permute 1..k, i.e. always pick the first
# k rows of the training matrix.)
testRowIndex <- sample(
  nrow(trainMatrix),
  size = round(0.1 * nrow(trainMatrix))
)
# drop = FALSE keeps the result a matrix even if a single row is selected.
testMatrix <- trainMatrix[testRowIndex, , drop = FALSE]
# One label per column, matching the column names assigned above.
labelsDF <- data.frame("Labels" = paste0("X", seq_len(ncol(trainMatrix))))
lk_col <- ncol(trainMatrix)
lk_row <- nrow(trainMatrix)
# Flag passed through to dataPreprocess() as its last argument.
with.labels <- TRUE
# Run fscaret's preprocessing on the train/test matrices built above.
# NOTE(review): presumably this filters out the near-zero-variance and
# highly correlated columns that were added on purpose -- confirm against
# the fscaret documentation.
testRes <- dataPreprocess(
  trainMatrix, testMatrix,
  labelsDF, lk_col, lk_row, with.labels
)
summary(testRes)
# Selected attributes after data set preprocessing
testRes$labelsDF
# Training and testing data sets after preprocessing
testRes$trainMatryca
testRes$testMatryca
# Run the code above in your browser using DataCamp Workspace