# NOT RUN {
# Load the iris data. Every access below goes through `iris[...]` directly,
# so the data frame is deliberately not attach()ed: attaching would only put
# its columns on the search path, where they can mask or be masked by other
# objects.
data(iris)
# Predictors are columns 1 to 4; the response is column 5 as a factor
x <- iris[, 1:4]
y <- factor(iris[, 5])
# Randomly pick 100 rows for training; the remaining rows form the test set
train <- sample(seq_len(nrow(iris)), 100)
xTrain <- x[train, ]
xTest <- x[-train, ]
yTrain <- y[train]
yTest <- y[-train]
# Center and scale the training data. scale() stores the centers and scales
# as attributes ("scaled:center", "scaled:scale") so that the very same
# transformation can be applied to the test data later on.
s <- scale(xTrain, center = TRUE, scale = TRUE)
# Find the best model type and the best cost parameter via 5-fold
# cross-validation on the scaled training data.
# NOTE: LiblineaR() is called without being loaded in this script, so the
# package is assumed to be attached already.
tryTypes <- 0:7
tryCosts <- c(1000, 1, 0.001)
bestCost <- NA
bestAcc <- 0
bestType <- NA
for (ty in tryTypes) {
  for (co in tryCosts) {
    # With cross = 5 the returned value is used as the cross-validated
    # accuracy of this (type, cost) combination
    acc <- LiblineaR(data = s, target = yTrain, type = ty, cost = co,
                     bias = 1, cross = 5, verbose = FALSE)
    # Report the model type together with the cost: the same costs are tried
    # for every type, so the cost alone does not identify the result line
    cat("Results for type=", ty, ", C=", co, " : ", acc, " accuracy.\n",
        sep = "")
    # Strict comparison: on ties the first combination tried is kept
    if (acc > bestAcc) {
      bestCost <- co
      bestAcc <- acc
      bestType <- ty
    }
  }
}
cat("Best model type is:", bestType, "\n")
cat("Best cost is:", bestCost, "\n")
cat("Best accuracy is:", bestAcc, "\n")
# Re-train the best model type with the best cost value on the whole
# (scaled) training set.
m <- LiblineaR(data = s, target = yTrain, type = bestType, cost = bestCost,
               bias = 1, verbose = FALSE)
# Scale the test data with the centers and scales computed on the training
# data, so both sets go through the same transformation
s2 <- scale(xTest, attr(s, "scaled:center"), attr(s, "scaled:scale"))
# Make predictions; probabilities are requested only for model types 0 and 7
pr <- bestType %in% c(0, 7)
p <- predict(m, s2, proba = pr, decisionValues = TRUE)
# Display the confusion matrix (rows: predicted class, columns: true class).
# Predictions are mapped onto the levels of yTest so that the table is always
# square with rows and columns in the same order, even when some class is
# never predicted.
res <- table(factor(as.character(p$predictions), levels = levels(yTest)),
             yTest)
print(res)
# Balanced Classification Rate: for each true class, the fraction of its
# test samples that were predicted correctly, averaged over the classes
BCR <- mean(diag(res) / colSums(res))
print(BCR)
#############################################
# Example of the use of a sparse matrix:
if (require(SparseM)) {
  # Sparsify the iris dataset: within each of the four measurement columns,
  # every value below that column's first quartile is replaced by zero
  iS <- apply(iris[, 1:4], 2, function(a) {
    a[a < quantile(a, probs = 0.25)] <- 0
    a
  })
  irisSparse <- as.matrix.csr(iS)
  # Reuse the train/test row split from above; note that, unlike above, the
  # sparse data are passed to the model without centering or scaling
  xTrain <- irisSparse[train, ]
  xTest <- irisSparse[-train, ]
  # Re-train the best model type with the best cost value
  m <- LiblineaR(data = xTrain, target = yTrain, type = bestType,
                 cost = bestCost, bias = 1, verbose = FALSE)
  # Predict on the sparse test rows
  p <- predict(m, xTest, proba = pr, decisionValues = TRUE)
  # Display the confusion matrix (rows: predicted class, columns: true class)
  res <- table(p$predictions, yTest)
  print(res)
}
#############################################
# Try regression instead: predict sepal length from the second and third
# iris columns (sepal width and petal length).
# The first 25 rows of each block of 50 are used for training, the last 25
# of each block for testing.
trainRows <- c(1:25, 51:75, 101:125)
testRows <- c(26:50, 76:100, 126:150)
xTrain <- iris[trainRows, c("Sepal.Width", "Petal.Length")]
yTrain <- iris[trainRows, "Sepal.Length"]
xTest <- iris[testRows, c("Sepal.Width", "Petal.Length")]
yTest <- iris[testRows, "Sepal.Length"]
# Center and scale the training predictors
s <- scale(xTrain, center = TRUE, scale = TRUE)
# Estimate the MSE by 10-fold cross-validation on the training set
MSECross <- LiblineaR(data = s, target = yTrain, type = 13, cross = 10,
                      svr_eps = .01)
# Build the model on the whole training set (no cross-validation)
m <- LiblineaR(data = s, target = yTrain, type = 13, cross = 0,
               svr_eps = .01)
# Scale the test data with the training centers and scales, then predict
s2 <- scale(xTest, attr(s, "scaled:center"), attr(s, "scaled:scale"))
pred <- predict(m, s2)$predictions
MSETest <- mean((yTest - pred)^2)
# Was the MSE well estimated? (difference between test and CV estimates)
print(MSETest - MSECross)
# Distribution of the test errors
print(summary(yTest - pred))
# }
# Run the code above in your browser using DataCamp Workspace