# NOT RUN {
### dontrun is used when the execution of the example requires some computational effort.
### simple regression example
set.seed(123); x1=rnorm(200,100,20); x2=rnorm(200,100,20)
y=0.7*sin(x1/(25*pi))+0.3*sin(x2/(25*pi))
# mining with an ensemble of neural networks, each fixed with size=2 hidden nodes
# assumes a default holdout (random split) with 2/3 for training and 1/3 for testing:
M=mining(y~x1+x2,Runs=2,model="mlpe",search=2)
print(M)
print(mmetric(M,metric="MAE"))
### more regression examples:
# }
# NOT RUN {
# simple nonlinear regression task; x3 is a random variable and does not influence y:
data(sin1reg)
# 5 runs of an external holdout with 2/3 for training and 1/3 for testing, fixed seed 12345
# feature selection: sabs method
# model selection: 5 searches for size, internal 2-fold cross validation fixed seed 123
# with optimization for minimum MAE metric
M=mining(y~.,data=sin1reg,Runs=5,method=c("holdout",2/3,12345),model="mlpe",
search=list(search=mparheuristic("mlpe",n=5),method=c("kfold",2,123),metric="MAE"),
feature="sabs")
print(mmetric(M,metric="MAE"))
print(M$mpar)
print("median hidden nodes (size) and number of MLPs (nr):")
print(centralpar(M$mpar))
print("attributes used by the model in each run:")
print(M$attributes)
mgraph(M,graph="RSC",Grid=10,main="sin1 MLPE scatter plot")
mgraph(M,graph="REP",Grid=10,main="sin1 MLPE scatter plot",sort=FALSE)
mgraph(M,graph="REC",Grid=10,main="sin1 MLPE REC")
mgraph(M,graph="IMP",Grid=10,main="input importances",xval=0.1,leg=names(sin1reg))
# average influence of x1 on the model:
mgraph(M,graph="VEC",Grid=10,main="x1 VEC curve",xval=1,leg=names(sin1reg)[1])
# }
# NOT RUN {
### regression example with holdout rolling windows:
# }
# NOT RUN {
# simple nonlinear regression task; x3 is a random variable and does not influence y:
data(sin1reg)
# rolling with 20 test samples, training window size of 300 and increment of 50 in each run:
# note that Runs argument is automatically set to 14 in this example:
M=mining(y~.,data=sin1reg,method=c("holdoutrol",20,300,50),
model="mlpe",debug=TRUE)
# }
# NOT RUN {
### regression example with all rminer models:
# }
# NOT RUN {
# simple nonlinear regression task; x3 is a random variable and does not influence y:
data(sin1reg)
models=c("naive","ctree","rpart","kknn","mlp","mlpe","ksvm","randomForest","mr","mars",
"cubist","pcr","plsr","cppls","rvm")
for(model in models)
{
M=mining(y~.,data=sin1reg,method=c("holdout",2/3,12345),model=model)
cat("model:",model,"MAE:",round(mmetric(M,metric="MAE")$MAE,digits=3),"\n")
}
# }
# NOT RUN {
### classification example (task="prob")
# }
# NOT RUN {
data(iris)
# 10 runs of a 3-fold cross validation with fixed seed 123 for generating the 3-fold runs
M=mining(Species~.,iris,Runs=10,method=c("kfold",3,123),model="rpart")
print(mmetric(M,metric="CONF"))
print(mmetric(M,metric="AUC"))
print(meanint(mmetric(M,metric="AUC")))
mgraph(M,graph="ROC",TC=2,baseline=TRUE,Grid=10,leg="Versicolor",
main="versicolor ROC")
mgraph(M,graph="LIFT",TC=2,baseline=TRUE,Grid=10,leg="Versicolor",
main="Versicolor ROC")
M2=mining(Species~.,iris,Runs=10,method=c("kfold",3,123),model="ksvm")
L=vector("list",2)
L[[1]]=M;L[[2]]=M2
mgraph(L,graph="ROC",TC=2,baseline=TRUE,Grid=10,leg=c("DT","SVM"),main="ROC")
# }
# NOT RUN {
### other classification examples
# }
# NOT RUN {
### 1st example:
data(iris)
# 2 runs of an external 2-fold validation, random seed
# model selection: SVM model with rbfdot kernel, automatic search for sigma,
# internal 3-fold validation, random seed, minimum "AUC" is assumed
# feature selection: none, "s" is used only to store input importance values
M=mining(Species~.,data=iris,Runs=2,method=c("kfold",2,NA),model="ksvm",
search=list(search=mparheuristic("ksvm"),method=c("kfold",3)),feature="s")
print(mmetric(M,metric="AUC",TC=2))
mgraph(M,graph="ROC",TC=2,baseline=TRUE,Grid=10,leg="SVM",main="ROC",intbar=FALSE)
mgraph(M,graph="IMP",TC=2,Grid=10,main="input importances",xval=0.1,
leg=names(iris),axis=1)
mgraph(M,graph="VEC",TC=2,Grid=10,main="Petal.Width VEC curve",
data=iris,xval=4)
### 2nd example, ordered kfold, k-nearest neigbor:
M=mining(Species~.,iris,Runs=1,method=c("kfoldo",3),model="knn")
# confusion matrix:
print(mmetric(M,metric="CONF"))
### 3rd example, use of all rminer models:
models=c("naive","ctree","rpart","kknn","mlp","mlpe","ksvm","randomForest","bagging",
"boosting","lda","multinom","naiveBayes","qda")
models="naiveBayes"
for(model in models)
{
M=mining(Species~.,iris,Runs=1,method=c("kfold",3,123),model=model)
cat("model:",model,"ACC:",round(mmetric(M,metric="ACC")$ACC,digits=1),"\n")
}
# }
# NOT RUN {
### multiple models: automl or ensembles
# }
# NOT RUN {
data(iris)
d=iris
names(d)[ncol(d)]="y" # change output name
inputs=ncol(d)-1
metric="AUC"
# simple automl (1 search per individual model),
# internal holdout and external holdout:
sm=mparheuristic(model="automl",n=NA,task="prob",inputs=inputs)
mode="auto"
imethod=c("holdout",4/5,123) # internal validation method
emethod=c("holdout",2/3,567) # external validation method
search=list(search=sm,smethod=mode,method=imethod,metric=metric,convex=0)
M=mining(y~.,data=d,model="auto",search=search,method=emethod,fdebug=TRUE)
# 1 single model was selected:
cat("best",emethod[1],"selected model:",M$object@model,"\n")
cat(metric,"=",round(as.numeric(mmetric(M,metric=metric)),2),"\n")
# simple automl (1 search per individual model),
# internal kfold and external kfold:
imethod=c("kfold",3,123) # internal validation method
emethod=c("kfold",5,567) # external validation method
search=list(search=sm,smethod=mode,method=imethod,metric=metric,convex=0)
M=mining(y~.,data=d,model="auto",search=search,method=emethod,fdebug=TRUE)
# kfold models were selected:
kfolds=as.numeric(emethod[2])
models=vector(length=kfolds)
for(i in 1:kfolds) models[i]=M$object$model[[i]]
cat("best",emethod[1],"selected models:",models,"\n")
cat(metric,"=",round(as.numeric(mmetric(M,metric=metric)),2),"\n")
# example with weighted ensemble:
M=mining(y~.,data=d,model="WE",search=search,method=emethod,fdebug=TRUE)
for(i in 1:kfolds) models[i]=M$object$model[[i]]
cat("best",emethod[1],"selected models:",models,"\n")
cat(metric,"=",round(as.numeric(mmetric(M,metric=metric)),2),"\n")
# }
# NOT RUN {
### for more fitting examples check the help of function fit: help(fit,package="rminer")
# }
Run the code above in your browser using DataLab