# set the path to the SVM-light binaries before calling svml, e.g.:
# options(path.svml = 'D:/downloaded_scientific_programs/svmlight')
# options(path.svml = '~/bin/svmlight')
###########################################################
# a linear example
dat = sim.dat.1(n=200,seed=1)
# convergence takes a long time; to pass CRAN checks, maxit is set to 2 here
fit1 = rauc (y~x1+x2, dat, lambda=2, kernel="linear", maxit=2)
# compare against rauc.linear (not run)
#fit2 = rauc.linear (y~x1+x2, dat, lambda=2, verbose=TRUE)
#aux2 = fit2$X %*% fit2$coefficients
#all(abs(fit1$linear.combination - aux2) < 1e-2)
fit1$train.auc # 0.7206015
fit3 = rauc (y~x1+x2, dat, lambda=2, kernel="rbf", para=1, verbose=TRUE)
fit3$train.auc # 0.7773434
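# optional sketch: evaluate both rauc fits on an independent test set drawn with
# a different seed (seed=2 is an arbitrary choice) to compare out-of-sample AUC
dat.test = sim.dat.1(n=200, seed=2)
fast.auc(predict(fit1, dat.test), dat.test$y)
fast.auc(predict(fit3, dat.test), dat.test$y)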
fit4 = svml (y~x1+x2, dat, kernel="r", fitted=FALSE, cost=1e4)
fast.auc(predict(fit4, dat)$posterior[,1], dat$y) # 0.7921805
tune.svml(y~x1+x2, dat, kernel="r")
# cost:      1        10       100      1000     10000     1e+05
# AUC: 0.7027569 0.7254135 0.7517794 0.7653133 0.7921805 0.6674687
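# optional sketch: refit svml at the cost with the highest AUC; this assumes
# tune.svml returns a named numeric vector of AUCs keyed by cost, as the
# printed output above suggests
aucs = tune.svml(y~x1+x2, dat, kernel="r")
best.cost = as.numeric(names(aucs)[which.max(aucs)])
fit5 = svml (y~x1+x2, dat, kernel="r", fitted=FALSE, cost=best.cost)
fast.auc(predict(fit5, dat)$posterior[,1], dat$y)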
# glm-derived score for comparison
fit.glm = glm(y~x1+x2, dat, family="binomial")
fast.auc(fit1$X %*% fit.glm$coef[-1], fit1$y)
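# AUC is invariant to monotone transformations of the score, so the glm fitted
# probabilities should give the same training AUC as the linear predictor above
fast.auc(fitted(fit.glm), dat$y)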
# add outliers
dat = sim.dat.1(n=200,seed=1, add.outliers=TRUE)
fit3 = rauc (y~x1+x2, dat, lambda=2, kernel="rbf", para=1, verbose=TRUE)
fit3$train.auc # 0.7066667
fit4 = svml (y~x1+x2, dat, kernel="r", fitted=FALSE, cost=1e4)
fast.auc(predict(fit4, dat)$posterior[,1], dat$y) # 0.6910101
tune.svml(y~x1+x2, dat, kernel="r")
# cost:      1        10       100      1000     10000     1e+05
# AUC: 0.6485859 0.6705051 0.6722222 0.6767677 0.6910101 0.5007071
###########################################################
# a nonlinear example
dat=skin.orange (n=100,seed=1,noise=FALSE)
dim(dat)
# nonlinear kernel fit
fit1 = rauc (y~x1+x2+x3+x4, dat, lambda=2, kernel="rbf", para=1, verbose=TRUE)
# glm fit
fit.glm=glm(y~x1+x2+x3+x4, dat, family="binomial")
# linear kernel fit
fit2 = rauc (y~x1+x2+x3+x4, dat, lambda=2, kernel="linear", start.method = "rlogit", verbose=TRUE)
# training data prediction
fast.auc(fit1$linear.combination, fit1$y)
fast.auc(fit1$X %*% fit.glm$coef[-1], fit1$y)
fast.auc(fit2$linear.combination, fit2$y)
# test data prediction
newdata=skin.orange (n=1000,seed=2,noise=FALSE)
fast.auc(predict(fit1, newdata), newdata$y)
fast.auc(as.matrix(subset(newdata, select=c(x1,x2,x3,x4))) %*% fit.glm$coef[-1], newdata$y)
fast.auc(predict(fit2, newdata), newdata$y)
###########################################################
# improvements: additional optimization control options
## a rank-2 problem
dat = sim.dat.1(n=300, seed=1, add.outliers=TRUE, std.dev=1.0)
fm = y~x1+x2
## linear kernel with random working set selection - a low-rank (2) problem
## init.alpha.from.previous = TRUE passes the alpha estimated in the previous
## dca() iteration to minQuad as the starting value at each iteration
## the size of the working set is chosen automatically
set.seed(100)
fit.lin = rauc (fm, dat, lambda=.1, kernel="linear",
    verbose=TRUE, maxit=100, tol=1e-5,
    init.alpha.from.previous=TRUE, mem.efficient=TRUE,
    minQuad.control = control.minQuad(
        verbose=1, maxit=1e6, tol=1e-4,
        method="tron",
        working.set="rv2wg")
)
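# check the training AUC of the linear-kernel fit (same accessors as above)
fit.lin$train.auc
fast.auc(fit.lin$linear.combination, fit.lin$y)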
## 'rbf' kernel with random working set selection
## the low-rank problem is mapped to a possibly infinite-rank problem, so try a
## larger working set: the size is set to q = 100
set.seed(100)
fit.rbf = rauc (fm, dat, lambda=.1, kernel="rbf", para=1,
    verbose=TRUE, maxit=100, tol=1e-5,
    init.alpha.from.previous=TRUE, mem.efficient=TRUE,
    minQuad.control = control.minQuad(
        verbose=1, maxit=1e6, tol=1e-4,
        q = 100,
        method="tron",
        working.set="rv2wg")
)
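## optional sketch: compare the two fits above on an independent test set
## (seed=2 is an arbitrary choice for generating new data from the same model)
dat.test = sim.dat.1(n=300, seed=2, add.outliers=TRUE, std.dev=1.0)
fast.auc(predict(fit.lin, dat.test), dat.test$y)
fast.auc(predict(fit.rbf, dat.test), dat.test$y)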