rauc: rauc

Description

minimizes 1 - (p)AUC plus a penalty

Usage

rauc (formula, dat, s = 1,lambda=1, kernel="linear", para=NULL, start.method="rlogit", 
eta0.init=NULL,beta.init = NULL, eta.diff.init=NULL, 
maxit=50, tol=1e-5,minQuad.control = control.minQuad(),
init.alpha.from.previous = TRUE,mem.efficient = TRUE,
ret.vcov = FALSE, garbage.collection = TRUE, verbose = FALSE, ...
)

Arguments

formula

formula, e.g. y~x1+x2

dat

Data frame

s

absolute value of the slope, defaults to 1. Note: the pair (s, lambda) is redundant, so this argument is a candidate for removal.

lambda

scale parameter in front of the penalty function, default to 1

kernel

See getK for more details

para

See getK for more details

start.method

a string. When kernel is linear: If "rlogit", robust logistic fit is used as beta.init. If "1", a vector of 1 is used as beta.init. If "0", a vector of 0 is used as beta.init.

eta0.init

a vector of the same length as the number of rows in dat

beta.init

a vector of length equal to no. of covariates (without intercept) of initial values for linear kernel.

eta.diff.init

a vector of the same length as the number of rows in dat

maxit

maximum number of iterations in the DCA algorithm

tol

absolute tolerance in RAUC if kernel is not linear, relative tolerance in coefficients if kernel is linear.

minQuad.control

control parameters passed to method minQuad, please see minQuad.

init.alpha.from.previous

defaults to TRUE, if TRUE then after the first iteration minQuad receives as the initial "alpha" the estimate of "alpha" from the previous iteration in dca algorithm.

mem.efficient

if TRUE, the small matrix 'K' instead of 'Q' is used in computations, defaults to TRUE.

ret.vcov

logical, whether to return an estimate of the covariance matrix of 'beta' for normal or logistic sigmoid functions.

garbage.collection

logical, whether to call gc at end of each DCA iteration

verbose

prints information at each iteration, defaults to FALSE

...

for debugging purposes only

Value

A list with the following elements:
convergence: 0 if converged, 1 if maximum iteration is reached.
value: value of the objective function.
iterations: number of iterations until convergence or 'maxit' reached.

Examples

Run this code

# Optional: set the `path.svml` option before calling svml() further down
# (presumably the SVMlight install location -- confirm against svml docs).
# options(path.svml = "D:/downloaded_scientific_programs/svmlight")
# options(path.svml = "~/bin/svmlight")

# ---- Linear example ------------------------------------------------------

dat <- sim.dat.1(n = 200, seed = 1)

# Convergence takes long; to pass the CRAN check, maxit is kept very small
# (2 in the call below).
fit1 <- rauc(y ~ x1 + x2, dat, lambda = 2, kernel = "linear", maxit = 2)
# fit2 <- rauc.linear(y ~ x1 + x2, dat, lambda = 2, verbose = TRUE)
# aux2 <- fit2$X %*% fit2$coefficients
# all(fit1$linear.combination - aux2 < 1e-2)
fit1$train.auc # 0.7206015

# Same data, rbf kernel.
fit3 <- rauc(
  y ~ x1 + x2, dat,
  lambda = 2, kernel = "rbf", para = 1, verbose = TRUE
)
fit3$train.auc # 0.7773434

# svml fit with kernel = "r", scored on the training data, then tuned.
fit4 <- svml(y ~ x1 + x2, dat, kernel = "r", fitted = FALSE, cost = 1e4)
fast.auc(predict(fit4, dat)$posterior[, 1], dat$y) # 0.7921805
tune.svml(y ~ x1 + x2, dat, kernel = "r")
#         1        10       100      1000     10000     1e+05
# 0.7027569 0.7254135 0.7517794 0.7653133 0.7921805 0.6674687

# glm-derived score for comparison (intercept dropped via [-1]).
fit.glm <- glm(y ~ x1 + x2, data = dat, family = "binomial")
fast.auc(fit1$X %*% fit.glm$coefficients[-1], fit1$y)

# ---- Same comparison on data with outliers added -------------------------
dat <- sim.dat.1(n = 200, seed = 1, add.outliers = TRUE)

# rauc with an rbf kernel.
fit3 <- rauc(
  y ~ x1 + x2, dat,
  lambda = 2, kernel = "rbf", para = 1, verbose = TRUE
)
fit3$train.auc # 0.7066667

# svml with kernel = "r": training AUC, then tuning.
fit4 <- svml(y ~ x1 + x2, dat, kernel = "r", fitted = FALSE, cost = 1e4)
fast.auc(predict(fit4, dat)$posterior[, 1], dat$y) # 0.6910101
tune.svml(y ~ x1 + x2, dat, kernel = "r")
#         1        10       100      1000     10000     1e+05
# 0.6485859 0.6705051 0.6722222 0.6767677 0.6910101 0.5007071



# ---- Nonlinear example ---------------------------------------------------

dat <- skin.orange(n = 100, seed = 1, noise = FALSE)
dim(dat)

# Three fits on the same training data:
#   fit1    - rauc with a nonlinear (rbf) kernel
#   fit.glm - logistic regression via glm
#   fit2    - rauc with a linear kernel, started from the "rlogit" method
fit1 <- rauc(
  y ~ x1 + x2 + x3 + x4, dat,
  lambda = 2, kernel = "rbf", para = 1, verbose = TRUE
)
fit.glm <- glm(y ~ x1 + x2 + x3 + x4, data = dat, family = "binomial")
fit2 <- rauc(
  y ~ x1 + x2 + x3 + x4, dat,
  lambda = 2, kernel = "linear", start.method = "rlogit", verbose = TRUE
)

# AUC on the training data (glm score uses the slopes only, intercept dropped).
fast.auc(fit1$linear.combination, fit1$y)
fast.auc(fit1$X %*% fit.glm$coefficients[-1], fit1$y)
fast.auc(fit2$linear.combination, fit2$y)

# AUC on freshly simulated test data.
newdata <- skin.orange(n = 1000, seed = 2, noise = FALSE)
fast.auc(predict(fit1, newdata), newdata$y)
fast.auc(
  as.matrix(subset(newdata, select = c(x1, x2, x3, x4))) %*%
    fit.glm$coefficients[-1],
  newdata$y
)
fast.auc(predict(fit2, newdata), newdata$y)



# ---- Improvements --------------------------------------------------------

## rank = 2 problem
dat <- sim.dat.1(n = 300, seed = 1, add.outliers = TRUE, std.dev = 1.0)
fm <- y ~ x1 + x2

## Linear kernel and random working set selection - low rank (2) problem.
## The initial alpha passed to minQuad at each iteration of the dca loop is
## taken from the estimate of the previous dca() iteration
## (init.alpha.from.previous = TRUE).
## The size of the working set is set automatically.
## The working set is chosen at random, so the seed is fixed first.
set.seed(100)
fit.lin <- rauc(
  fm, dat,
  lambda = 0.1, kernel = "linear",
  verbose = TRUE, maxit = 100, tol = 1e-5,
  init.alpha.from.previous = TRUE, mem.efficient = TRUE,
  minQuad.control = control.minQuad(
    verbose = 1, maxit = 1e6, tol = 1e-4,
    method = "tron",
    working.set = "rv2wg"
  )
)

## 'rbf' kernel and random working set selection.
## Low rank mapped to a possibly infinite rank problem: try a larger working
## set 'q'. The size of the working set is set to q = 100.
## The seed call used to be fused into the comment above and never ran; it is
## restored here so this fit is seeded like the linear one.
set.seed(100)
fit.rbf <- rauc(
  fm, dat,
  lambda = 0.1, kernel = "rbf", para = 1,
  verbose = TRUE, maxit = 100, tol = 1e-5,
  init.alpha.from.previous = TRUE, mem.efficient = TRUE,
  minQuad.control = control.minQuad(
    verbose = 1, maxit = 1e6, tol = 1e-4,
    q = 100,
    method = "tron",
    working.set = "rv2wg"
  )
)

Run the code above in your browser using DataLab