# Load the example `metals` data set and fix the RNG seed before splitting,
# so the train/validation partition below is repeatable.
data("metals")
set.seed(1231124)
spl <- split_data(metals)

# Names of the exposure columns that make up the mixture.
Xnm <- c(
  "arsenic", "barium", "cadmium", "calcium", "chromium",
  "copper", "iron", "lead", "magnesium", "manganese",
  "mercury", "selenium", "silver", "sodium", "zinc"
)

# Sizes of the two partitions returned by split_data().
dim(spl$traindata)  # 181 observations = 40% of total
dim(spl$validdata)  # 271 observations = 60% of total

# Hand both partitions to qgcomp.partials(), which is told (via `fun`) to use
# the non-bootstrapped GLM fitter with quartiles (q = 4) of the exposures.
splitres <- qgcomp.partials(
  fun = "qgcomp.glm.noboot",
  f = y ~ .,
  q = 4,
  traindata = spl$traindata,
  validdata = spl$validdata,
  expnms = Xnm
)
splitres
# Data splitting can also be used to compare linear and non-linear fits
# (useful when there is enough data). Re-split with half of the rows used
# for training.
set.seed(1231)
spl <- split_data(metals, prop.train = 0.5)

# Candidate 1: linear in the quantized exposures.
lin <- qgcomp.glm.boot(
  f = y ~ .,
  q = 4,
  expnms = Xnm,
  B = 5,
  data = spl$traindata
)

# Candidate 2: adds squared terms for manganese and calcium (deg = 2).
nlin1 <- qgcomp.glm.boot(
  f = y ~ . + I(manganese^2) + I(calcium^2),
  expnms = Xnm,
  deg = 2,
  q = 4,
  B = 5,
  data = spl$traindata
)

# Candidate 3: adds squared terms for arsenic and cadmium (deg = 2).
nlin2 <- qgcomp.glm.boot(
  f = y ~ . + I(arsenic^2) + I(cadmium^2),
  expnms = Xnm,
  deg = 2,
  q = 4,
  B = 5,
  data = spl$traindata
)

# Print the training-data AIC of each candidate, in order.
AIC(lin)
AIC(nlin1)
AIC(nlin2)

# The linear model has the lowest training AIC, so the final fit uses the
# linear specification on the validation data (no bootstrap needed for it).
qgcomp.glm.noboot(f = y ~ ., q = 4, expnms = Xnm, data = spl$validdata)
# Run the code above in your browser using DataLab