# Example session for the distributed ("ca*") routines used below.
# Each ca* call takes the cluster plus a character string naming the
# distributed object (or the call arguments); those strings are evaluated by
# the package, so they are kept exactly as written.

# attach dependencies so the script runs in a fresh session
library(parallel)  # makeCluster(), stopCluster()
library(partools)  # setclsinfo(), distribsplit(), ca*() functions, data sets

# set up 'parallel' cluster
cls <- makeCluster(2)
setclsinfo(cls)

# generate simulated test data, as distributed data frame
n <- 25000
u <- matrix(nrow = n, ncol = 4)
u[, 1:3] <- rnorm(3 * n)
u[, 4] <- u[, 1] + 2 * u[, 2] + u[, 3]
distribsplit(cls, "u")
# apply the function
calm(cls, "u[,4] ~ u[,1]+u[,2]")$tht
# check; results should be approximately the same
lm(u[, 4] ~ u[, 1] + u[, 2])

# Census data on programmers and engineers; include a quadratic term for
# age, due to nonmonotone relation to income
data(prgeng)
distribsplit(cls, "prgeng")
caout <- calm(cls, "wageinc ~ age+I(age^2)+sex+wkswrkd,data=prgeng")
caout$tht
# compare to nonparallel
lm(wageinc ~ age + I(age^2) + sex + wkswrkd, data = prgeng)
# get standard errors of the beta-hats
sqrt(diag(caout$thtcov))

# find mean age for all combinations of the cit and sex variables
caagg(cls, "age", c("cit", "sex"), "prgeng", "mean")
# compare to nonparallel
aggregate(age ~ cit + sex, data = prgeng, mean)

# second data set: distributed generalized linear model with a binomial
# family, then principal components
data(newadult)
distribsplit(cls, "newadult")
caglm(cls, "gt50 ~ ., family = binomial,data=newadult")$tht
# NOTE(review): the trailing 5 is presumably the number of components/columns
# expected by caprcomp -- confirm against the partools documentation
caprcomp(cls, "newadult,scale=TRUE", 5)$sdev
# compare to nonparallel; `scale.` is the full formal name in prcomp(), the
# original relied on partial matching of `scale`
prcomp(newadult, scale. = TRUE)$sdev

# distributed means, for the whole data frame and for a two-column subset
cameans(cls, "prgeng")
cameans(cls, "prgeng[,c('age','wageinc')]")
# distributed quantiles of the age variable
caquantile(cls, "prgeng$age")

# k-means on columns 1, 3 and 8 of prgeng
pe <- prgeng[, c(1, 3, 8)]
distribsplit(cls, "pe")
# NOTE(review): the two 3s are presumably the number of clusters and the
# number of columns of pe -- confirm against the cakm documentation
z1 <- cakm(cls, "pe", 3, 3)
z1$size
z1$centers
# check whether the algorithm is unstable by inspecting z1$thts
z1$thts # looks unstable

# release the worker processes started by makeCluster()
stopCluster(cls)