# NOT RUN {
# }
# NOT RUN {
# The following comparison among glm(), bigglm() and speedglm() is neither rigorous
# nor exhaustive; it is only meant to give an idea of the computation times.
# It may take a long time to run.
# Attach both packages up front. library() stops with a clear error when a
# package is missing, whereas require() only warns and returns FALSE, which
# would defer the failure to the first bigglm()/speedglm() call.
library(biglm)
library(speedglm)

# Simulate a Gamma-distributed response and k Gaussian covariates (rounded to
# three digits) named s1, ..., sk.
n <- 50000
k <- 80
y <- rgamma(n, 1.5, 1)
x <- round(matrix(rnorm(n * k), n, k), digits = 3)
colnames(x) <- paste0("s", seq_len(k))
da <- data.frame(y, x)

# Model formula y ~ s1 + ... + sk, built from the covariate names.
fo <- reformulate(colnames(x), response = "y")

# Time the same Gamma(log) fit with each of the three implementations.
system.time(m1 <- glm(fo, data = da, family = Gamma(log)))
system.time(m2 <- bigglm(fo, data = da, family = Gamma(log)))
system.time(m3 <- speedglm(fo, data = da, family = Gamma(log)))

# You may also try speedglm when R is linked against an optimized BLAS;
# otherwise try the following call, which sets row.chunk = 1000. On some
# computers it is faster for large data sets.
system.time(
  m4 <- speedglm(fo, data = da, family = Gamma(log),
                 set.default = list(row.chunk = 1000))
)
# }
# NOT RUN {
##################
# }
# NOT RUN {
## An example of a function that uses a connection to an out-of-memory file.
## This is a slightly modified version of the function from bigglm's help page.
# Build a chunked reader for a text file.
#
# Arguments:
#   filename   path handed to file() when the reader is reset
#   chunksize  number of rows requested from read.table() per call
#   ...        further arguments forwarded to read.table()
#
# Returns a function of one argument, `reset`:
#   reset = TRUE   closes the current connection (if any) and opens a fresh
#                  read connection to `filename`
#   reset = FALSE  reads the next chunk from the open connection and returns
#                  it; when read.table() yields zero rows the connection is
#                  closed and NULL is returned
make.data <- function(filename, chunksize, ...) {
  # Connection state shared across calls of the returned closure.
  conn <- NULL
  function(reset = FALSE) {
    if (!reset) {
      chunk <- read.table(conn, nrows = chunksize, ...)
      if (nrow(chunk) > 0) {
        return(chunk)
      }
      # Nothing left to read: release the connection and signal the end.
      close(conn)
      conn <<- NULL
      return(NULL)
    }
    # Reset requested: drop any open connection before reopening the file.
    if (!is.null(conn)) {
      close(conn)
    }
    conn <<- file(filename, open = "r")
  }
}
# Attach the packages used below. library() stops with a clear error when a
# package is missing, whereas require() only warns and returns FALSE.
library(biglm)
library(speedglm)

# data1 is a small toy dataset. Write it to a text file without row or column
# names, then drop the in-memory copy so that it is only read from disk.
data(data1)
write.table(data1, "data1.txt", row.names = FALSE, col.names = FALSE)
rm(data1)
da <- make.data("data1.txt", chunksize = 50,
                col.names = c("y", "fat1", "x1", "x2"))

# Caution! Make sure to close the connection once you have run command #1.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable
# variables.
da(reset = TRUE)   #1: opens the connection to "data1.txt"
da(reset = FALSE)  #2: reads the first 50 rows (out of 100) of the dataset
da(reset = FALSE)  #3: reads the second 50 rows (out of 100) of the dataset
da(reset = FALSE)  #4: is NULL: this latter command closes the connection

# fat1 is a factor with four levels
b1 <- shglm(y ~ factor(fat1) + x1, weights = ~ I(x2^2), datafun = da,
            family = Gamma(log))
b2 <- bigglm(y ~ factor(fat1) + x1, weights = ~ I(x2^2), data = da,
             family = Gamma(log))
summary(b1)
summary(b2)

# Clean up the temporary text file.
file.remove("data1.txt")
# }
# Run the code above in your browser using DataLab