# NOT RUN {
# Switch to a temporary directory for the example; setwd() hands back the
# directory we were in, which is kept so it can be restored at the end.
orig.dir <- setwd(tempdir())
# Collate all file names to use in this example #
# Five fixed reference/input files, followed by three numbered files for
# each of the multi-file stems.
all.fn <- c(
  "rownames.txt", "colnames.txt", "functestdn.txt", "funclongcol.txt",
  "functest.txt",
  unlist(lapply(
    c("rn", "cn", "split", "splitmatCd", "splitmatRd", "splitmatC", "splitmatR"),
    function(stem) paste0(stem, 1:3, ".txt")
  ))
)
# Record which of these names are already taken, so that the clean-up step
# can avoid removing files this example did not create.
any.already <- file.exists(all.fn)
if (any(any.already)) {
  warning("files already exist in the working directory with the same names as some example files")
}
# SETUP a test matrix and reference files #
test.size <- 4 # try increasing this number for larger matrices
# normal matrix: 10^test.size uniform random values in 10^(test.size-2) columns
M <- matrix(runif(10^test.size), ncol = 10^(test.size - 2))
# plain tab-delimited copy with no dimnames
write.table(
  M,
  file = "functest.txt", sep = "\t", quote = FALSE,
  row.names = FALSE, col.names = FALSE
)
# Random labels: "rs" + two digits + five digits for rows,
# "ID" + one digit + five digits for columns.
rown <- paste0(
  "rs",
  sample(10:99, nrow(M), replace = TRUE),
  sample(10000:99999, nrow(M))
)
coln <- paste0(
  "ID",
  sample(1:9, ncol(M), replace = TRUE),
  sample(10000:99999, ncol(M))
)
r.fn <- "rownames.txt"
c.fn <- "colnames.txt"
# Same values as M, but carrying the row and column labels
Mdn <- M
dimnames(Mdn) <- list(rown, coln)
# with dimnames
write.table(
  Mdn,
  file = "functestdn.txt", sep = "\t", quote = FALSE,
  row.names = TRUE, col.names = TRUE
)
# Preview the labelled matrix
prv.large(Mdn)
# Long format: every value of M on its own line, in column-major order
writeLines(paste(as.vector(M)), con = "funclongcol.txt")
in.fn <- "functest.txt"
### IMPORTING SIMPLE 1 FILE MATRIX ##
# Write the row and column labels to their reference files
writeLines(rown, r.fn)
writeLines(coln, c.fn)
#1. import without specifying row/column names
ii <- import.big.data(in.fn)
prv.big.matrix(ii) # SLOWER without dimnames!
#2. import using row/col names from file
ii <- import.big.data(
  in.fn,
  cols.fn = "colnames.txt", rows.fn = "rownames.txt",
  pref = "p1"
)
prv.big.matrix(ii)
#3. import by passing colnames/rownames as objects
ii <- import.big.data(
  in.fn,
  col.names = coln, row.names = rown,
  pref = "p2"
)
prv.big.matrix(ii)
### IMPORTING SIMPLE 1 FILE MATRIX WITH DIMNAMES ##
#1. import without specifying row/column names, but they ARE in the file
in.fn <- "functestdn.txt"
ii <- import.big.data(in.fn, pref = "p3")
prv.big.matrix(ii)
### IMPORTING SIMPLE 1 FILE MATRIX WITH MISORDERED rownames ##
# Keep a copy of the row labels, then shuffle the working copy
rown2 <- rown
rown <- sample(rown)
# re-run test3 using in.fn with dimnames
ii <- import.big.data(
  in.fn,
  col.names = coln, row.names = rown,
  pref = "p4"
)
prv.big.matrix(ii)
# restore rownames:
rown <- rown2
### IMPORTING SIMPLE 1 FILE LONG FORMAT by columns ##
# rerun test 2, this time on the one-value-per-line file
in.fn <- "funclongcol.txt"
ii <- import.big.data(
  in.fn,
  cols.fn = "colnames.txt", rows.fn = "rownames.txt",
  pref = "p5"
)
prv.big.matrix(ii)
### IMPORTING multifile LONG by cols ##
# create the dataset and references
# Assign the columns of M to three groups in a 10% / 50% / 40% split.
# round() is applied because rep() truncates non-integer counts, so a
# floating-point product landing just below a whole number would otherwise
# drop a column from its group.
splF <- factor(rep(1:3, round(ncol(M) * c(.1, .5, .4))))
colnL <- split(coln, splF)
# Transpose so that each data-frame row is one column of M; split() on a
# data frame then divides those rows according to splF.
MM <- as.data.frame(t(M))
Ms2 <- split(MM, splF)
# Turn each piece back into a plain nrow(M)-row matrix without dimnames
Ms2 <- lapply(Ms2, function(X) {
  X <- t(X)
  dim(X) <- c(nrow(M), length(X) / nrow(M))
  X
})
# preview Ms2 - not run # lapply(Ms2,prv.large)
colfs <- paste0("cn", seq_along(colnL), ".txt")
infs <- paste0("split", seq_along(colnL), ".txt")
# create multiple column name files and input files
# (seq_along() rather than 1:length(), so an empty split is not iterated
# over as c(1, 0))
for (cc in seq_along(colnL)) {
  writeLines(colnL[[cc]], con = colfs[cc])
  # each input file holds its group's values in long format, one per line
  writeLines(paste(as.vector(Ms2[[cc]])), con = infs[cc])
}
# Now test the import using colnames and rownames lists
ii <- import.big.data(infs, col.names = colnL, row.names = rown, pref = "p6")
prv.big.matrix(ii)
### IMPORTING multifile MATRIX by rows ##
# create the dataset and references
# Assign the rows of M to three groups in a 10% / 50% / 40% split.
# round() is applied because rep() truncates non-integer counts, so a
# floating-point product landing just below a whole number would otherwise
# drop a row from its group.
splF <- factor(rep(1:3, round(nrow(M) * c(.1, .5, .4))))
rownL <- split(rown, splF)
# splF has one entry per row and is recycled down every column of M, so
# each piece collects the values of the rows belonging to that group.
Ms <- split(M, splF)
# Reshape each piece back into a matrix with ncol(M) columns
Ms <- lapply(Ms, function(X) {
  dim(X) <- c(length(X) / ncol(M), ncol(M))
  X
})
# preview Ms - not run # lapply(Ms,prv.large)
# create multiple row name files and input files
rowfs <- paste0("rn", seq_along(rownL), ".txt")
# One input file per ROW group: the count comes from rownL (not from the
# column grouping of the previous section, which only happened to have the
# same number of groups).
infs <- paste0("splitmatR", seq_along(rownL), ".txt")
for (cc in seq_along(rownL)) {
  writeLines(rownL[[cc]], con = rowfs[cc])
  write.table(
    Ms[[cc]],
    file = infs[cc], sep = "\t", quote = FALSE,
    row.names = FALSE, col.names = FALSE
  )
}
# Now test the import using colnames and rownames files
# NOTE(review): a file name is passed as `col.names` here, whereas the other
# file-based calls in this example use `cols.fn`. Left unchanged - confirm
# against the import.big.data documentation which argument is intended.
ii <- import.big.data(infs, col.names = "colnames.txt", rows.fn = rowfs, pref = "p7")
prv.big.matrix(ii)
# DELETE ALL FILES ##
# A single unlink() call removes the example inputs plus the extra files
# listed below (presumably by-products of the imports above - verify the
# list against what import.big.data actually writes).
# NOTE(review): prefixes "p1".."p7" are used by the imports, but only
# "p2".."p7" are listed here - confirm whether "p1" should be included.
unlink(c(
  all.fn[!any.already], # prevent deleting user's files
  ## many files to clean up! ##
  "funclongcol.bck", "funclongcol.dsc",
  "functest.bck", "functest.dsc",
  "functestdn.RData", "functestdn.bck", "functestdn.dsc",
  "functestdn_file_rowname_list_check_this.txt",
  "split1.bck", "split1.dsc",
  "splitmatR1.bck", "splitmatR1.dsc",
  paste0("p", 2:7)
))
# reset working dir to original
setwd(orig.dir)
# }
# Run the code above in your browser using DataLab