#compare a new geneset analysis method with PADOG and GSA
#define your new gene set analysis method that takes as input:
#set- the name of dataset file from the PADOGsetspackage
#mygslist - a list with the genesets
#minsize- minimum number of genes in a geneset to be considered for analysis
randomF=function(set,mygslist,minsize){
set.seed(1)
#this loads the dataset in an ExpressionSet object called x
data(list=set,package="KEGGdzPathwaysGEO")
x=get(set)
#Extract from the dataset the required info to be passed to padog
exp=experimentData(x);
dat.m=exprs(x)
ano=pData(x)
dataset= exp@name
design= notes(exp)$design
annotation= paste(x@annotation,".db",sep="")
targetGeneSets= notes(exp)$targetGeneSets
#get rid of duplicates probesets per ENTREZ ID by keeping the probeset
#with smallest p-value (computed using limma)
aT1=filteranot(esetm=dat.m,group=ano$Group,paired=(design=="Paired"),
block=ano$Block,annotation=annotation)
#create an output dataframe for this toy method with random gene set p-values
mygslistSize=unlist(lapply(mygslist,function(x){length(intersect(aT1$ENTREZID,x))}))
res=data.frame(ID=names(mygslist),P=runif(length(mygslist)),
Size=mygslistSize,stringsAsFactors=FALSE)
res$FDR=p.adjust(res$P,"fdr")
#drop genesets with less than minsize genes in the current dataset
res=res[res$Size>=minsize,]
#compute ranks
res$Rank=rank(res$P)/dim(res)[1]*100
#needed to compare ranks between methods; must be the same as given
#in mymethods argument "list(myRand="
res$Method="myRand";
#needed because comparisons of ranks between methods is paired at dataset level
res$Dataset<-dataset;
#output only result for the targetGeneSets
#which are gene sets expected to be relevant in this dataset
return(res[res$ID %in% targetGeneSets,])
}
#run the analysis on all 24 datasets and compare the new method "myRand" with
#PADOG and GSA (if installed) (chosen as reference since is listed first in the existingMethods)
#if the package parallel is installed datasets are analyzed in parallel.
#out=compPADOG(datasets=NULL,existingMethods=c("GSA","PADOG"),
#mymethods=list(myRand=randomF),
#gslist="KEGG.db",Nmin=3,NI=1000,plots=TRUE,verbose=FALSE)
#compare myRand against PADOG on 4 datasets only
#mysets=data(package="PADOGsets")$results[,"Item"]
mysets=c("GSE9348","GSE8671","GSE1297")
out=compPADOG(datasets=mysets,existingMethods=c("PADOG"),
mymethods=list(myRand=randomF),
gslist="KEGG.db",Nmin=3,NI=20,plots=FALSE,verbose=FALSE)Run the code above in your browser using DataLab