if (FALSE) {
wd = system.file("extdata",package="stmgp")
# quantitative traits
# training data (plink format)
trainbed = paste(wd,"train",sep="/")
# test data (plink format)
testbed = paste(wd,"test",sep="/")
# number of SNPs
#n.snp = length(readLines(paste(trainbed,".bim",sep="")))
n.snp = 80000
#> head(read.table(paste0(trainbed,".fam")))
# training sample .fam file (quantitative phenotype in the 6th column)
# V1 V2 V3 V4 V5 V6
#1 id1_100 id2_100 0 0 1 -1.23
#2 id1_101 id2_101 0 0 1 1.48
#3 id1_102 id2_102 0 0 1 -4.27
#4 id1_103 id2_103 0 0 1 -2.61
#5 id1_104 id2_104 0 0 1 -0.27
#6 id1_105 id2_105 0 0 1 -0.50
#> head(read.table(paste0(testbed,".fam")))
# test sample .fam file
# (quantitative phenotype in the 6th column but missing (i.e. "-9") allowed)
# V1 V2 V3 V4 V5 V6
#1 id1_0 id2_0 0 0 1 -0.59
#2 id1_1 id2_1 0 0 1 1.11
#3 id1_2 id2_2 0 0 1 -2.45
#4 id1_3 id2_3 0 0 1 0.11
#5 id1_4 id2_4 0 0 1 -1.17
#6 id1_5 id2_5 0 0 1 2.08
pSum = read.table(paste0(wd,"/pSum.txt"));
rownames(pSum) = pSum[,2]; pSum = pSum[,7:8,drop=F]
# summary p-values format
# ("rownames(pSum)" for SNP ID should be provided; "NA" means unavailable)
#> head(pSum,15) # summary p-values from two studies
# V7 V8
#rs12255619 NA NA
#rs11252546 NA NA
#rs7909677 NA NA
#rs10904494 NA NA
#rs11591988 NA NA
#rs4508132 NA NA
#rs10904561 NA NA
#rs7917054 NA NA
#rs7906287 NA NA
#rs12775579 NA NA
#rs4495823 0.08731436 0.2150108
#rs11253478 0.24030258 0.8726241
#rs9419557 0.49243856 0.3823173
#rs9286070 0.31277506 0.8521706
#rs9419560 NA 0.7604134
# model building from training data without covariates
sp1 = stmgplink(trainbed=trainbed,maxal=5000/n.snp)
head(sp1$PE)
# model building from training data to predict test data without covariates
sp2 = stmgplink(trainbed=trainbed,testbed=testbed,maxal=5000/n.snp)
head(sp2$PEte)
head(sp2$nonzero)
# model building from training data to predict test data without covariates
# (using 1 pSum)
sp2p = stmgplink(trainbed=trainbed,testbed=testbed,maxal=5000/n.snp,
pSum=pSum[,1,drop=F])
head(sp2p$PEte)
head(sp2p$nonzero)
# model building from training data to predict test data without covariates
# (using 2 pSum)
sp2pp = stmgplink(trainbed=trainbed,testbed=testbed,maxal=5000/n.snp,pSum=pSum)
head(sp2pp$PEte)
head(sp2pp$nonzero)
# using covariates files
Zf = paste(wd,"train.cov",sep="/")
Ztef = paste(wd,"test.cov",sep="/")
#> head(read.table(Zf,header=TRUE)) # covariate for training sample
# FID IID COV1 COV2 qphen bphen
#1 id1_340 id2_340 1.27203944 1 -2.47 2
#2 id1_430 id2_430 -0.44144482 1 -0.71 2
#3 id1_199 id2_199 -0.18200011 1 -3.42 2
#4 id1_473 id2_473 0.03965880 0 0.32 1
#5 id1_105 id2_105 0.20418279 0 -0.50 2
#6 id1_188 id2_188 -0.04838519 0 2.98 1
#> head(read.table(Zfte,header=TRUE)) # covariate for test sample
# FID IID COV1 COV2
#1 id1_80 id2_80 -0.2057512 0
#2 id1_53 id2_53 -0.8627601 1
#3 id1_59 id2_59 -0.2973529 1
#4 id1_71 id2_71 1.4728727 1
#5 id1_92 id2_92 3.5614472 0
#6 id1_25 id2_25 0.5135032 1
# model building from training data
sp3 = stmgplink(trainbed=trainbed,maxal=5000/n.snp,Z=Zf,Znames=c("COV1","COV2"))
head(sp3$PE)
# model building from training data and predicting test data
sp4 = stmgplink(trainbed=trainbed,testbed=testbed,maxal=5000/n.snp,Z=Zf,Zte=Ztef,
Znames=c("COV1","COV2"))
head(sp4$PEte)
head(sp4$nonzero)
# model building from training data and predicting test data (using 1 pSum)
sp4p = stmgplink(trainbed=trainbed,testbed=testbed,maxal=5000/n.snp,
Z=Zf,Zte=Ztef,Znames=c("COV1","COV2"),pSum=pSum[,1,drop=F])
head(sp4p$PEte)
head(sp4p$nonzero)
# model building from training data and predicting test data (using 2 pSum)
sp4pp = stmgplink(trainbed=trainbed,testbed=testbed,maxal=5000/n.snp,
Z=Zf,Zte=Ztef,Znames=c("COV1","COV2"),pSum=pSum)
head(sp4pp$PEte)
head(sp4pp$nonzero)
# no summary p-values
cor(sp2$PE[,6:7],use="pair")[1,2] # training (no covariate)
cor(sp2$PEte[,6:7],use="pair")[1,2] # test (no covariate)
cor(sp4$PE[,6:7],use="pair")[1,2] # training (covariates)
cor(sp4$PEte[,6:7],use="pair")[1,2] # test (covariates)
# 1 summary p-values
cor(sp2p$PE[,6:7],use="pair")[1,2] # training (no covariate)
cor(sp2p$PEte[,6:7],use="pair")[1,2] # test (no covariate)
cor(sp4p$PE[,6:7],use="pair")[1,2] # training (covariates)
cor(sp4p$PEte[,6:7],use="pair")[1,2] # test (covariates)
# 2 summary p-values
cor(sp2pp$PE[,6:7],use="pair")[1,2] # training (no covariate)
cor(sp2pp$PEte[,6:7],use="pair")[1,2] # test (no covariate)
cor(sp4pp$PE[,6:7],use="pair")[1,2] # training (covariates)
cor(sp4pp$PEte[,6:7],use="pair")[1,2] # test (covariates)
#### binary traits ####
# training data (plink format)
trainbed = paste(wd,"train",sep="/")
# test data (plink format)
testbed = paste(wd,"test",sep="/")
# number of SNPs
#n.snp = length(readLines(paste(trainbed,".bim",sep="")))
n.snp = 80000
#> head(read.table(paste0(trainbed,"b.fam")))
# training sample .fam file (binary phenotype (1 or 2) in the 6th column)
# V1 V2 V3 V4 V5 V6
#1 id1_100 id2_100 0 0 1 2
#2 id1_101 id2_101 0 0 1 1
#3 id1_102 id2_102 0 0 1 2
#4 id1_103 id2_103 0 0 1 2
#5 id1_104 id2_104 0 0 1 2
#6 id1_105 id2_105 0 0 1 2
#> head(read.table(paste0(testbed,"b.fam")))
# test sample .fam file (binary phenotype (1 or 2) in the 6th column
# but missing (i.e. "-9") allowed)
# V1 V2 V3 V4 V5 V6
#1 id1_0 id2_0 0 0 1 2
#2 id1_1 id2_1 0 0 1 1
#3 id1_2 id2_2 0 0 1 2
#4 id1_3 id2_3 0 0 1 1
#5 id1_4 id2_4 0 0 1 2
#6 id1_5 id2_5 0 0 1 1
# model building from training data without covariates
sp1b = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp)
head(sp1b$PE)
# model building from training data to predict test data without covariates
sp2b = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp)
head(sp2b$PEte)
head(sp2b$nonzero)
# model building from training data to predict test data without covariates
# (using 1 pSum)
sp2bp = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp,
pSum=pSum[,1,drop=F])
head(sp2bp$PEte)
head(sp2bp$nonzero)
# model building from training data to predict test data without covariates
# (using 2 pSum)
sp2bpp = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp,pSum=pSum)
head(sp2bpp$PEte)
head(sp2bpp$nonzero)
# using covariates files
# model building from training data
sp3b = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
maxal=5000/n.snp,Z=paste(wd,"train.cov",sep="/"),Znames=c("COV1","COV2"))
head(sp3b$PE)
# model building from training data and predicting test data
sp4b = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp,Z=Zf,Zte=Ztef,
Znames=c("COV1","COV2"))
head(sp4b$PEte)
head(sp4b$nonzero)
# model building from training data and predicting test data (using 1 pSum)
sp4bp = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp,
Z=Zf,Zte=Ztef,Znames=c("COV1","COV2"),pSum=pSum[,1,drop=F])
head(sp4bp$PEte)
head(sp4bp$nonzero)
# model building from training data and predicting test data (using 2 pSum)
sp4bpp = stmgplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),maxal=5000/n.snp,
Z=Zf,Zte=Ztef,Znames=c("COV1","COV2"),pSum=pSum)
head(sp4bpp$PEte)
head(sp4bpp$nonzero)
# no summary p-values
cor(sp2b$PE[,6:7],use="pair")[1,2] # training (no covariate)
cor(sp2b$PEte[,6:7],use="pair")[1,2] # test (no covariate)
cor(sp4b$PE[,6:7],use="pair")[1,2] # training (covariates)
cor(sp4b$PEte[,6:7],use="pair")[1,2] # test (covariates)
# 1 summary p-values
cor(sp2bp$PE[,6:7],use="pair")[1,2] # trainig (no covariate)
cor(sp2bp$PEte[,6:7],use="pair")[1,2] # test (no covariate)
cor(sp4bp$PE[,6:7],use="pair")[1,2] # training (covariates)
cor(sp4bp$PEte[,6:7],use="pair")[1,2] # test (covariates)
# 2 summary p-values
cor(sp2bpp$PE[,6:7],use="pair")[1,2] # training (no covariate)
cor(sp2bpp$PEte[,6:7],use="pair")[1,2] # test (no covariate)
cor(sp4bpp$PE[,6:7],use="pair")[1,2] # training (covariates)
cor(sp4bpp$PEte[,6:7],use="pair")[1,2] # test (covariates)
}
Run the code above in your browser using DataLab