if (FALSE) {
wd = system.file("extdata",package="stmgp")
# quantitative traits
# training data (plink format)
trainbed = paste(wd,"train",sep="/")
# test data (plink format)
testbed = paste(wd,"test",sep="/")
# number of SNPs
#n.snp = length(readLines(paste(trainbed,".bim",sep="")))
n.snp = 80000
#> head(read.table(paste0(trainbed,".fam")))
# training sample .fam file (quantitative phenotype in the 6th column)
# V1 V2 V3 V4 V5 V6
#1 id1_100 id2_100 0 0 1 -1.23
#2 id1_101 id2_101 0 0 1 1.48
#3 id1_102 id2_102 0 0 1 -4.27
#4 id1_103 id2_103 0 0 1 -2.61
#5 id1_104 id2_104 0 0 1 -0.27
#6 id1_105 id2_105 0 0 1 -0.50
#> head(read.table(paste0(testbed,".fam")))
# test sample .fam file
# (quantitative phenotype in the 6th column but missing (i.e. "-9") allowed)
# V1 V2 V3 V4 V5 V6
#1 id1_0 id2_0 0 0 1 -0.59
#2 id1_1 id2_1 0 0 1 1.11
#3 id1_2 id2_2 0 0 1 -2.45
#4 id1_3 id2_3 0 0 1 0.11
#5 id1_4 id2_4 0 0 1 -1.17
#6 id1_5 id2_5 0 0 1 2.08
# using covariates files
Zf = paste(wd,"train.cov",sep="/")
Ztef = paste(wd,"test.cov",sep="/")
#> head(read.table(Zf,header=TRUE)) # covariate for training sample
# FID IID COV1 COV2 qphen bphen
#1 id1_340 id2_340 1.27203944 1 -2.47 2
#2 id1_430 id2_430 -0.44144482 1 -0.71 2
#3 id1_199 id2_199 -0.18200011 1 -3.42 2
#4 id1_473 id2_473 0.03965880 0 0.32 1
#5 id1_105 id2_105 0.20418279 0 -0.50 2
#6 id1_188 id2_188 -0.04838519 0 2.98 1
#> head(read.table(Ztef,header=TRUE)) # covariate for test sample
# FID IID COV1 COV2
#1 id1_80 id2_80 -0.2057512 0
#2 id1_53 id2_53 -0.8627601 1
#3 id1_59 id2_59 -0.2973529 1
#4 id1_71 id2_71 1.4728727 1
#5 id1_92 id2_92 3.5614472 0
#6 id1_25 id2_25 0.5135032 1
# model building from training data
# (incorporating COV1xG interaction as well as two covariates, COV1 and COV2)
sge1p0 = stmgeplink(trainbed=trainbed,Z=Zf,Enames="COV1",
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge1p0$PE)
# model building from training data and predicting test data
# (incorporating COV1xG interaction as well as two covariates, COV1 and COV2)
sge1p = stmgeplink(trainbed=trainbed,testbed=testbed,Z=Zf,Zte=Ztef,Enames="COV1",
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge1p$PEte)
head(sge1p$nonzero)
# model building from training data
# (incorporating COV1xG and COV2xG interactions as well as two covariates, COV1 and COV2)
sge12p0 = stmgeplink(trainbed=trainbed,Z=Zf,Enames=c("COV1","COV2"),
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge12p0$PE)
# model building from training data and predicting test data
# (incorporating COV1xG and COV2xG interactions as well as two covariates, COV1 and COV2)
sge12p = stmgeplink(trainbed=trainbed,testbed=testbed,Z=Zf,Zte=Ztef,Enames=c("COV1","COV2"),
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge12p$PEte)
head(sge12p$nonzero)
#### binary traits ####
# training data (plink format)
trainbed = paste(wd,"train",sep="/")
# test data (plink format)
testbed = paste(wd,"test",sep="/")
# number of SNPs
#n.snp = length(readLines(paste(trainbed,".bim",sep="")))
n.snp = 80000
#> head(read.table(paste0(trainbed,"b.fam")))
# training sample .fam file (binary phenotype (1 or 2) in the 6th column)
# V1 V2 V3 V4 V5 V6
#1 id1_100 id2_100 0 0 1 2
#2 id1_101 id2_101 0 0 1 1
#3 id1_102 id2_102 0 0 1 2
#4 id1_103 id2_103 0 0 1 2
#5 id1_104 id2_104 0 0 1 2
#6 id1_105 id2_105 0 0 1 2
#> head(read.table(paste0(testbed,"b.fam")))
# test sample .fam file (binary phenotype (1 or 2) in the 6th column
# but missing (i.e. "-9") allowed)
# V1 V2 V3 V4 V5 V6
#1 id1_0 id2_0 0 0 1 2
#2 id1_1 id2_1 0 0 1 1
#3 id1_2 id2_2 0 0 1 2
#4 id1_3 id2_3 0 0 1 1
#5 id1_4 id2_4 0 0 1 2
#6 id1_5 id2_5 0 0 1 1
# using covariates files
# model building from training data
# (incorporating COV1xG interaction as well as two covariates, COV1 and COV2)
sge1p0b = stmgeplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
Z=paste(wd,"train.cov",sep="/"),Enames="COV1",
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge1p0b$PE)
# model building from training data and predicting test data
# (incorporating COV1xG interaction as well as two covariates, COV1 and COV2)
sge1pb = stmgeplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),
Z=paste(wd,"train.cov",sep="/"),Zte=paste(wd,"test.cov",sep="/"),Enames="COV1",
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge1pb$PEte)
head(sge1pb$nonzero)
# model building from training data
# (incorporating COV1xG and COV2xG interactions as well as two covariates, COV1 and COV2)
sge12p0b = stmgeplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
Z=paste(wd,"train.cov",sep="/"),Enames=c("COV1","COV2"),
maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge12p0b$PE)
# model building from training data and predicting test data
# (incorporating COV1xG and COV2xG interactions as well as two covariates, COV1 and COV2)
sge12pb = stmgeplink(trainbed=paste0(trainbed,c(".bed",".bim","b.fam")),
testbed=paste0(testbed,c(".bed",".bim","b.fam")),
Z=paste(wd,"train.cov",sep="/"),Zte=paste(wd,"test.cov",sep="/"),
Enames=c("COV1","COV2"),maxal=5000/n.snp,Znames=c("COV1","COV2"))
head(sge12pb$PEte)
head(sge12pb$nonzero)
}
Run the code above in your browser using DataLab