# Build an "hlaAlleleClass" object for the selected HLA locus
hla.id <- "A"
hla <- hlaAllele(
  HLA_Type_Table$sample.id,
  H1 = HLA_Type_Table[, paste0(hla.id, ".1")],
  H2 = HLA_Type_Table[, paste0(hla.id, ".2")],
  locus = hla.id,
  assembly = "hg19"
)

# Split the HLA types at random (train.prop = 0.5); the seed is fixed so
# that the split is reproducible
set.seed(100)
hlatab <- hlaSplitAllele(hla, train.prop = 0.5)
names(hlatab)
# "training" "validation"
summary(hlatab$training)
summary(hlatab$validation)

# Select SNP predictors inside the flanking region on each side of the locus
region <- 500 # kb
snpid <- hlaFlankingSNP(
  HapMap_CEU_Geno$snp.id,
  HapMap_CEU_Geno$snp.position,
  hla.id,
  region * 1000, # flanking size converted from kb to bp
  assembly = "hg19"
)
length(snpid) # 275

# Training genotypes: flanking SNPs only, restricted to the training samples
train.geno <- hlaGenoSubset(
  HapMap_CEU_Geno,
  snp.sel = match(snpid, HapMap_CEU_Geno$snp.id),
  samp.sel = match(
    hlatab$training$value$sample.id,
    HapMap_CEU_Geno$sample.id
  )
)

# Validation genotypes: restricted to the validation samples; no snp.sel
# is given here
test.geno <- hlaGenoSubset(
  HapMap_CEU_Geno,
  samp.sel = match(
    hlatab$validation$value$sample.id,
    HapMap_CEU_Geno$sample.id
  )
)

# Train two HIBAG models back to back on the same training data, after a
# single set.seed() call
set.seed(100)
# please use "nclassifier=100" when you use HIBAG for real data
m1 <- hlaAttrBagging(
  hlatab$training, train.geno,
  nclassifier = 2, verbose.detail = TRUE
)
m2 <- hlaAttrBagging(
  hlatab$training, train.geno,
  nclassifier = 2, verbose.detail = TRUE
)

# Predict on the validation genotypes with each model and compare the
# predictions against the validation HLA types
pd1 <- predict(m1, test.geno, type = "response+prob", vote = "majority")
pd2 <- predict(m2, test.geno, type = "response+prob", vote = "majority")
hlaCompareAllele(hlatab$validation, pd1)$overall
hlaCompareAllele(hlatab$validation, pd2)$overall

# Merge the predictions of both models (equal weights), by voting from all
# classifiers, then evaluate the merged prediction
pd <- hlaPredMerge(pd1, pd2, weight = c(1, 1))
hlaCompareAllele(hlatab$validation, pd)$overall
# Run the code above in your browser using DataLab