## Prerequisite
data(geneByHp, hp_descendants, package="PCAN")
geneByHp <- unstack(geneByHp, entrez~hp)
ic <- computeHpIC(geneByHp, hp_descendants)
###########################################
## Use case: comparing a gene and a disease
data(traitDef, geneDef, hp_ancestors, hpDef, package="PCAN")
omim <- "612285"
traitDef[which(traitDef$id==omim),]
entrez <- "57545"
geneDef[which(geneDef$entrez==entrez),]
## Get HP terms associated to the disease
data(hpByTrait, package="PCAN")
hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)]
## Get HP terms associated to the gene
hpByGene <- unstack(stack(geneByHp), ind~values)
geneHps <- hpByGene[[entrez]]
## Comparison of the two sets of HP terms
compMat <- compareHPSets(
hpSet1=geneHps, hpSet2=hpOfInterest,
IC=ic,
ancestors=hp_ancestors,
method="Resnik",
BPPARAM=SerialParam()
)
## Get the symmetric semantic similarity score
hpSetCompSummary(compMat, method="bma", direction="symSim")
bm <- hpSetCompBestMatch(compMat, "b")
hpDef[match(c(bm$compared, bm$candidate), hpDef$id),]
## Assessing the significance of this score by comparing to all other genes
hpGeneResnik <- compareHPSets(
hpSet1=names(ic), hpSet2=hpOfInterest,
IC=ic,
ancestors=hp_ancestors,
method="Resnik",
BPPARAM=SerialParam()
)
hpMatByGene <- lapply(
hpByGene,
function(x){
hpGeneResnik[x, , drop=FALSE]
}
)
resnSss <- unlist(lapply(
hpMatByGene,
hpSetCompSummary,
method="bma", direction="symSim"
))
candScore <- resnSss[entrez]
hist(
resnSss,
breaks=100, col="grey",
ylim=c(0,300),
xlab=expression(Sim[sym]),
ylab="Number of genes",
main=paste(
"Distribution of symmetric semantic similarity scores\nfor all the",
length(resnSss), "genes"
)
)
polygon(
x=c(candScore, 10, 10, candScore),
y=c(-10, -10, 1000, 1000),
border="#BE0000",
col="#BE000080",
lwd=3
)
withHigher <- sum(resnSss >= candScore)
text(
x=candScore, y=mean(par()$usr[3:4]),
paste0(
withHigher, " genes (",
signif(withHigher*100/length(resnSss), 2), "%)\n",
"show a symmetric semantic\n",
"similarity score greater than\n",
"the gene candidate for\n",
"for the HPs under focus."
),
pos=4,
cex=0.6
)
Run the code above in your browser using DataLab