data(geneByHp, hp_descendants, package="PCAN")
data(hp_ancestors, hpDef, package="PCAN")
data(traitDef, geneDef, package="PCAN")
data(hpByTrait, package="PCAN")
geneByHp <- unstack(geneByHp, entrez~hp)
###########################################
## Compute information content of each HP according to associated genes
ic <- computeHpIC(geneByHp, hp_descendants)
###########################################
## Use case: comparing a gene and a disease
omim <- "612285"
traitDef[which(traitDef$id==omim),]
entrez <- "57545"
geneDef[which(geneDef$entrez==entrez),]
## Get HP terms associated to the disease
hpOfInterest <- hpByTrait$hp[which(hpByTrait$id==omim)]
## Get HP terms associated to the gene
hpByGene <- unstack(stack(geneByHp), ind~values)
geneHps <- hpByGene[[entrez]]
## HP Comparison
hpGeneResnik <- compareHPSets(
hpSet1=names(ic), hpSet2=hpOfInterest,
IC=ic,
ancestors=hp_ancestors,
method="Resnik",
BPPARAM=SerialParam()
)
hpMatByGene <- lapply(
hpByGene,
function(x){
hpGeneResnik[x, , drop=FALSE]
}
)
resnSss <- unlist(lapply(
hpMatByGene,
hpSetCompSummary,
method="bma", direction="symSim"
))
candScore <- resnSss[entrez]
###########################################
## The pathway consensus approach
## What about genes belonging to the same pathways as the candidate
data(rPath, hsEntrezByRPath, package="PCAN")
candPath <- names(hsEntrezByRPath)[which(unlist(lapply(
hsEntrezByRPath,
function(x) entrez %in% x
)))]
rPath[which(rPath$Pathway %in% candPath),]
rPathRes <- hpGeneListComp(
geneList=hsEntrezByRPath[[candPath]],
ssMatByGene = hpMatByGene,
geneSSScore = resnSss
)
hist(
resnSss,
breaks=100, col="grey",
ylim=c(0,5),
xlab=expression(Sim[sym]),
ylab="Density",
main=paste(
"Distribution of symmetric semantic similarity scores for all the",
length(resnSss), "genes"
),
probability=TRUE
)
toAdd <- hist(
rPathRes$scores,
breaks=100,
plot=FALSE
)
for(i in 1:length(toAdd$density)){
polygon(
x=toAdd$breaks[c(i, i+1, i+1, i)],
y=c(0, 0, rep(toAdd$density[i], 2)),
col="#BE000040",
border="#800000FF"
)
}
legend(
"topright",
paste0(
"Genes belonging to the ", candPath," pathway:\n'",
rPath[which(rPath$Pathway %in% candPath),"Pathway_name"],
"'\nand with a symmetric semantic similarity score (",
sum(!is.na(rPathRes$scores)),
"/",
length(rPathRes$scores),
")\n",
"p-value: ", signif(rPathRes$p.value, 2)
),
pch=15,
col="#BE000040",
bty="n",
cex=0.6
)
## Assessing the symmetric semantic similarity for each gene in the pathway
pathSss <- rPathRes$scores[which(!is.na(rPathRes$scores))]
names(pathSss) <- geneDef[match(names(pathSss), geneDef$entrez), "symbol"]
opar <- par(mar=c(7.1, 4.1, 4.1, 2.1))
barplot(
sort(pathSss),
las=2,
ylab=expression(Sim[sym]),
main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"]
)
p <- c(0.25, 0.5, 0.75, 0.95)
abline(
h=quantile(resnSss, probs=p),
col="#BE0000",
lty=c(2, 1, 2, 2),
lwd=c(2, 2, 2, 1)
)
text(
rep(0,4),
quantile(resnSss, probs=p),
p,
pos=3,
offset=0,
col="#BE0000",
cex=0.6
)
legend(
"topleft",
paste0(
"Quantiles of the distribution of symmetric semantic similarity\n",
"scores for all the genes."
),
lty=1,
col="#BE0000",
cex=0.6
)
par(opar)
## A heatmap showing the best HP match for each gene in the pathway
geneLabels <- geneDef$symbol[which(!duplicated(geneDef$entrez))]
names(geneLabels) <- geneDef$entrez[which(!duplicated(geneDef$entrez))]
hpLabels <- hpDef$name
names(hpLabels) <- hpDef$id
hpGeneHeatmap(
rPathRes,
genesOfInterest=entrez,
geneLabels=geneLabels,
hpLabels=hpLabels,
clustByGene=TRUE,
clustByHp=TRUE,
palFun=colorRampPalette(c("white", "red")),
goiCol="blue",
main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"]
)
###########################################
## What about genes interacting with the candidate (including itself)
data(hqStrNw, package="PCAN")
neighbors <- unique(c(
hqStrNw$gene1[which(hqStrNw$gene2==entrez)],
hqStrNw$gene2[which(hqStrNw$gene1==entrez)],
entrez
))
neighRes <- hpGeneListComp(
geneList=neighbors,
ssMatByGene = hpMatByGene,
geneSSScore = resnSss
)
hist(
resnSss,
breaks=100, col="grey",
ylim=c(0,10),
xlab=expression(Sim[sym]),
ylab="Density",
main=paste(
"Distribution of symmetric semantic similarity scores for all the",
length(resnSss), "genes"
),
probability=TRUE
)
toAdd <- hist(
neighRes$scores,
breaks=100,
plot=FALSE
)
for(i in 1:length(toAdd$density)){
polygon(
x=toAdd$breaks[c(i, i+1, i+1, i)],
y=c(0, 0, rep(toAdd$density[i], 2)),
col="#BE000040",
border="#800000FF"
)
}
legend(
"topright",
paste0(
"Genes interacting with ",
geneDef[which(geneDef$entrez==entrez),"symbol"],
" (", entrez, ")",
"\nand with a symmetric semantic similarity score (",
sum(!is.na(neighRes$scores)),
"/",
length(neighRes$scores),
")\n",
"p-value: ", signif(neighRes$p.value, 2)
),
pch=15,
col="#BE000040",
bty="n",
cex=0.6
)
## Assessing the symmetric semantic similarity score for each interacting gene
neighSss <- neighRes$scores[which(!is.na(neighRes$scores))]
names(neighSss) <- geneDef[match(names(neighSss), geneDef$entrez), "symbol"]
opar <- par(mar=c(7.1, 4.1, 4.1, 2.1))
barplot(
sort(neighSss),
las=2,
ylab=expression(Sim[sym]),
main=paste0(
"Genes interacting with ",
geneDef[which(geneDef$entrez==entrez),"symbol"],
" (", entrez, ")"
)
)
p <- c(0.25, 0.5, 0.75, 0.95)
abline(
h=quantile(resnSss, probs=p),
col="#BE0000",
lty=c(2, 1, 2, 2),
lwd=c(2, 2, 2, 1)
)
text(
rep(0,4),
quantile(resnSss, probs=p),
p,
pos=3,
offset=0,
col="#BE0000",
cex=0.6
)
legend(
"topleft",
paste0(
"Quantiles of the distribution of symmetric semantic similarity\n",
"scores for all the genes."
),
lty=1,
col="#BE0000",
cex=0.6
)
par(opar)
## A heatmap showing the best HP match for each neighbor gene
hpGeneHeatmap(
neighRes,
genesOfInterest=entrez,
geneLabels=geneLabels,
hpLabels=hpLabels,
clustByGene=TRUE,
clustByHp=TRUE,
palFun=colorRampPalette(c("white", "red")),
goiCol="blue",
main=rPath[which(rPath$Pathway %in% candPath),"Pathway_name"]
)
Run the code above in your browser using DataLab