# Example: pairwise similarity between randomly generated phenotypes
# (sets of ontology terms) drawn from the `hpo` ontology dataset.

suppressPackageStartupMessages(library(ontologyIndex))
# get_term_sim_mat() and get_sim_mat() are exported by ontologySimilarity,
# not ontologyIndex; attach it explicitly so the script runs standalone.
suppressPackageStartupMessages(library(ontologySimilarity))

data(hpo)

# Fix the RNG seed so the sampled terms (and the result) are reproducible.
set.seed(1)

# Random set of 30 terms, extended with all of their ancestors.
terms <- get_ancestors(hpo, sample(hpo$id, size = 30))

# Information content of the terms. Each term is passed as its own
# single-element set, i.e. as if each term occurs with frequency `1/n`.
information_content <- get_term_info_content(hpo, term_sets = as.list(terms))

# Similarity of term pairs.
tsm <- get_term_sim_mat(hpo, information_content)

# 5 random term sets (call them *phenotypes*), each sampled as 8 terms and
# then reduced with minimal_set() to drop redundant ones, so each ends up
# with at most 8 terms.
phenotypes <- lapply(
  replicate(n = 5, expr = sample(terms, size = 8), simplify = FALSE),
  minimal_set,
  ontology = hpo
)

# Phenotype-by-phenotype similarity matrix (auto-printed at top level).
get_sim_mat(tsm, phenotypes)
# Run the code above in your browser using DataLab