# \donttest{
## Example 1: Discover the rare Sickle Cell Disease (SCD) phenotype in the
## example cohort bundled with the package.
##  * SCD is extremely rare, so the VB GMM is initialised from DBSCAN.
##  * An informative prior is placed on the GMM mixing coefficient.
##  * Iterations stop once the ELBO starts to reverse, so that the run
##    halts when the minor (SCD) component has been reached.
library(data.table)

# Example SCD cohort supplied with the VBphenoR package
data(scd_cohort)

# Names of the biomarker columns that drive the latent-class discovery
biomarkers <- c('CBC', 'RC')

# X1: data matrix for the VB GMM (the SCD biomarkers only)
X1 <- scd_cohort[, .(CBC, RC)]

# DBSCAN hyper-parameters used to initialise the VB GMM.
# See help(DBSCAN) for details of these parameters.
initParams <- c(eps = 0.15, minPts = 5)

# Informative prior on the VB GMM mixing coefficient (alpha hyper-parameter)
prior_gmm <- list(alpha = 0.001)

# Informative priors for the beta coefficients of the VB logit:
# 1 for the intercept, then the cohort mean of every covariate.
# USE.NAMES = FALSE keeps the mean vector unnamed.
logit_covariates <- c("age", "highrisk", "CBC", "RC")
covariate_means <- vapply(
  logit_covariates,
  function(covariate) mean(scd_cohort[[covariate]]),
  numeric(1),
  USE.NAMES = FALSE
)
prior_logit <- list(
  mu    = c(1, covariate_means),
  Sigma = diag(5)  # Simplest isotropic case: 5 x 5 identity
)

# X2: design matrix for the VB logit, built in its final column order
# (Intercept first) with age and highrisk coerced to numeric.
X2 <- scd_cohort[, .(
  Intercept = 1,
  age       = as.numeric(age),
  highrisk  = as.numeric(highrisk),
  CBC,
  RC
)]

# Run the patient phenotyping model; the seed is fixed immediately before
# the call so the example is reproducible.
set.seed(123)
pheno_result <- run_Model(
  biomarkers,
  gmm_X                 = X1,
  gmm_init              = "dbscan",
  gmm_initParams        = initParams,
  gmm_maxiters          = 20,
  gmm_prior             = prior_gmm,
  gmm_stopIfELBOReverse = TRUE,
  logit_X               = X2,
  logit_prior           = prior_logit
)

# S3 print method
print(pheno_result)
# }
# Run the code above in your browser using DataLab