# Work on a subset of the samples so the example finishes quickly.
# A sample is kept when the index of its 12-day window
# (day_of_life %/% 12) is odd.
sub_sample <- rownames(meta_table)[
  (meta_table$day_of_life %/% 12) %% 2 == 1
]
# Apply the same row selection to the metadata and to both feature tables.
meta_table_sub <- meta_table[sub_sample, ]
count_table_sub <- count_table[sub_sample, ]
processed_table_sub <- processed_table[sub_sample, ]
# Split the example data: samples whose studyid is "2" are held out for
# testing, all remaining samples are used for training.
id_test <- meta_table_sub$studyid == "2"
# Held-out (testing) portion.
count_test <- count_table_sub[id_test, ]
meta_test <- meta_table_sub[id_test, ]
# Training portion.
count_train <- count_table_sub[!id_test, ]
meta_train <- meta_table_sub[!id_test, ]
# Fit TEMPTED using the training samples only.
# Step 1: convert counts, time points and subject IDs into the data list
# that the tempted functions operate on.
datlist_train <- format_tempted(
  count_train,
  meta_train$day_of_life,
  meta_train$studyid,
  threshold = 0.95,
  pseudo = 0.5,
  transform = "clr"
)
# Step 2: centralize the data list. The full result is kept because it is
# passed to est_test_subject() later in this script.
mean_svd_train <- svd_centralize(datlist_train, r = 1)
# Step 3: run the decomposition with two components on the centralized data.
res_tempted_train <- tempted(
  mean_svd_train$datlist,
  r = 2,
  smooth = 1e-5
)
# Restrict the test counts to the features present in the training data list.
# NOTE(review): the first row name of the training matrix is skipped,
# presumably because that row stores time points rather than a feature --
# confirm against the format_tempted documentation.
count_test <- count_test[, rownames(datlist_train[[1]])[-1]]
# Format the test samples the same way as the training samples.
# NOTE(review): threshold = 1 presumably turns off feature filtering so the
# test features stay aligned with the training ones -- confirm.
datlist_test <- format_tempted(
  count_test,
  meta_test$day_of_life,
  meta_test$studyid,
  threshold = 1,
  pseudo = 0.5,
  transform = "clr"
)
# Estimate the subject loading of the held-out subject from the trained
# decomposition and the training centralization result.
sub_test <- est_test_subject(datlist_test, res_tempted_train, mean_svd_train)
# Train a logistic regression classifier on the training subjects.
# Build a one-row-per-subject lookup of delivery mode, indexed by studyid.
metauni <- unique(meta_table_sub[, c("studyid", "delivery")])
rownames(metauni) <- metauni$studyid
# Subject loadings from the training fit, with a logical outcome column that
# is TRUE when the subject's delivery mode is "Cesarean".
Atrain <- as.data.frame(res_tempted_train$A_hat)
Atrain$delivery <- metauni[rownames(Atrain), "delivery"] == "Cesarean"
glm_train <- glm(
  delivery ~ PC1 + PC2,
  data = Atrain,
  family = binomial(link = "logit")
)
summary(glm_train)
# Predicted probability for the held-out subject, whose true label is
# "Cesarean".
predict(glm_train, newdata = as.data.frame(sub_test), type = "response")
# Run the code above in your browser using DataLab