# Predicting survival in the sinking of the Titanic based on passengers' sex, age, class, etc.
## Not run:
# DB <- RODM_open_dbms_connection(dsn="orcl11g", uid= "rodm", pwd = "rodm")
#
# data(titanic3, package="PASWR") # Load survival data from Titanic
# ds <- titanic3[,c("pclass", "survived", "sex", "age", "fare", "embarked")] # Select subset of attributes
# ds[,"survived"] <- ifelse(ds[,"survived"] == 1, "Yes", "No") # Rename target values
# n.rows <- length(ds[,1]) # Number of rows
# set.seed(seed=6218945)
# random_sample <- sample(1:n.rows, ceiling(n.rows/2)) # Split dataset randomly in train/test subsets
# titanic_train <- ds[random_sample,] # Training set
# titanic_test <- ds[setdiff(1:n.rows, random_sample),] # Test set
#
# RODM_create_dbms_table(DB, "titanic_train") # Push the training table to the database
# RODM_create_dbms_table(DB, "titanic_test") # Push the testing table to the database
#
# # If the target distribution does not reflect the actual distribution due
# # to specialized sampling, specify priors for the model
# priors <- data.frame(
# target_value = c("Yes", "No"),
# prior_probability = c(0.1, 0.9))
#
# # Create an ODM Naive Bayes model
# nb <- RODM_create_nb_model(
# database = DB, # Database ODBC channel identifier
# model_name = "titanic_nb_model", # ODM model name
# data_table_name = "titanic_train", # (in quotes) Data frame or database table containing the input dataset
# class_priors = priors, # user-specified priors
# target_column_name = "survived") # Target column name in data_table_name
#
# # Predict test data using the Naive Bayes model
# nb2 <- RODM_apply_model(
# database = DB, # Database ODBC channel identifier
# data_table_name = "titanic_test", # Database table containing the input dataset
# model_name = "titanic_nb_model", # ODM model name
# supplemental_cols = "survived") # Carry the target column to the output for analysis
#
# # Compute contingency matrix, performance statistics and ROC curve
# print(nb2$model.apply.results[1:10,]) # Print example of prediction results
# actual <- nb2$model.apply.results[, "SURVIVED"]
# predicted <- nb2$model.apply.results[, "PREDICTION"]
# probs <- as.numeric(as.character(nb2$model.apply.results[, "'Yes'"]))
# table(actual, predicted, dnn = c("Actual", "Predicted")) # Confusion matrix
#
# library(verification)
# perf.auc <- roc.area(ifelse(actual == "Yes", 1, 0), probs) # Compute ROC and plot
# auc.roc <- signif(perf.auc$A, digits=3)
# auc.roc.p <- signif(perf.auc$p.value, digits=3)
# roc.plot(ifelse(actual == "Yes", 1, 0), probs, binormal=TRUE, plot="both", xlab="False Positive Rate",
# ylab="True Positive Rate", main= "Titanic survival ODM NB model ROC Curve")
# text(0.7, 0.4, labels= paste("AUC ROC:", signif(perf.auc$A, digits=3)))
# text(0.7, 0.3, labels= paste("p-value:", signif(perf.auc$p.value, digits=3)))
#
# nb # look at the model details
#
# RODM_drop_model(DB, "titanic_nb_model") # Drop the model
# RODM_drop_dbms_table(DB, "titanic_train") # Drop the training table in the database
# RODM_drop_dbms_table(DB, "titanic_test") # Drop the testing table in the database
#
# RODM_close_dbms_connection(DB)
# ## End(Not run)
# Run the code above in your browser using DataLab