## Not run:
# DB <- RODM_open_dbms_connection(dsn="orcl11g", uid= "rodm", pwd = "rodm")
#
# ### GLM Classification
#
# # Predicting survival in the sinking of the Titanic based on a passenger's sex, age, class, etc.
#
# data(titanic3, package="PASWR") # Load survival data from Titanic
# ds <- titanic3[,c("pclass", "survived", "sex", "age", "fare", "embarked")] # Select subset of attributes
# ds[,"survived"] <- ifelse(ds[,"survived"] == 1, "Yes", "No") # Rename target values
# n.rows <- length(ds[,1]) # Number of rows
# random_sample <- sample(1:n.rows, ceiling(n.rows/2)) # Split dataset randomly in train/test subsets
# titanic_train <- ds[random_sample,] # Training set
# train.rows <- length(titanic_train[,1]) # Number of rows
# row.id <- matrix(seq(1, train.rows), nrow=train.rows, ncol=1, dimnames= list(NULL, c("ROW_ID"))) # Row id
# titanic_train <- cbind(row.id, titanic_train) # Add row id to dataset
# titanic_test <- ds[setdiff(1:n.rows, random_sample),] # Test set
# RODM_create_dbms_table(DB, "titanic_train") # Push the training table to the database
# RODM_create_dbms_table(DB, "titanic_test") # Push the testing table to the database
#
# # Weight one class more heavily than the other
# weights <- data.frame(
# target_value = c("Yes", "No"),
# class_weight = c(1, 10))
#
# glm <- RODM_create_glm_model(database = DB, # Create ODM GLM classification model
# data_table_name = "titanic_train",
# case_id_column_name = "ROW_ID",
# target_column_name = "survived",
# model_name = "GLM_MODEL",
# class_weights = weights,
# diagnostics_table_name = "GLM_DIAG",
# mining_function = "classification")
#
# glm2 <- RODM_apply_model(database = DB, # Predict test data
# data_table_name = "titanic_test",
# model_name = "GLM_MODEL",
# supplemental_cols = "survived")
#
# print(glm2$model.apply.results[1:10,]) # Print example of prediction results
# actual <- glm2$model.apply.results[, "SURVIVED"]
# predicted <- glm2$model.apply.results[, "PREDICTION"]
# probs <- as.numeric(as.character(glm2$model.apply.results[, "'Yes'"]))
# table(actual, predicted, dnn = c("Actual", "Predicted")) # Confusion matrix
# library(verification)
# perf.auc <- roc.area(ifelse(actual == "Yes", 1, 0), probs) # Compute ROC and plot
# auc.roc <- signif(perf.auc$A, digits=3)
# auc.roc.p <- signif(perf.auc$p.value, digits=3)
# roc.plot(ifelse(actual == "Yes", 1, 0), probs, binormal=TRUE, plot="both", xlab="False Positive Rate",
# ylab="True Positive Rate", main= "Titanic survival ODM GLM model ROC Curve")
# text(0.7, 0.4, labels= paste("AUC ROC:", signif(perf.auc$A, digits=3)))
# text(0.7, 0.3, labels= paste("p-value:", signif(perf.auc$p.value, digits=3)))
#
# glm # look at the model details
#
# # access and look at the per-row diagnostics from model training
# diaginfo <- sqlQuery(DB, query = "SELECT * FROM GLM_DIAG")
# diaginfo
#
# RODM_drop_model(DB, "GLM_MODEL") # Drop the model
# RODM_drop_dbms_table(DB, "GLM_DIAG") # Drop the diagnostics table
# RODM_drop_dbms_table(DB, "titanic_train") # Drop the database table
# RODM_drop_dbms_table(DB, "titanic_test") # Drop the database table
# ## End(Not run)
### GLM Regression
## Not run:
# x1 <- 2 * runif(200)
# noise <- 3 * runif(200) - 1.5
# y1 <- 2 + 2*x1 + x1*x1 + noise
# dataset <- data.frame(x1, y1)
# names(dataset) <- c("X1", "Y1")
# RODM_create_dbms_table(DB, "dataset") # Push the training table to the database
#
# glm <- RODM_create_glm_model(database = DB, # Create ODM GLM model
# data_table_name = "dataset",
# target_column_name = "Y1",
# mining_function = "regression")
#
# glm2 <- RODM_apply_model(database = DB, # Predict training data
# data_table_name = "dataset",
# model_name = "GLM_MODEL",
# supplemental_cols = "X1")
# windows(height=8, width=12)
# plot(x1, y1, pch=20, col="blue")
# points(x=glm2$model.apply.results[, "X1"],
# glm2$model.apply.results[, "PREDICTION"], pch=20, col="red")
# legend(0.5, 9, legend = c("actual", "GLM regression"), pch = c(20, 20),
# col = c("blue", "red"),
# pt.bg = c("blue", "red"), cex = 1.20, pt.cex=1.5, bty="n")
#
# RODM_drop_model(DB, "GLM_MODEL") # Drop the model
# RODM_drop_dbms_table(DB, "dataset") # Drop the database table
# RODM_close_dbms_connection(DB)
# ## End(Not run)
# Run the code above in your browser using DataLab