library(dplyr)
library(ggplot2)
library(caret)
library(rsample)
library(varhandle)
# Load the example data set and take a first look at its columns.
# NOTE(review): presumably `long_stayers` ships with a package that is already
# attached when this script runs -- none of the library() calls visible here
# obviously provides it; confirm.
data("long_stayers")
dplyr::glimpse(long_stayers)

# Examine class imbalance: proportion of rows in each outcome class
prop.table(
  table(long_stayers[["stranded.label"]])
)
# Feature engineering: make the outcome a factor and drop the admission date
long_stayers <- long_stayers %>%
  dplyr::mutate(stranded.label = factor(stranded.label)) %>%
  dplyr::select(-admit_date)

# Feature encoding
# Collect the character columns; `where()` replaces the superseded
# `select_if()` for predicate-based column selection.
cats <- dplyr::select(long_stayers, where(is.character))

# Dummy-encode the frailty index; each generated column is named with the
# "frail_ind" prefix followed by the category value.
cat_dummy <- varhandle::to.dummy(cats$frailty_index, "frail_ind")

# Drop the "No_index_item" dummy so the remaining indicator columns are not
# perfectly collinear; that category becomes the implicit reference level.
cat_dummy <- cat_dummy %>%
  as.data.frame() %>%
  dplyr::select(-frail_ind.No_index_item)

# Swap the raw frailty column for its dummy columns, then discard any rows
# that still contain missing values.
long_stayers <- long_stayers %>%
  dplyr::select(-frailty_index) %>%
  dplyr::bind_cols(cat_dummy) %>%
  na.omit()
# Split the data
# Seed the RNG *before* the random partition: seeding only after
# initial_split() leaves the train/test membership different on every run.
set.seed(123)
split <- rsample::initial_split(long_stayers, prop = 3 / 4)
train <- rsample::training(split)
test <- rsample::testing(split)

# Re-seed immediately before fitting so any randomness used during training
# starts from the same known state as before.
set.seed(123)

# Fit a GLM classifier of the outcome on every other column.
# `stranded.label` is already a factor at this point, so it is used directly
# rather than being wrapped in factor() again inside the formula.
glm_class_mod <- caret::train(
  stranded.label ~ .,
  data = train,
  method = "glm"
)
print(glm_class_mod)
# Score the hold-out set: predicted class labels first, then the per-class
# probabilities from the same fitted model.
preds <- predict(glm_class_mod, newdata = test)
pred_prob <- predict(glm_class_mod, newdata = test, type = "prob")

# Gather both prediction outputs into a single data frame
predicted <- data.frame(preds, pred_prob)

# Append the predictions to the test rows and expose the predicted label
# under the name `pred_class`.
test <- dplyr::rename(
  dplyr::bind_cols(test, predicted),
  pred_class = preds
)
# Evaluate with ConfusionTableR
library(ConfusionTableR)

# Build the binary confusion-matrix object, treating "Stranded" as the
# positive class.
# NOTE(review): the observed labels are passed first and the predicted labels
# second -- confirm this matches the argument order binary_class_cm() expects.
cm <- ConfusionTableR::binary_class_cm(
  test[["stranded.label"]],
  test[["pred_class"]],
  positive = "Stranded"
)

# Show the record-level summary held in the result
cm$record_level_cm
# Visualise odds ratios
library(OddsPlotty)

# Plot the fitted model's odds using the underlying model object that caret
# stores in `finalModel`.
plotty <- OddsPlotty::odds_plot(
  glm_class_mod$finalModel,
  # Labels
  title = "Odds Plot ",
  subtitle = "Showing odds of patient stranded",
  # Point styling
  point_col = "#00f2ff",
  point_size = 0.5,
  # Error-bar styling
  error_bar_colour = "black",
  error_bar_width = 0.8,
  # Reference line colour
  h_line_color = "red"
)
print(plotty)
# Run the code above in your browser using DataLab