# NOT RUN {
### Built-in example: classification on the sleepstudy data
library(healthcareai)
library(lme4)

# sleepstudy becomes available once lme4 is attached
df <- sleepstudy
str(df)

# Derive a binary "Y"/"N" label from Reaction (cut-off 300), then drop the
# numeric Reaction column so only the flag remains in the data
df[["ReactionFLG"]] <- ifelse(df[["Reaction"]] > 300, "Y", "N")
df[["Reaction"]] <- NULL

set.seed(42)

# Collect the development settings on a parameter object
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$personCol <- "Subject" # Plays the role of a PatientID
p$predictedCol <- "ReactionFLG"
p$debug <- FALSE
p$cores <- 1

# Build the mixed model from the parameters and run it
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()
### Doing regression
library(healthcareai)
# sleepstudy ships with lme4; qualify it so this example runs on its own
# instead of relying on an earlier library(lme4) call.
# To use your own data, pull it with a SQL query instead - see selectData().
df <- lme4::sleepstudy
# Add GrainID, which is equivalent to PatientEncounterID
# (seq_len() is safe even for a zero-row data frame)
df$GrainID <- seq_len(nrow(df))
str(df)
set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "regression"
p$impute <- TRUE
p$grainCol <- "GrainID" # Think of this as PatientEncounterID
p$personCol <- "Subject" # Think of this as PatientID
p$predictedCol <- "Reaction"
p$debug <- FALSE
p$cores <- 1
# Create Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()
#### Example using csv data ####
library(healthcareai)
# setwd('C:/Your/script/location') # Needed if using YOUR CSV file

# Start a timer so the total run time can be reported at the end
ptm <- proc.time()

# Locate the sample CSV bundled in the package's extdata folder.
# In your own work, drop this lookup and point csvfile at "path/to/yourfile".
csvfile <- system.file(
  "extdata",
  "HCRDiabetesClinical.csv",
  package = "healthcareai"
)

# Read the file, treating "NULL", "NA" and empty strings as missing values
df <- read.csv(
  csvfile,
  header = TRUE,
  na.strings = c("NULL", "NA", "")
)
head(df)

set.seed(42)

# Development settings for a classification run
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Build and run the mixed model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()

# Re-seed ahead of the optional Lasso run (left disabled below)
set.seed(42)
# Lasso <- LassoDevelopment$new(p)
# Lasso$run()

# Report the time elapsed since the timer was started
elapsed <- proc.time() - ptm
cat(elapsed, '\n')
# }
# NOT RUN {
#### This example is specific to Windows and is not tested.
#### Example using SQL Server data ####
# This example requires that you alter your connection string / query
# to read in your own data
ptm <- proc.time()
library(healthcareai)
connection.string <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"
# This query should pull only rows for training. They must have a label.
query <- "
SELECT
[PatientEncounterID]
,[PatientID]
,[SystolicBPNBR]
,[LDLNBR]
,[A1CNBR]
,[GenderFLG]
,[ThirtyDayReadmitFLG]
FROM [SAM].[dbo].[HCRDiabetesClinical]
--no WHERE clause, because we want train AND test
"
df <- selectData(connection.string, query)
head(df)
set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1
# Create Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()
# Remove the person column, since RF can't use it.
# The column is named PatientID; `df$personCol` would target a column that
# does not exist in this data and silently leave PatientID in place.
df$PatientID <- NULL
p$df <- df
p$personCol <- NULL
set.seed(42)
# Run Random Forest
rf <- RandomForestDevelopment$new(p)
rf$run()
# Plot ROC curves of both models on one chart
rocs <- list(lmm$getROC(), rf$getROC())
# Named modelNames rather than names to avoid shadowing base::names()
modelNames <- c("Linear Mixed Model", "Random Forest")
legendLoc <- "bottomright"
plotROCs(rocs, modelNames, legendLoc)
# Plot PR curves of both models on one chart
prCurves <- list(lmm$getPRCurve(), rf$getPRCurve())
legendLoc <- "bottomleft"
plotPRCurve(prCurves, modelNames, legendLoc)
# Report the time elapsed since the timer was started
cat(proc.time() - ptm, "\n")
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab