# NOT RUN {
#### Example using iris dataset ####
# Start the clock first so that attaching the package is part of the timing.
startTime <- proc.time()
library(healthcareai)

# Load the built-in iris data and preview the first rows.
data(iris)
head(iris)

# Seed the RNG before building the first model.
set.seed(42)

# One parameter object is shared by both model developers below.
p <- SupervisedModelDevelopmentParams$new()

# -- Data and column roles
p$df <- iris
p$grainCol <- ""
p$predictedCol <- "Sepal.Width"

# -- Model settings
p$type <- "regression"
p$impute <- TRUE
p$debug <- FALSE
p$cores <- 1

# Lasso model
lasso <- LassoDevelopment$new(p)
lasso$run()

# Re-seed so the random forest starts from the same RNG state as the lasso.
set.seed(42)

# Random forest model
rf <- RandomForestDevelopment$new(p)
rf$run()

# Report how long the whole example took.
elapsed <- proc.time() - startTime
print(elapsed)
#### Example using csv data ####
library(healthcareai)
# setwd('C:/Your/script/location') # Needed if using YOUR CSV file
startTime <- proc.time()

# Path to the sample CSV shipped with the package.
# Can delete this assignment in your work and point read.csv() at
# 'your/path' instead.
csvPath <- system.file(
  "extdata",
  "HCRDiabetesClinical.csv",
  package = "healthcareai"
)

# Strings in the file that should be read as missing values.
naTokens <- c("NULL", "NA", "")
df <- read.csv(
  file = csvPath,
  header = TRUE,
  na.strings = naTokens
)
head(df)

# Drop the PatientID column; PatientEncounterID is used as the grain column.
df[["PatientID"]] <- NULL

# Seed the RNG before building the first model.
set.seed(42)

# One parameter object is shared by both model developers below.
p <- SupervisedModelDevelopmentParams$new()

# -- Data and column roles
p$df <- df
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "A1CNBR"

# -- Model settings
p$type <- "regression"
p$impute <- TRUE
p$debug <- FALSE
p$cores <- 1

# Lasso model
lasso <- LassoDevelopment$new(p)
lasso$run()

# Re-seed so the random forest starts from the same RNG state as the lasso.
set.seed(42)

# Random forest model
rf <- RandomForestDevelopment$new(p)
rf$run()

# Report how long the whole example took.
elapsed <- proc.time() - startTime
print(elapsed)
# }
# NOT RUN {
#### Example using SQL Server data ####
# Start the clock first so that attaching the package is part of the timing.
startTime <- proc.time()
library(healthcareai)

# Connection details for the SQL Server instance holding the sample table.
connectionString <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"

# This query should pull only rows for training. They must have a label.
trainingQuery <- "
SELECT
[PatientEncounterID]
,[SystolicBPNBR]
,[LDLNBR]
,[A1CNBR]
,[GenderFLG]
,[ThirtyDayReadmitFLG]
FROM [SAM].[dbo].[HCRDiabetesClinical]
"

# Pull the query result into a data frame and preview it.
df <- selectData(connectionString, trainingQuery)
head(df)

# Seed the RNG before building the first model.
set.seed(42)

# One parameter object is shared by both model developers below.
p <- SupervisedModelDevelopmentParams$new()

# -- Data and column roles
p$df <- df
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "ThirtyDayReadmitFLG"

# -- Model settings
p$type <- "classification"
p$impute <- TRUE
p$debug <- FALSE
p$cores <- 1

# Lasso model
lasso <- LassoDevelopment$new(p)
lasso$run()

# Re-seed so the random forest starts from the same RNG state as the lasso.
set.seed(42)

# Random forest model
rf <- RandomForestDevelopment$new(p)
rf$run()

# Labels used in both plot legends, in the same order as the curve lists.
modelLabels <- c("Random Forest", "Lasso")

# ROC curves for both models on one plot.
rfROC <- rf$getROC()
lassoROC <- lasso$getROC()
plotROCs(list(rfROC, lassoROC), modelLabels, "bottomright")

# Precision-recall curves for both models on one plot.
rfPR <- rf$getPRCurve()
lassoPR <- lasso$getPRCurve()
plotPRCurve(list(rfPR, lassoPR), modelLabels, "bottomleft")

# Report how long the whole example took.
elapsed <- proc.time() - startTime
print(elapsed)
# }
# NOT RUN {
# }
# Run the code above in your browser using DataLab