# NOT RUN {
#### Classification Example using csv data ####

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# setwd('C:/Yourscriptlocation/Useforwardslashes') # Uncomment if using csv

# Locate the example csv inside the healthcareai package.
# (This system.file() call can be deleted in your own work.)
csvfile <- system.file(
  "extdata",
  "HCRDiabetesClinical.csv",
  package = "healthcareai"
)

# Read the csv; replace csvfile with 'path/file' for your own data.
df <- read.csv(
  file = csvfile,
  header = TRUE,
  na.strings = c("NULL", "NA", "")
)

# Only one ID column (ie, PatientEncounterID) is needed, so remove PatientID.
df[["PatientID"]] <- NULL

# Save a dataframe for validation later on
dfDeploy <- df[951:1000, ]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)

# Parameters for the development step
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Run Lasso
Lasso <- LassoDevelopment$new(p)
Lasso$run()

## 3. Load saved model and use DEPLOY to generate predictions.
print('Fake production data:')
str(dfDeploy)

# Parameters for the deployment step
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LassoDeployment$new(p2)
dL$deploy()

# Pull the output dataframe and show its first rows
dfOut <- dL$getOutDf()
head(dfOut)

# Write to CSV (or JSON, MySQL, etc) using plain R syntax
# write.csv(dfOut,'path/predictionsfile.csv')

# Report elapsed time since the start of this example
print(proc.time() - ptm)
# }
# NOT RUN {
#### Classification example using SQL Server data ####

# This example requires you to first create a table in SQL Server
# If you prefer to not use SAMD, execute this in SSMS to create output table:
# CREATE TABLE dbo.HCRDeployClassificationBASE(
# BindingID float, BindingNM varchar(255), LastLoadDTS datetime2,
# PatientEncounterID int, <--change to match inputID
# PredictedProbNBR decimal(38, 2),
# Factor1TXT varchar(255), Factor2TXT varchar(255), Factor3TXT varchar(255)
# )

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# Connection string, used below for both reading and writing
connection.string <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"

# Query that pulls the input columns
query <- "
SELECT
[PatientEncounterID] --Only need one ID column for lasso
,[SystolicBPNBR]
,[LDLNBR]
,[A1CNBR]
,[GenderFLG]
,[ThirtyDayReadmitFLG]
FROM [SAM].[dbo].[HCRDiabetesClinical]
"

df <- selectData(connection.string, query)

# Save a dataframe for validation later on
dfDeploy <- df[951:1000, ]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)

# Parameters for the development step
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Run Lasso
Lasso <- LassoDevelopment$new(p)
Lasso$run()

## 3. Load saved model and use DEPLOY to generate predictions.
print('Fake production data:')
str(dfDeploy)

# Parameters for the deployment step
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LassoDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

# Write the output dataframe to the table described at the top of this example
writeData(
  MSSQLConnectionString = connection.string,
  df = dfOut,
  tableName = 'HCRDeployClassificationBASE'
)

# Report elapsed time since the start of this example
print(proc.time() - ptm)
# }
# NOT RUN {
# }
# NOT RUN {
#### Regression Example using SQL Server data ####

# This example requires you to first create a table in SQL Server
# If you prefer to not use SAMD, execute this in SSMS to create output table:
# CREATE TABLE dbo.HCRDeployRegressionBASE(
# BindingID float, BindingNM varchar(255), LastLoadDTS datetime2,
# PatientEncounterID int, <--change to match inputID
# PredictedValueNBR decimal(38, 2),
# Factor1TXT varchar(255), Factor2TXT varchar(255), Factor3TXT varchar(255)
# )

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# Connection string, used below for both reading and writing
connection.string <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"

# Query that pulls the input columns
query <- "
SELECT
[PatientEncounterID] --Only need one ID column for lasso
,[SystolicBPNBR]
,[LDLNBR]
,[A1CNBR]
,[GenderFLG]
,[ThirtyDayReadmitFLG]
FROM [SAM].[dbo].[HCRDiabetesClinical]
"

df <- selectData(connection.string, query)

# Save a dataframe for validation later on
dfDeploy <- df[951:1000, ]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)

# Parameters for the development step; the predicted column here is A1CNBR
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "regression"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "A1CNBR"
p$debug <- FALSE
p$cores <- 1

# Run lasso
Lasso <- LassoDevelopment$new(p)
Lasso$run()

## 3. Load saved model and use DEPLOY to generate predictions.
print('Fake production data:')
str(dfDeploy)

# Parameters for the deployment step
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "regression"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$predictedCol <- "A1CNBR"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LassoDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

# Write the output dataframe to the table described at the top of this example
writeData(
  MSSQLConnectionString = connection.string,
  df = dfOut,
  tableName = 'HCRDeployRegressionBASE'
)

# Report elapsed time since the start of this example
print(proc.time() - ptm)
# }
# NOT RUN {
#### Classification example pulling from CSV and writing to SQLite ####

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# Locate the example csv and SQLite file inside the healthcareai package.
# (These system.file() calls can be deleted in your own work.)
csvfile <- system.file(
  "extdata",
  "HCRDiabetesClinical.csv",
  package = "healthcareai"
)
sqliteFile <- system.file(
  "extdata",
  "unit-test.sqlite",
  package = "healthcareai"
)

# Read in CSV; replace csvfile with 'path/file'
df <- read.csv(
  file = csvfile,
  header = TRUE,
  na.strings = c("NULL", "NA", "")
)

# Only one ID column (ie, PatientEncounterID) is needed, so remove PatientID.
df[["PatientID"]] <- NULL

# Save a dataframe for validation later on
dfDeploy <- df[951:1000, ]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)

# Parameters for the development step
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Run lasso
Lasso <- LassoDevelopment$new(p)
Lasso$run()

## 3. Load saved model and use DEPLOY to generate predictions.
print('Fake production data:')
str(dfDeploy)

# Parameters for the deployment step
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LassoDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

# Write the output dataframe to the SQLite file located above
writeData(
  SQLiteFileName = sqliteFile,
  df = dfOut,
  tableName = 'HCRDeployClassificationBASE'
)

# Report elapsed time since the start of this example
print(proc.time() - ptm)
#### Regression example pulling from CSV and writing to SQLite ####

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# Locate the example csv and SQLite file inside the healthcareai package.
# (These system.file() calls can be deleted in your own work.)
csvfile <- system.file(
  "extdata",
  "HCRDiabetesClinical.csv",
  package = "healthcareai"
)
sqliteFile <- system.file(
  "extdata",
  "unit-test.sqlite",
  package = "healthcareai"
)

# Read in CSV; replace csvfile with 'path/file'
df <- read.csv(
  file = csvfile,
  header = TRUE,
  na.strings = c("NULL", "NA", "")
)

# Only one ID column (ie, PatientEncounterID) is needed, so remove PatientID.
df[["PatientID"]] <- NULL

# Save a dataframe for validation later on
dfDeploy <- df[951:1000, ]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)

# Parameters for the development step; the predicted column here is A1CNBR
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "regression"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "A1CNBR"
p$debug <- FALSE
p$cores <- 1

# Run lasso
Lasso <- LassoDevelopment$new(p)
Lasso$run()

## 3. Load saved model and use DEPLOY to generate predictions.
print('Fake production data:')
str(dfDeploy)

# Parameters for the deployment step
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "regression"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$predictedCol <- "A1CNBR"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LassoDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

# Write the output dataframe to the SQLite file located above
writeData(
  SQLiteFileName = sqliteFile,
  df = dfOut,
  tableName = 'HCRDeployRegressionBASE'
)

# Report elapsed time since the start of this example
print(proc.time() - ptm)
#### Identify factors that could benefit outcomes: getProcessVariablesDf ####
#############################################################################
# getProcessVariablesDf() identifies opportunities for improved outcomes at
# the grain level. It is important that the variables ("modifiableVariables")
# and values ("variableLevels") used in this function are under the control
# of the care management process. The best use case for this function is a
# "natural experiment" where otherwise similar groups had different
# treatments applied to them, and that treatment is the modifiable variable
# of interest.
#
# This example shows how to use the getProcessVariablesDf() function, using
# another readmission-prediction model. In this example systolic blood pressure
# is converted into a categorical variable to demonstrate functionality.
# Because of the lasso's automatic feature selection, this example is fairly
# limited. For a wider variety of examples, see ?RandomForestDeployment

# Locate the example csv inside the healthcareai package.
csvfile <- system.file(
  "extdata",
  "HCRDiabetesClinical.csv",
  package = "healthcareai"
)

# Read the csv; replace csvfile with 'path/file' for your own data.
df <- read.csv(
  file = csvfile,
  header = TRUE,
  na.strings = c("NULL", "NA", "")
)

# Remove extra ID
df[["PatientID"]] <- NULL

# Save a dataframe for validation later on
dfDeploy <- df[951:1000, ]

## Develop and Deploy the model
set.seed(42)

# Parameters for the development step
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

Lasso <- LassoDevelopment$new(p)
Lasso$run()

# Parameters for the deployment step
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LassoDeployment$new(p2)
dL$deploy()

## Get predicted outcome changes using getProcessVariablesDf
# getProcessVariablesDf only uses variables with non-zero coefficients,
# automatically discarding the rest. In this example, only A1CNBR had a
# non-zero coefficient. Even though this variable is numeric, we can still
# use getProcessVariablesDf using the variableLevels parameter. For examples
# with categorical variables and additional details about
# getProcessVariablesDf, see the examples in ?RandomForestDeployment
dL$getProcessVariablesDf(
  modifiableVariables = c("A1CNBR"),
  variableLevels = list(A1CNBR = c(5.6, 6.0, 6.5))
)

# By default, the function returns predictions for all rows, but we can
# restrict to specific rows using the grainColumnIDs parameter
dL$getProcessVariablesDf(
  modifiableVariables = c("A1CNBR"),
  variableLevels = list(A1CNBR = c(5.6, 6.0, 6.5)),
  grainColumnIDs = c(951, 975)
)

# The repeatedFactors parameter allows one to get multiple predictions
# for the same variable. For example, reducing A1C to 5.6 might most improve
# a patient's risk, but reducing A1C to 5.9 is likely to also reduce the risk.
dL$getProcessVariablesDf(
  modifiableVariables = c("A1CNBR"),
  variableLevels = list(A1CNBR = c(5.6, 5.9, 6.2, 6.5)),
  repeatedFactors = TRUE
)

# The numTopFactors parameter allows one to set the maximum number of
# predictions to display (with the default being 3)
dL$getProcessVariablesDf(
  modifiableVariables = c("A1CNBR"),
  variableLevels = list(A1CNBR = c(5.6, 5.9, 6.2, 6.5)),
  repeatedFactors = TRUE,
  numTopFactors = 2
)

# If we want to make predictions for increasing the probability (not
# likely in the case of readmissions), we can do so using the smallerBetter
# parameter. (Here, all the deltas will be non-negative, corresponding to an
# increased risk)
dL$getProcessVariablesDf(
  modifiableVariables = c("A1CNBR"),
  variableLevels = list(A1CNBR = c(5.6, 6.0, 6.5)),
  smallerBetter = FALSE
)
# }
# Run the code above in your browser using DataLab