LinearMixedModelDeployment: Deploy a production-ready predictive Linear Mixed Model model

Description

This step allows one to

Load a saved model from LinearMixedModelDevelopment
Run the model against test data to generate predictions
Push these predictions to SQL Server

The linear mixed model functionality works best with smaller data sets.

Usage

LinearMixedModelDeployment(type, df, grainCol, personCol, 
predictedCol, impute, debug, cores, modelName)

Arguments

type

The type of model (either 'regression' or 'classification')

Dataframe whose columns are used for new predictions. Data structure should match development as much as possible. Number of columns, names, types, grain, and predicted must be the same.

grainCol

Optional. The dataframe's column that has IDs pertaining to the grain. No ID columns are truly needed for this step.

personCol

The data frame's columns that represents the patient/person

predictedCol

Column that you want to predict.

impute

For training df, set all-column imputation to T or F. If T, this uses values calculated in development. F leads to removal of rows containing NULLs and is not recommended.

debug

Provides the user extended output to the console, in order to monitor the calculations throughout. Use T or F.

cores

Number of cores you'd like to use. Defaults to 2.

modelName

Optional string. Can specify the model name. If used, you must load the same one in the deploy step.

Format

An object of class R6ClassGenerator of length 24.

Methods

The above describes params for initializing a new linearMixedModelDeployment class with $new(). Individual methods are documented below.

<code>$new()</code>

Initializes a new linear mixed model deployment class using the parameters saved in p, documented above. This method loads, cleans, and prepares data for generating predictions. Usage: $new(p)

<code>$deploy()</code>

Generate new predictions, calculate top factors, and prepare the output dataframe. Usage:$deploy()

<code>$getTopFactors()</code>

Return the grain, all top factors, and their weights. Usage: $getTopFactors(numberOfFactors = NA, includeWeights = FALSE) Params: - numberOfFactors: returns the top n factors. Defaults to all factors. - includeWeights: If TRUE, returns weights associated with each factor.

<code>$getOutDf()</code>

Returns the output dataframe. Usage: $getOutDf()

Examples

Run this code

# NOT RUN {
#### Classification Example using csv data ####
## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# setwd('C:/Yourscriptlocation/Useforwardslashes') # Uncomment if using csv

# Can delete this line in your work
csvfile <- system.file("extdata", 
                       "HCRDiabetesClinical.csv", 
                       package = "healthcareai")

# Replace csvfile with 'path/file'
df <- read.csv(file = csvfile, 
               header = TRUE, 
               na.strings = c("NULL", "NA", ""))

# Save a dataframe for validation later on
dfDeploy <- df[951:1000,]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Run Linear Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()

## 3. Load saved model and use DEPLOY to generate predictions. 
print('Fake production data:')
str(dfDeploy)

p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$personCol <- "PatientID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LinearMixedModelDeployment$new(p2)
dL$deploy()

dfOut <- dL$getOutDf()
head(dfOut)
# Write to CSV (or JSON, MySQL, etc) using plain R syntax
# write.csv(dfOut,'path/predictionsfile.csv')

print(proc.time() - ptm)

# }
# NOT RUN {
#### Classification example using SQL Server data ####
# This example requires you to first create a table in SQL Server
# If you prefer to not use SAMD, execute this in SSMS to create output table:
# CREATE TABLE dbo.HCRDeployClassificationBASE(
#   BindingID float, BindingNM varchar(255), LastLoadDTS datetime2,
#   PatientEncounterID int, <--change to match inputID
#   PredictedProbNBR decimal(38, 2),
#   Factor1TXT varchar(255), Factor2TXT varchar(255), Factor3TXT varchar(255)
# )

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

connection.string <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"

query <- "
SELECT
[PatientEncounterID]
,[PatientID]
,[SystolicBPNBR]
,[LDLNBR]
,[A1CNBR]
,[GenderFLG]
,[ThirtyDayReadmitFLG]
FROM [SAM].[dbo].[HCRDiabetesClinical]
"

df <- selectData(connection.string, query)

# Save a dataframe for validation later on
dfDeploy <- df[951:1000,]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Run Linear Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()

## 3. Load saved model and use DEPLOY to generate predictions. 
print('Fake production data:')
str(dfDeploy)

p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$personCol <- "PatientID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LinearMixedModelDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

writeData(MSSQLConnectionString = connection.string,
          df = dfOut,
          tableName = 'HCRDeployClassificationBASE')

print(proc.time() - ptm)
# }
# NOT RUN {
# }
# NOT RUN {
#### Regression Example using SQL Server data ####
# This example requires you to first create a table in SQL Server
# If you prefer to not use SAMD, execute this in SSMS to create output table:
# CREATE TABLE dbo.HCRDeployRegressionBASE(
#   BindingID float, BindingNM varchar(255), LastLoadDTS datetime2,
#   PatientEncounterID int, <--change to match inputID
#   PredictedValueNBR decimal(38, 2),
#   Factor1TXT varchar(255), Factor2TXT varchar(255), Factor3TXT varchar(255)
# )

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

connection.string <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"

query <- "
SELECT
[PatientEncounterID]
,[PatientID]
,[SystolicBPNBR]
,[LDLNBR]
,[A1CNBR]
,[GenderFLG]
,[ThirtyDayReadmitFLG]
FROM [SAM].[dbo].[HCRDiabetesClinical]
"

df <- selectData(connection.string, query)

# Save a dataframe for validation later on
dfDeploy <- df[951:1000,]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "regression"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "A1CNBR"
p$debug <- FALSE
p$cores <- 1

# Run Linear Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()

## 3. Load saved model and use DEPLOY to generate predictions. 
dfDeploy$A1CNBR <- NULL # You won't know the response in production
print('Fake production data:')
str(dfDeploy)

p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "regression"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$personCol <- "PatientID"
p2$predictedCol <- "A1CNBR"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LinearMixedModelDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

writeData(MSSQLConnectionString = connection.string,
          df = dfOut,
          tableName = 'HCRDeployRegressionBASE')

print(proc.time() - ptm)
# }
# NOT RUN {
# }
# NOT RUN {
#### Classification example pulling from CSV and writing to SQLite ####


## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# Can delete these system.file lines in your work
csvfile <- system.file("extdata", 
                       "HCRDiabetesClinical.csv", 
                       package = "healthcareai")
                       
sqliteFile <- system.file("extdata",
                          "unit-test.sqlite",
                          package = "healthcareai")

# Read in CSV; replace csvfile with 'path/file'
df <- read.csv(file = csvfile, 
               header = TRUE, 
               na.strings = c("NULL", "NA", ""))

# Save a dataframe for validation later on
dfDeploy <- df[951:1000,]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "classification"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "ThirtyDayReadmitFLG"
p$debug <- FALSE
p$cores <- 1

# Run Linear Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()

## 3. Load saved model and use DEPLOY to generate predictions. 
print('Fake production data:')
str(dfDeploy)

p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "classification"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$personCol <- "PatientID"
p2$predictedCol <- "ThirtyDayReadmitFLG"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LinearMixedModelDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

writeData(SQLiteFileName = sqliteFile,
          df = dfOut,
          tableName = 'HCRDeployClassificationBASE')

print(proc.time() - ptm)
# }
# NOT RUN {
# }
# NOT RUN {
#### Regression example pulling from CSV and writing to SQLite ####

## 1. Loading data and packages.
ptm <- proc.time()
library(healthcareai)

# Can delete these system.file lines in your work
csvfile <- system.file("extdata", 
                       "HCRDiabetesClinical.csv", 
                       package = "healthcareai")

sqliteFile <- system.file("extdata",
                          "unit-test.sqlite",
                          package = "healthcareai")

# Read in CSV; replace csvfile with 'path/file'
df <- read.csv(file = csvfile, 
               header = TRUE, 
               na.strings = c("NULL", "NA", ""))

# Save a dataframe for validation later on
dfDeploy <- df[951:1000,]

## 2. Train and save the model using DEVELOP
print('Historical, development data:')
str(df)

set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- df
p$type <- "regression"
p$impute <- TRUE
p$grainCol <- "PatientEncounterID"
p$personCol <- "PatientID"
p$predictedCol <- "A1CNBR"
p$debug <- FALSE
p$cores <- 1

# Run Linear Mixed Model
lmm <- LinearMixedModelDevelopment$new(p)
lmm$run()

## 3. Load saved model and use DEPLOY to generate predictions. 
dfDeploy$A1CNBR <- NULL # You won't know the response in production
print('Fake production data:')
str(dfDeploy)

p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "regression"
p2$df <- dfDeploy
p2$grainCol <- "PatientEncounterID"
p2$personCol <- "PatientID"
p2$predictedCol <- "A1CNBR"
p2$impute <- TRUE
p2$debug <- FALSE
p2$cores <- 1

dL <- LinearMixedModelDeployment$new(p2)
dL$deploy()
dfOut <- dL$getOutDf()

writeData(SQLiteFileName = sqliteFile,
          df = dfOut,
          tableName = 'HCRDeployRegressionBASE')

print(proc.time() - ptm)
# }

Run the code above in your browser using DataLab