# NOT RUN {
#### Example using csv dataset ####
# Start a timer so the total run time can be printed at the end.
ptm <- proc.time()
library(healthcareai)

# 1. Load data. Categorical columns should be characters.
# can delete these system.file lines in your work
csvfile <- system.file("extdata",
                       "dermatology_multiclass_data.csv",
                       package = "healthcareai")

# Read in CSV; replace csvfile with 'path/file'
df <- read.csv(file = csvfile,
               header = TRUE,
               stringsAsFactors = FALSE,
               na.strings = c("NULL", "NA", "", "?"))
str(df) # check the types of columns

# Reserve 20 rows for the deploy step and keep them OUT of the training
# data, so the deployed model is scored on rows it has never seen.
deployRows <- 347:366
dfDeploy <- df[deployRows, ]
dfTrain <- df[-deployRows, ]

# 2. Develop and save model (saving is automatic)
set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- dfTrain
p$type <- "multiclass"
p$impute <- TRUE
p$grainCol <- "PatientID"
p$predictedCol <- "target"
p$debug <- FALSE
p$cores <- 1

# xgb_params must be a list with all of these things in it.
# if you would like to tweak parameters, go for it!
# Leave objective and eval_metric as they are.
p$xgb_params <- list("objective" = "multi:softprob",
                     "eval_metric" = "mlogloss",
                     "max_depth" = 6, # max depth of each learner
                     "eta" = 0.1,     # learning rate
                     "silent" = 0,    # 0 prints running messages; 1 is silent
                     "nthread" = 2)   # number of parallel threads to use

# Run model
boost <- XGBoostDevelopment$new(p)
boost$run()

## 3. Load saved model (automatic) and use DEPLOY to generate predictions.
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "multiclass"
p2$df <- dfDeploy
p2$grainCol <- "PatientID"
p2$predictedCol <- "target"
p2$impute <- TRUE
p2$debug <- FALSE

# Deploy model to make new predictions
boostD <- XGBoostDeployment$new(p2)
boostD$deploy()

# Get output dataframe for csv or SQL
outDf <- boostD$getOutDf()
head(outDf)

# If you want to write to sqlite:
# sqliteFile <- system.file("extdata",
#                           "unit-test.sqlite",
#                           package = "healthcareai")
# writeData(SQLiteFileName = sqliteFile,
#           df = outDf,
#           tableName = "dermatologyDeployMulticlassBASE")

# Write the predictions to CSV (or JSON, MySQL, etc) using plain R syntax
# write.csv(outDf, 'path/predictionsfile.csv')

# Get raw predictions if you want
# rawPredictions <- boostD$getPredictions()

# If you have known labels, check your prediction accuracy like this:
# caret::confusionMatrix(true_label,
#                        predicted_label,
#                        mode = "everything")

# Report elapsed time for the whole example.
print(proc.time() - ptm)
# }
# NOT RUN {
#### Example pulling from CSV and writing to SQL server ####
# This example requires you to first create a table in SQL Server
# If you prefer to not use SAMD, execute this in SSMS to create output table:
# CREATE TABLE [dbo].[dermatologyDeployClassificationBASE](
# [BindingID] [int] NULL,[BindingNM] [varchar](255) NULL,
# [LastLoadDTS] [datetime2](7) NULL,
# [PatientID] [decimal](38, 0) NULL,
# [PredictedProb1] [decimal](38, 2) NULL,
# [PredictedClass1] [varchar](255) NULL,
# [PredictedProb2] [decimal](38, 2) NULL,
# [PredictedClass2] [varchar](255) NULL,
# [PredictedProb3] [decimal](38, 2) NULL,
# [PredictedClass3] [varchar](255) NULL)

# Load the package and start this example's own timer, so the example can be
# run by itself and the elapsed time printed at the end covers only this run.
library(healthcareai)
ptm <- proc.time()

# 1. Load data. Categorical columns should be characters.
csvfile <- system.file("extdata",
                       "dermatology_multiclass_data.csv",
                       package = "healthcareai")

# Replace csvfile with 'path/file'
df <- read.csv(file = csvfile,
               header = TRUE,
               stringsAsFactors = FALSE,
               na.strings = c("NULL", "NA", "", "?"))
str(df) # check the types of columns

# Reserve 20 rows for the deploy step and keep them OUT of the training
# data, so the deployed model is scored on rows it has never seen.
deployRows <- 347:366
dfDeploy <- df[deployRows, ]
dfTrain <- df[-deployRows, ]

# 2. Develop and save model (saving is automatic)
set.seed(42)
p <- SupervisedModelDevelopmentParams$new()
p$df <- dfTrain
p$type <- "multiclass"
p$impute <- TRUE
p$grainCol <- "PatientID"
p$predictedCol <- "target"
p$debug <- FALSE
p$cores <- 1

# xgb_params must be a list with all of these things in it.
# if you would like to tweak parameters, go for it!
# Leave objective and eval_metric as they are.
p$xgb_params <- list("objective" = "multi:softprob",
                     "eval_metric" = "mlogloss",
                     "max_depth" = 6, # max depth of each learner
                     "eta" = 0.1,     # learning rate
                     "silent" = 0,    # 0 prints running messages; 1 is silent
                     "nthread" = 2)   # number of parallel threads to use

# Run model
boost <- XGBoostDevelopment$new(p)
boost$run()

## 3. Load saved model (automatic) and use DEPLOY to generate predictions.
p2 <- SupervisedModelDeploymentParams$new()
p2$type <- "multiclass"
p2$df <- dfDeploy
p2$grainCol <- "PatientID"
p2$predictedCol <- "target"
p2$impute <- TRUE
p2$debug <- FALSE

# Deploy model to make new predictions
boostD <- XGBoostDeployment$new(p2)
boostD$deploy()

# Get output dataframe for csv or SQL
outDf <- boostD$getOutDf()
head(outDf)

# Save the output to SQL server
# (edit server/database below to point at your own instance)
connection.string <- "
driver={SQL Server};
server=localhost;
database=SAM;
trusted_connection=true
"
writeData(MSSQLConnectionString = connection.string,
          df = outDf,
          tableName = 'dermatologyDeployClassificationBASE')

# Get raw predictions if you want
# rawPredictions <- boostD$getPredictions()

# Report elapsed time for this example.
print(proc.time() - ptm)
# }
# Run the code above in your browser using DataLab