AutoLimeAid automates lime model builds and lime explanations.

Usage
AutoLimeAid(
  EvalPredsData = data,
  LimeTrainingData = data,
  LimeBins = 10,
  LimeIterations = 7500,
  LimeNumFeatures = 0,
  LimeModel = NULL,
  LimeModelPath = NULL,
  LimeModelID = NULL,
  MLModel = NULL,
  MLModelPath = NULL,
  MLMetaDataPath = NULL,
  MLModelID = NULL,
  ModelType = "xgboost",
  TargetType = "classification",
  NThreads = parallel::detectCores(),
  MaxMem = "32G",
  FeatureColumnNames = TestModel$ColNames,
  IDcols = NULL,
  FactorLevelsList = TestModel$FactorLevels,
  TargetLevels = NULL,
  OneHot = FALSE,
  ReturnFeatures = TRUE,
  TransformNumeric = FALSE,
  BackTransNumeric = FALSE,
  TargetColumnName = NULL,
  TransformationObject = NULL,
  TransID = NULL,
  TransPath = NULL,
  MDP_Impute = TRUE,
  MDP_CharToFactor = TRUE,
  MDP_RemoveDates = TRUE,
  MDP_MissFactor = "0",
  MDP_MissNum = -1
)
Arguments

EvalPredsData
Data used for interpretation. Should be the same kind of data supplied to the ML scoring functions.

LimeTrainingData
Data used to train your ML model.

LimeBins
Number of bins to use for bucketing numeric variables.

LimeIterations
Number of lime permutations run to generate the interpretation of each predicted value (see the sketch following this list).

LimeNumFeatures
How many features you want considered in the lime evaluation. Set to 0 to use all features.

LimeModel
Supply a lime model if you have one available. Otherwise, provide a model path and the model will either be pulled in from there or be built and saved there.

LimeModelPath
Supply a path to where your lime model is located or is to be stored.

LimeModelID
Provide a name for your lime model. If left NULL, a name will be created for you (and a new model will be built).

MLModel
Supply the ML model object (except for H2O models). Can be left NULL.

MLModelPath
Supply a path to where your ML model is located. If this is supplied, the model will be pulled in from file (even if you supply a model).

MLMetaDataPath
Supply a path to where your ML model metadata is located (it may be the same as MLModelPath). If this is supplied, artifacts about the model will be pulled in from there.

MLModelID
The name of your ML model as it reads in the file directory.

ModelType
Choose from "xgboost", "h2o", or "catboost".

TargetType
For CatBoost models only. Select from "classification", "regression", or "multiclass".

NThreads
Number of CPU threads.

MaxMem
The maximum memory you want to allocate, e.g. "32G".

FeatureColumnNames
The names of the features used in training your ML model (these should be returned with the model or saved to file).

IDcols
The ID columns used in either CatBoost or XGBoost.

FactorLevelsList
The factor levels list returned from model training (e.g. TestModel$FactorLevels).

TargetLevels
The target levels used in multiclass models.

OneHot
Replicate what you did in model training.

ReturnFeatures
TRUE or FALSE.

TransformNumeric
TRUE or FALSE. Replicate what you did in model training.

BackTransNumeric
TRUE or FALSE. Replicate what you did in model training.

TargetColumnName
For the transformations.

TransformationObject
Replicate what you did in model training.

TransID
Set to the ID used in model training.

TransPath
Same path used in model training.

MDP_Impute
Replicate what you did in model training.

MDP_CharToFactor
Replicate what you did in model training.

MDP_RemoveDates
Replicate what you did in model training.

MDP_MissFactor
Replicate what you did in model training.

MDP_MissNum
Replicate what you did in model training.
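The three Lime* tuning arguments map onto knobs of the underlying lime package (the objects returned by AutoLimeAid are lime objects, as the plotting calls in the examples show). Below is a minimal sketch of that mapping, assuming AutoLimeAid forwards LimeBins, LimeIterations, and LimeNumFeatures to lime::lime() and lime::explain() roughly as shown; the lm model and the S3 shims are illustrative stand-ins, not RemixAutoML internals.

library(lime)

# Hypothetical shims so lime can explain a plain lm model (lm is not
# supported by lime out of the box); they stand in for whatever hooks
# are used for catboost/xgboost/h2o.
model_type.lm <- function(x, ...) "regression"
predict_model.lm <- function(x, newdata, type, ...) {
  data.frame(Response = predict(x, newdata = newdata))
}

fit <- lm(mpg ~ ., data = mtcars)

# LimeBins ~ n_bins: numeric variables are bucketed into bins
explainer <- lime(mtcars[, -1], fit, bin_continuous = TRUE, n_bins = 10)

# LimeIterations ~ n_permutations; LimeNumFeatures ~ n_features
# (LimeNumFeatures = 0 in AutoLimeAid means "use all features")
explanations <- explain(mtcars[1:2, -1], explainer,
                        n_features = ncol(mtcars) - 1L,
                        n_permutations = 7500)
lime::plot_features(explanations)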
Value

LimeModelObject and lime explanations.
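For instance, assuming the component names used in the examples below (LimeExplanations appears there; the explainer component name is assumed):

LimeOutput$LimeExplanations  # lime explanations; feed to lime::plot_features()
LimeOutput$LimeModel         # the saved lime explainer (component name assumed)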
See Also

Other Model Evaluation and Interpretation: EvalPlot(), LimeModel(), ParDepCalPlots(), RedYellowGreen(), threshOptim()
Examples

# NOT RUN {
# CatBoost data generator
# (x1 is the target's normal score; blending it with an independent normal
# draw at Correl = 0.85 yields predictors correlated with the target)
dataGenCatBoost <- function() {
  Correl <- 0.85
  N <- 10000
  data <- data.table::data.table(Classification = runif(N))
  data[, x1 := qnorm(Classification)]
  data[, x2 := runif(N)]
  data[, Independent_Variable1 := log(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable2 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable3 := exp(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable4 := exp(exp(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2))))]
  data[, Independent_Variable5 := sqrt(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable6 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.10]
  data[, Independent_Variable7 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.25]
  data[, Independent_Variable8 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.75]
  data[, Independent_Variable9 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^2]
  data[, Independent_Variable10 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^4]
  data[, Independent_Variable11 := as.factor(
    ifelse(Independent_Variable2 < 0.20, "A",
      ifelse(Independent_Variable2 < 0.40, "B",
        ifelse(Independent_Variable2 < 0.6, "C",
          ifelse(Independent_Variable2 < 0.8, "D", "E")))))]
  data[, ':=' (x1 = NULL, x2 = NULL)]
  data[, Classification := ifelse(Classification > 0.5, 1, 0)]
  rm(N, Correl)
  return(data)
}
data <- dataGenCatBoost()
TestModel <- RemixAutoML::AutoCatBoostRegression(
  data,
  TrainOnFull = FALSE,
  ValidationData = NULL,
  TestData = NULL,
  TargetColumnName = "Classification",
  FeatureColNames = c(2:12),
  PrimaryDateColumn = NULL,
  IDcols = NULL,
  MaxModelsInGrid = 3,
  task_type = "GPU",
  eval_metric = "RMSE",
  Trees = 50,
  GridTune = FALSE,
  model_path = "C:/Users/aantico/Documents/Package/GUI_Package",
  metadata_path = NULL,
  ModelID = "Adrian",
  NumOfParDepPlots = 15,
  ReturnModelObjects = TRUE,
  SaveModelObjects = TRUE,
  PassInGrid = NULL)
# CatBoost Build Lime Model and Explanations
LimeOutput <- RemixAutoML::AutoLimeAid(
  EvalPredsData = data[c(1,15)],
  LimeTrainingData = data,
  LimeBins = 10,
  LimeIterations = 7500,
  LimeNumFeatures = 0,
  TargetType = "regression",
  LimeModel = NULL,
  LimeModelPath = "C:/Users/aantico/Documents/Package/GUI_Package",
  LimeModelID = "AdrianLime",
  MLModel = NULL,
  MLModelPath = "C:/Users/aantico/Documents/Package/GUI_Package",
  MLMetaDataPath = NULL,
  MLModelID = "Adrian",
  ModelType = "catboost",
  NThreads = parallel::detectCores(),
  MaxMem = "14G",
  FeatureColumnNames = NULL,
  IDcols = NULL,
  FactorLevelsList = NULL,
  TargetLevels = NULL,
  OneHot = FALSE,
  ReturnFeatures = TRUE,
  TransformNumeric = FALSE,
  BackTransNumeric = FALSE,
  TargetColumnName = NULL,
  TransformationObject = NULL,
  TransID = NULL,
  TransPath = NULL,
  MDP_Impute = TRUE,
  MDP_CharToFactor = TRUE,
  MDP_RemoveDates = TRUE,
  MDP_MissFactor = "0",
  MDP_MissNum = -1)
# Plot lime objects
lime::plot_features(LimeOutput$LimeExplanations)
suppressWarnings(lime::plot_explanations(LimeOutput$LimeExplanations))
# H2O data generator
dataGenH2O <- function() {
  Correl <- 0.85
  N <- 10000
  data <- data.table::data.table(Classification = runif(N))
  data[, x1 := qnorm(Classification)]
  data[, x2 := runif(N)]
  data[, Independent_Variable1 := log(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable2 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable3 := exp(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable4 := exp(exp(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2))))]
  data[, Independent_Variable5 := sqrt(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable6 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.10]
  data[, Independent_Variable7 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.25]
  data[, Independent_Variable8 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.75]
  data[, Independent_Variable9 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^2]
  data[, Independent_Variable10 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^4]
  data[, Independent_Variable11 := as.factor(
    ifelse(Independent_Variable2 < 0.20, "A",
      ifelse(Independent_Variable2 < 0.40, "B",
        ifelse(Independent_Variable2 < 0.6, "C",
          ifelse(Independent_Variable2 < 0.8, "D", "E")))))]
  data[, ':=' (x1 = NULL, x2 = NULL)]
  data[, Classification := ifelse(Classification > 0.5, 1, 0)]
  rm(N, Correl)
  return(data)
}
data <- dataGenH2O()
TestModel <- RemixAutoML::AutoH2oDRFClassifier(
  data = data,
  TrainOnFull = FALSE,
  ValidationData = NULL,
  TestData = NULL,
  TargetColumnName = "Classification",
  FeatureColNames = setdiff(names(data), "Classification"),
  eval_metric = "auc",
  Trees = 50,
  GridTune = FALSE,
  MaxMem = "32G",
  NThreads = max(1, parallel::detectCores() - 2),
  MaxModelsInGrid = 10,
  model_path = "C:/Users/aantico/Desktop/Retention Analytics",
  metadata_path = NULL,
  ModelID = "Adrian",
  NumOfParDepPlots = 10,
  ReturnModelObjects = TRUE,
  SaveModelObjects = TRUE,
  IfSaveModel = "standard",
  H2OShutdown = TRUE)
LimeOutput <- RemixAutoML::AutoLimeAid(
  EvalPredsData = data[c(1,15)],
  LimeTrainingData = data,
  LimeBins = 10,
  LimeIterations = 7500,
  TargetType = "regression",
  LimeNumFeatures = 0,
  LimeModel = NULL,
  LimeModelPath = "C:/Users/aantico/Desktop/Retention Analytics",
  LimeModelID = "AdrianLime",
  MLModel = NULL,
  MLModelPath = "C:/Users/aantico/Desktop/Retention Analytics",
  MLMetaDataPath = NULL,
  MLModelID = "Adrian",
  ModelType = "h2o",
  NThreads = parallel::detectCores(),
  MaxMem = "14G",
  FeatureColumnNames = NULL,
  IDcols = NULL,
  FactorLevelsList = NULL,
  TargetLevels = NULL,
  OneHot = FALSE,
  ReturnFeatures = TRUE,
  TransformNumeric = FALSE,
  BackTransNumeric = FALSE,
  TargetColumnName = NULL,
  TransformationObject = NULL,
  TransID = NULL,
  TransPath = NULL,
  MDP_Impute = TRUE,
  MDP_CharToFactor = TRUE,
  MDP_RemoveDates = TRUE,
  MDP_MissFactor = "0",
  MDP_MissNum = -1)
# Plot lime objects
lime::plot_features(LimeOutput$LimeExplanations)
suppressWarnings(lime::plot_explanations(LimeOutput$LimeExplanations))
# XGBoost data generator
dataGenXGBoost <- function() {
  Correl <- 0.85
  N <- 10000
  data <- data.table::data.table(Classification = runif(N))
  data[, x1 := qnorm(Classification)]
  data[, x2 := runif(N)]
  data[, Independent_Variable1 := log(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable2 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable3 := exp(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable4 := exp(exp(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2))))]
  data[, Independent_Variable5 := sqrt(pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))]
  data[, Independent_Variable6 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.10]
  data[, Independent_Variable7 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.25]
  data[, Independent_Variable8 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^0.75]
  data[, Independent_Variable9 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^2]
  data[, Independent_Variable10 := (pnorm(Correl * x1 + sqrt(1-Correl^2) * qnorm(x2)))^4]
  data[, Independent_Variable11 := as.factor(
    ifelse(Independent_Variable2 < 0.20, "A",
      ifelse(Independent_Variable2 < 0.40, "B",
        ifelse(Independent_Variable2 < 0.6, "C",
          ifelse(Independent_Variable2 < 0.8, "D", "E")))))]
  data[, ':=' (x1 = NULL, x2 = NULL)]
  data[, Classification := ifelse(Classification > 0.5, 1, 0)]
  rm(Correl, N)
  return(data)
}
data <- dataGenXGBoost()
TestModel <- RemixAutoML::AutoXGBoostClassifier(
  data,
  TrainOnFull = FALSE,
  ValidationData = NULL,
  TestData = NULL,
  TargetColumnName = "Classification",
  FeatureColNames = 2:12,
  IDcols = NULL,
  eval_metric = "auc",
  Trees = 50,
  GridTune = FALSE,
  grid_eval_metric = "auc",
  MaxModelsInGrid = 10,
  NThreads = 8,
  TreeMethod = "hist",
  model_path = "C:/Users/aantico/Desktop/Retention Analytics",
  metadata_path = NULL,
  ModelID = "Adrian2",
  NumOfParDepPlots = 3,
  ReturnModelObjects = TRUE,
  ReturnFactorLevels = TRUE,
  SaveModelObjects = TRUE,
  PassInGrid = NULL)
# XGBoost Build Lime and Generate Output
LimeOutput <- RemixAutoML::AutoLimeAid(
  EvalPredsData = data[c(1,15)],
  LimeTrainingData = data,
  LimeBins = 10,
  TargetType = "classification",
  LimeIterations = 7500,
  LimeNumFeatures = 0,
  LimeModel = NULL,
  LimeModelPath = "C:/Users/aantico/Desktop/Retention Analytics",
  LimeModelID = "Adrian2Lime",
  MLModel = NULL,
  MLModelPath = "C:/Users/aantico/Desktop/Retention Analytics",
  MLMetaDataPath = NULL,
  MLModelID = "Adrian2",
  ModelType = "xgboost",
  NThreads = parallel::detectCores(),
  MaxMem = "14G",
  FeatureColumnNames = NULL,
  IDcols = NULL,
  FactorLevelsList = NULL,
  TargetLevels = NULL,
  OneHot = FALSE,
  ReturnFeatures = TRUE,
  TransformNumeric = FALSE,
  BackTransNumeric = FALSE,
  TargetColumnName = NULL,
  TransformationObject = NULL,
  TransID = NULL,
  TransPath = NULL,
  MDP_Impute = TRUE,
  MDP_CharToFactor = TRUE,
  MDP_RemoveDates = TRUE,
  MDP_MissFactor = "0",
  MDP_MissNum = -1)
# Plot lime objects
lime::plot_features(LimeOutput$LimeExplanations)
suppressWarnings(lime::plot_explanations(LimeOutput$LimeExplanations))
# }