# NOT RUN {
# These examples take about 30 seconds to execute so aren't run automatically,
# but you should be able to execute this code locally.
# }
# NOT RUN {
# Divide pima_diabetes into a training set and a test set, with
# percent_train = 0.9 assigning 90% of the data to training
d <- split_train_test(
  d = pima_diabetes,
  outcome = diabetes,
  percent_train = 0.9
)

# Classification ----

# Prepare the training data and tune algorithms across hyperparameter values
# to predict diabetes; patient_id is passed so it is treated as an ID column
diabetes_models <- machine_learn(d$train, patient_id, outcome = diabetes)

# Show the model specification and performance
diabetes_models

# Score the held-out test data (predicted probability of diabetes)
predict(diabetes_models, d$test)

# Regression ----

# A numeric outcome such as age causes regression models to be trained
age_model <- machine_learn(d$train, patient_id, outcome = age)

# Detailed performance information across the tuning values
summary(age_model)

# Available performance metrics
evaluate(age_model)

# Training performance on the tuning metric (default = RMSE), as a plot
plot(age_model)

# When no new data is specified, predictions are made on the training data
predict(age_model)

# Faster model training without tuning hyperparameters ----

# With tune = FALSE, models are trained at set hyperparameter values. Training
# is quicker this way (especially on larger datasets), at the cost of models
# with less predictive power.
machine_learn(d$train, patient_id, outcome = diabetes, tune = FALSE)

# Train models optimizing a given metric ----

# The metric argument selects what training optimizes, here "PR"
machine_learn(d$train, patient_id, outcome = diabetes, metric = "PR")
# }
# Run the code above in your browser using DataLab