# \donttest{
# Run the examples only when torch reports itself as installed and the
# suggested packages used below are available. `&&` is the scalar,
# short-circuiting operator intended for `if ()` conditions.
# NOTE(review): `brulee_mlp()` is never attached in this script; presumably the
# examples run with the brulee package already loaded -- confirm.
if (torch::torch_is_installed() &&
    rlang::is_installed(c("recipes", "yardstick", "modeldata"))) {
  # Attach everything up front (same relative order as before) so helpers such
  # as `autoplot()` and `bind_cols()` are on the search path at first use.
  library(recipes)
  library(ggplot2)
  library(yardstick)
  library(dplyr)

  # NOTE: values of expressions evaluated inside these braces are not
  # auto-printed, so every result meant to be displayed is wrapped in print().

  ## ---------------------------------------------------------------------------
  # regression examples (increase # epochs to get better results)

  data(ames, package = "modeldata")

  # Model the sale price on the log10 scale.
  ames$Sale_Price <- log10(ames$Sale_Price)

  # Hold out everything except 2000 randomly chosen rows for testing.
  set.seed(122)
  in_train <- sample(seq_len(nrow(ames)), 2000)
  ames_train <- ames[in_train, ]
  ames_test <- ames[-in_train, ]

  # Using matrices
  set.seed(1)
  fit <- brulee_mlp(
    x = as.matrix(ames_train[, c("Longitude", "Latitude")]),
    y = ames_train$Sale_Price,
    penalty = 0.10
  )

  # Using recipe
  ames_rec <-
    recipe(
      Sale_Price ~ Bldg_Type + Neighborhood + Year_Built + Gr_Liv_Area +
        Full_Bath + Year_Sold + Lot_Area + Central_Air + Longitude + Latitude,
      data = ames_train
    ) %>%
    # Transform some highly skewed predictors
    step_BoxCox(Lot_Area, Gr_Liv_Area) %>%
    # Lump some rarely occurring categories into "other"
    step_other(Neighborhood, threshold = 0.05) %>%
    # Encode categorical predictors as binary.
    step_dummy(all_nominal_predictors(), one_hot = TRUE) %>%
    # Add an interaction effect:
    step_interact(~ starts_with("Central_Air"):Year_Built) %>%
    step_zv(all_predictors()) %>%
    step_normalize(all_numeric_predictors())

  set.seed(2)
  fit <- brulee_mlp(
    ames_rec,
    data = ames_train,
    hidden_units = 20,
    dropout = 0.05,
    rate_schedule = "cyclic",
    step_size = 4
  )
  print(fit)

  print(autoplot(fit))

  # Predict the test set once and reuse the result for the plot and the metric.
  ames_pred <- predict(fit, ames_test) %>%
    bind_cols(ames_test)

  # Observed versus predicted sale price, with the identity line for reference.
  print(
    ggplot(ames_pred, aes(x = .pred, y = Sale_Price)) +
      geom_abline(col = "green") +
      geom_point(alpha = .3) +
      lims(x = c(4, 6), y = c(4, 6)) +
      coord_fixed(ratio = 1)
  )

  print(rmse(ames_pred, Sale_Price, .pred))

  # Using multiple hidden layers and activation functions
  set.seed(2)
  hidden_fit <- brulee_mlp(
    ames_rec,
    data = ames_train,
    hidden_units = c(15L, 17L),
    activation = c("relu", "elu"),
    dropout = 0.05,
    rate_schedule = "cyclic",
    step_size = 4
  )

  print(
    predict(hidden_fit, ames_test) %>%
      bind_cols(ames_test) %>%
      rmse(Sale_Price, .pred)
  )

  # ----------------------------------------------------------------------------
  # classification

  data("parabolic", package = "modeldata")

  # Train on 300 randomly chosen rows; the rest are kept for plotting.
  set.seed(1)
  in_train <- sample(seq_len(nrow(parabolic)), 300)
  parabolic_tr <- parabolic[in_train, ]
  parabolic_te <- parabolic[-in_train, ]

  set.seed(2)
  cls_fit <- brulee_mlp(
    class ~ .,
    data = parabolic_tr,
    hidden_units = 2,
    epochs = 200L,
    learn_rate = 0.1,
    activation = "elu",
    penalty = 0.1,
    batch_size = 2^8,
    optimizer = "SGD"
  )
  print(autoplot(cls_fit))

  # Evaluate class probabilities on a regular 100 x 100 grid over [-4, 4]^2.
  grid_points <- seq(-4, 4, length.out = 100)
  grid <- expand.grid(X1 = grid_points, X2 = grid_points)

  # Draw the contour where the predicted probability of the first class is 1/2
  # and overlay the held-out points.
  print(
    predict(cls_fit, grid, type = "prob") %>%
      bind_cols(grid) %>%
      ggplot(aes(X1, X2)) +
      geom_contour(aes(z = .pred_Class1), breaks = 1/2, col = "black") +
      geom_point(data = parabolic_te, aes(col = class))
  )
}
# }
# Run the code above in your browser using DataLab