#load the example data used below
data(vi, vi_predictors)

#keep only the first 1000 rows to limit example run time
vi <- vi[seq_len(1000), ]

#preference order computed against the response:
#numeric and categorical predictors are both in the output,
#ranked by the R-squared between each predictor and the response
preference.order <- preference_order(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  f = f_rsquared,
  workers = 1
)

preference.order

#pass the preference order to the variable selection done by cor_select()
selected.predictors <- cor_select(
  df = vi,
  response = "vi_mean", #the response must be supplied here as well
  predictors = vi_predictors,
  preference_order = preference.order,
  max_cor = 0.75
)

selected.predictors

#correlations among the selected predictors
selected.predictors.cor <- cor_df(
  df = vi,
  response = "vi_mean",
  predictors = selected.predictors
)

#all correlations are expected to be below max_cor
selected.predictors.cor
#USING A CUSTOM FUNCTION
#f_rmse: custom preference function based on the RMSE between
#a predictor and a response
#arguments:
#  x: name of a predictor column
#  y: name of the response column
#  df: data frame with multiple predictors and one response
#returns a single number, where a higher number means higher preference;
#the value is "one minus RMSE", so variables with lower RMSE rank higher
f_rmse <- function(x, y, df){
  #rows with missing values in either column are dropped
  pair <- na.omit(df[, c(x, y)])
  #both columns are centered and scaled before being compared
  z <- scale(pair)
  squared.error <- (z[, 1] - z[, 2])^2
  1 - sqrt(mean(squared.error))
}
#preference order computed with the custom function f_rmse
preference.order <- preference_order(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  f = f_rmse,
  workers = 1
)

preference.order
#Run the code above in your browser using DataLab