#load the example objects vi and vi_predictors
data(vi, vi_predictors)

#keep the first 1000 rows of vi and the first 10 entries
#of vi_predictors to limit example run time
vi <- vi[seq_len(1000), ]
vi_predictors <- vi_predictors[seq_len(10)]
#reduce correlation among the predictors with cor_select()
#the result overwrites vi_predictors and feeds the vif_select() calls below
vi_predictors <- cor_select(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  max_cor = 0.75
)
#no response, no preference_order, permissive max_vif
#only numeric predictors are processed
selected.predictors <- vif_select(
  df = vi,
  predictors = vi_predictors,
  max_vif = 10
)

#print the selection
selected.predictors
#no response, no preference_order, restrictive max_vif
#only numeric predictors are processed
selected.predictors <- vif_select(
  df = vi,
  predictors = vi_predictors,
  max_vif = 2.5
)

#print the selection
selected.predictors
#with response, no preference_order, restrictive max_vif
#the solution differs slightly from the previous one
#because categorical variables are target-encoded
selected.predictors <- vif_select(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  max_vif = 2.5
)

#print the selection
selected.predictors
#with response and a user-defined preference_order
#restrictive max_vif
#output contains numeric and categorical variables
selected.predictors <- vif_select(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  preference_order = c(
    "soil_type", #categorical variable
    "soil_temperature_mean",
    "swi_mean",
    "rainfall_mean",
    "evapotranspiration_mean"
  ),
  max_vif = 2.5
)

#print the selection
selected.predictors
#with response and an automated preference_order
#restrictive max_vif
#output contains numeric and categorical variables

#compute the preference order with preference_order()
preference.order <- preference_order(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  f = f_rsquared #cor(response, predictor)
)

#inspect the first rows of the result
head(preference.order)

#pass the computed preference order to vif_select()
selected.predictors <- vif_select(
  df = vi,
  response = "vi_mean",
  predictors = vi_predictors,
  preference_order = preference.order,
  max_vif = 2.5
)

#print the selection
selected.predictors
#Run the code above in your browser using DataLab