### Example 1
#-----
# - Input: the table simu_data, made of two overlayed databases
# - Goal: search for the best predictors of "Yb1" (OUT = "Y")
# - The raw database is used
# - No random forest (RF) approach is run (RF = FALSE)
#-----
data("simu_data")
sel_ex1 <- select_pred(
  simu_data,
  Y = "Yb1",
  Z = "Yb2",
  ID = 1,
  OUT = "Y",
  quanti = c(3, 8),
  nominal = c(1, 4:5, 7),
  ordinal = c(2, 6),
  thresh_cat = 0.30,
  thresh_num = 0.70,
  thresh_Y = 0.20,
  RF = FALSE
)
### Example 2
#-----
# - Same data, variable types and thresholds as example 1
# - Goal: search for the best predictors of "Yb2" (OUT = "Z")
#-----
sel_ex2 <- select_pred(
  simu_data,
  Y = "Yb1",
  Z = "Yb2",
  ID = 1,
  OUT = "Z",
  quanti = c(3, 8),
  nominal = c(1, 4:5, 7),
  ordinal = c(2, 6),
  thresh_cat = 0.30,
  thresh_num = 0.70,
  thresh_Y = 0.20,
  RF = FALSE
)
# \donttest{
### Example 3
#-----
# - Same data, variable types and thresholds as example 1
# - A RF approach is used to estimate the standard variable importance
#   measures and determine the best subset of predictors
#   (RF = TRUE, RF_condi = FALSE)
# - A seed is required here (RF_SEED)
#-----
sel_ex3 <- select_pred(
  simu_data,
  Y = "Yb1",
  Z = "Yb2",
  ID = 1,
  OUT = "Y",
  quanti = c(3, 8),
  nominal = c(1, 4:5, 7),
  ordinal = c(2, 6),
  thresh_cat = 0.30,
  thresh_num = 0.70,
  thresh_Y = 0.20,
  RF = TRUE,
  RF_condi = FALSE,
  RF_SEED = 3023
)
### Example 4
#-----
# - Same data, variable types and thresholds as example 1
# - A RF approach is used to estimate the conditional variable importance
#   measures and determine the best subset of predictors (RF_condi = TRUE)
# - This approach requires converting the numeric variables: only "Age"
#   here (convert_num = 8), discretized in 3 levels (convert_class = 3)
# - NOTE(review): unlike example 1, this call sets OUT = "Z" -- confirm
#   that this is intended
#-----
sel_ex4 <- select_pred(
  simu_data,
  Y = "Yb1",
  Z = "Yb2",
  ID = 1,
  OUT = "Z",
  quanti = c(3, 8),
  nominal = c(1, 4:5, 7),
  ordinal = c(2, 6),
  convert_num = 8,
  convert_class = 3,
  thresh_cat = 0.30,
  thresh_num = 0.70,
  thresh_Y = 0.20,
  RF = TRUE,
  RF_condi = TRUE,
  RF_condi_thr = 0.60,
  RF_SEED = 3023
)
### Example 5
#-----
# - Starting with a unique database: only Y is supplied
# - Same thresholds as example 1, no RF approach
# - Column 3 of simu_data is dropped, so the indexes of the columns
#   located after it are shifted by one compared with example 1
#-----
# Base A: rows of simu_data where DB equals "A", without column 3
simu_A <- simu_data[simu_data$DB == "A", -3]
sel_ex5 <- select_pred(
  simu_A,
  Y = "Yb1",
  quanti = 7,
  nominal = c(1, 3:4, 6),
  ordinal = c(2, 5),
  thresh_cat = 0.30,
  thresh_num = 0.70,
  thresh_Y = 0.20,
  RF = FALSE
)
### Example 6
#-----
# - Starting with a unique database: only Y is supplied
# - A RF approach is used to estimate the conditional variable importance
#   measures and determine the best subset of predictors (RF_condi = TRUE)
# - This approach requires converting the numeric variables: only "Age"
#   here (convert_num = 7), discretized in 3 levels (convert_class = 3)
#-----
# Base B: rows of simu_data where DB equals "B", without column 2
simu_B <- simu_data[simu_data$DB == "B", -2]
sel_ex6 <- select_pred(
  simu_B,
  Y = "Yb2",
  quanti = 7,
  nominal = c(1, 3:4, 6),
  ordinal = c(2, 5),
  convert_num = 7,
  convert_class = 3,
  thresh_cat = 0.30,
  thresh_num = 0.70,
  thresh_Y = 0.20,
  RF = TRUE,
  RF_condi = TRUE,
  RF_condi_thr = 0.60,
  RF_SEED = 3023
)
# }
# Run the code above in your browser using DataLab