# ---- Generate a toy dataset for usage examples: "data" ----
set.seed(123)
n <- 200
x1 <- sample(c("A", "B", "C", "D"), size = n, replace = TRUE, prob = c(0.1, 0.2, 0.3, 0.4))
x2 <- sample(c("W", "X", "Y", "Z"), size = n, replace = TRUE, prob = c(0.4, 0.3, 0.2, 0.1))
x3 <- sample(c("E", "F", "G", "H", "I"), size = n,
replace = TRUE, prob = c(0.2, 0.3, 0.2, 0.2, 0.1))
x4 <- sample(c("A", "B", "C", "D"), size = n, replace = TRUE)
x5 <- sample(c("L", "M", "N"), size = n, replace = TRUE)
x6 <- sample(c("E", "F", "G", "H", "I"), size = n, replace = TRUE)
# Generate y variable dependent on x1 to x3
x1_num <- as.numeric(factor(x1, levels = c("A", "B", "C", "D")))
x2_num <- as.numeric(factor(x2, levels = c("W", "X", "Y", "Z")))
x3_num <- as.numeric(factor(x3, levels = c("E", "F", "G", "H", "I")))
# Calculate y with added noise
y_numeric <- 3*x1_num + 2*x2_num - 2*x3_num + rnorm(n,mean=0,sd=2)
# Discretize y into categories
y <- cut(y_numeric, breaks = 10, labels = paste0("Category", 1:10))
# Combine into a dataframe
data <- data.frame(x1, x2, x3, x4, x5, x6, y)
# The outcome of the toy dataset is dependent on x1, x2, and x3
# but is independent of x4, x5, and x6.
head(data)
# ---- Usage Examples ----
## Return the suggested combination with the default settings:
CASMI.mineCombination(data)
## Return combinations when the number of variables to be included
## in each combination is specified (e.g., NumOfVar = 2):
CASMI.mineCombination(data, NumOfVar = 2)
## Return combinations when the number of variables to be included
## in each combination is specified (e.g., NumOfVar = 2),
## while the number of top combinations to return is specified
## (e.g., NumOfComb = 2):
CASMI.mineCombination(data,
NumOfVar = 2,
NumOfComb = 2)
Run the code above in your browser using DataLab