# Classification with Oblique Decision Tree.
# Fit an entropy-split tree on part of the `seeds` data and score it on the rest.
data(seeds)
set.seed(221212)
# 100 of the row indices 1..209 go to training; all other rows are held out.
train <- sample.int(209, size = 100)
train_data <- data.frame(seeds[train, ])
test_data <- data.frame(seeds[-train, ])
tree <- ODT(varieties_of_wheat ~ ., data = train_data, split = "entropy")
# Column 8 is withheld from the predictors and used as the reference labels.
pred <- predict(tree, test_data[-8])
# classification error
(mean(pred != test_data[[8]]))
# Regression with Oblique Decision Tree.
# Fit an MSE-split tree on part of the `body_fat` data and score it on the rest.
data(body_fat)
set.seed(221212)
# 100 of the row indices 1..252 go to training; all other rows are held out.
train <- sample.int(252, size = 100)
train_data <- data.frame(body_fat[train, ])
test_data <- data.frame(body_fat[-train, ])
tree <- ODT(
  Density ~ .,
  data = train_data,
  split = "mse",
  NodeRotateFun = "RotMatPPO",
  paramList = list(model = "Log", dimProj = "Rand")
)
# Column 1 is withheld from the predictors and used as the reference response.
pred <- predict(tree, test_data[-1])
# estimation error
mean((pred - test_data[[1]])^2)
# Use "Z" as the splitting variable to build a linear model tree for "X" and "y".
set.seed(10)
cutpoint <- 50
# Simulated data: 10 Gaussian predictors plus three integer-valued covariates.
X <- matrix(rnorm(100 * 10), nrow = 100, ncol = 10)
age <- sample(20:80, size = 100, replace = TRUE)
height <- sample(50:200, size = 100, replace = TRUE)
weight <- sample(5:150, size = 100, replace = TRUE)
Z <- cbind(age, height, weight)
# The mean is X[, 1] + X[, 2] for age <= cutpoint and X[, 1] + X[, 3] above it.
mu <- X[, 1] + ifelse(age <= cutpoint, X[, 2], X[, 3])
y <- mu + rnorm(100)
# Regression model tree
my.tree <- ODT(
  X = X, y = y, Xsplit = Z,
  split = "linear", lambda = 0,
  NodeRotateFun = "RotMatRF",
  glmnetParList = list(lambda = 0, family = "gaussian")
)
pred <- predict(my.tree, X, Xsplit = Z)
# fitting error
mean((pred - y)^2)
mean((my.tree$predicted - y)^2)
# Classification model tree
# Binary response: 1 where y is positive, 0 otherwise.
y1 <- as.numeric(y > 0)
my.tree <- ODT(
  X = X, y = y1, Xsplit = Z,
  split = "linear", lambda = 0,
  NodeRotateFun = "RotMatRF",
  MinLeaf = 10, MaxDepth = 5,
  glmnetParList = list(family = "binomial")
)
# The same fitted tree is queried twice, once per prediction type.
(class <- predict(my.tree, X, Xsplit = Z, type = "pred"))
(prob <- predict(my.tree, X, Xsplit = Z, type = "prob"))
# Projection analysis of the oblique decision tree.
data(iris)
tree <- ODT(
  Species ~ .,
  data = iris,
  split = "gini",
  paramList = list(model = "PPR", numProj = 1)
)
# Show the "projections" component of the fitted tree, rounded to 3 decimals.
print(round(tree[["projections"]], digits = 3))
### Train ODT on one-of-K encoded categorical data ###
# Note that the category variable must be placed at the beginning of the predictor X
# as in the following example.
set.seed(22)
# Two categorical predictors (3 and 5 categories) and three Gaussian predictors.
Xcol1 <- sample(c("A", "B", "C"), size = 100, replace = TRUE)
Xcol2 <- sample(c("1", "2", "3", "4", "5"), size = 100, replace = TRUE)
Xcon <- matrix(rnorm(100 * 3), nrow = 100, ncol = 3)
X <- data.frame(Xcol1, Xcol2, Xcon)
# Positions of the categorical columns in X; catLabel starts out empty.
Xcat <- c(1, 2)
catLabel <- NULL
y <- as.factor(sample(c(0, 1), size = 100, replace = TRUE))
# This first fit passes Xcat = NULL rather than the positions stored above.
# NOTE(review): presumably ODT then decides by itself which predictors are
# categorical -- confirm against the ODT documentation.
tree <- ODT(X, y, split = "entropy", Xcat = NULL)
head(X)
#> Xcol1 Xcol2 X1 X2 X3
#> 1 B 5 -0.04178453 2.3962339 -0.01443979
#> 2 A 4 -1.66084623 -0.4397486 0.57251733
#> 3 B 2 -0.57973333 -0.2878683 1.24475578
#> 4 B 1 -0.82075051 1.3702900 0.01716528
#> 5 C 5 -0.76337897 -0.9620213 0.25846351
#> 6 A 5 -0.37720294 -0.1853976 1.04872159
# one-of-K encode each categorical feature and store in X1
# The category labels are the factor levels of each categorical column. The
# number of dummy columns is derived from those same labels, so widths and
# labels cannot disagree (unique() counts NA as a category, levels() does not).
# Iterating over the columns also avoids apply(), which coerces the data frame
# to a matrix first.
catLabel <- lapply(X[Xcat], function(x) levels(as.factor(x)))
numCat <- lengths(catLabel)
# initialize the dummy-variable matrix X1 (one column per category)
X1 <- matrix(0, nrow = nrow(X), ncol = sum(numCat))
col.idx <- 0L
# convert categorical feature to K dummy variables
for (j in seq_along(Xcat)) {
  # columns of X1 reserved for the j-th categorical feature; seq_len() stays
  # empty when a feature has no categories, whereas a:b would count backwards
  catMap <- col.idx + seq_len(numCat[j])
  # entry (i, k) is 1 when observation i falls in the k-th category, else 0
  X1[, catMap] <- outer(as.character(X[[Xcat[j]]]), catLabel[[j]], "==") + 0
  col.idx <- col.idx + numCat[j]
}
# Put the dummy variables in front of the remaining predictors; drop = FALSE
# keeps a data frame (and its column name) even if only one predictor is left.
X <- cbind(X1, X[, -Xcat, drop = FALSE])
# Name each dummy column "<feature index>.<category>". The remaining columns
# keep the names they already carry instead of being hard-coded here.
colnames(X)[seq_len(ncol(X1))] <- paste(
  rep(seq_along(numCat), numCat), unlist(catLabel),
  sep = "."
)
# Print the result after processing of category variables.
head(X)
#> 1.A 1.B 1.C 2.1 2.2 2.3 2.4 2.5 X1 X2 X3
#> 1 0 1 0 0 0 0 0 1 -0.04178453 2.3962339 -0.01443979
#> 2 1 0 0 0 0 0 1 0 -1.66084623 -0.4397486 0.57251733
#> 3 0 1 0 1 0 0 0 0 -0.57973333 -0.2878683 1.24475578
#> 4 0 1 0 1 0 0 0 0 -0.82075051 1.3702900 0.01716528
#> 5 0 0 1 0 0 0 0 1 -0.76337897 -0.9620213 0.25846351
#> 6 1 0 0 0 0 0 0 1 -0.37720294 -0.1853976 1.04872159
catLabel
#> $Xcol1
#> [1] "A" "B" "C"
#>
#> $Xcol2
#> [1] "1" "2" "3" "4" "5"
# Refit on the encoded predictors, this time passing Xcat and the category
# labels explicitly.
tree <- ODT(
  X, y,
  split = "gini",
  Xcat = c(1, 2),
  catLabel = catLabel,
  NodeRotateFun = "RotMatRF"
)
# Run the code above in your browser using DataLab