# NOT RUN {
library(SSLR)
## Load Wine data set
data(wine)
cls <- which(colnames(wine) == "Wine")
x <- wine[, - cls] # instances without classes
y <- wine[, cls] # the classes
x <- scale(x)
set.seed(20)
# Use 50% of instances for training
tra.idx <- sample(x = length(y), size = ceiling(length(y) * 0.5))
xtrain <- x[tra.idx,]
ytrain <- y[tra.idx]
# Use 70% of train instances as unlabeled set
tra.na.idx <- sample(x = length(tra.idx), size = ceiling(length(tra.idx) * 0.7))
ytrain[tra.na.idx] <- NA
# Use the other 50% of instances for inductive testing
tst.idx <- setdiff(1:length(y), tra.idx)
xitest <- x[tst.idx,] # testing instances
yitest <- y[tst.idx] # classes of instances in xitest
# Use the unlabeled examples for transductive testing
xttest <- x[tra.idx[tra.na.idx],] # transductive testing instances
yttest <- y[tra.idx[tra.na.idx]] # classes of instances in xttest
library(caret)
#PREPARE DATA
data <- cbind(xtrain, Class = ytrain)
dtrain <- as.matrix(proxy::dist(x = xtrain, method = "euclidean", by_rows = TRUE))
ditest <- as.matrix(proxy::dist(x = xitest, y = xtrain, method = "euclidean", by_rows = TRUE))
ddata <- cbind(dtrain, Class = ytrain)
ddata <- as.data.frame(ddata)
ktrain <- as.matrix(exp(-0.048 * dtrain ^ 2))
kdata <- cbind(ktrain, Class = ytrain)
kdata <- as.data.frame(kdata)
ktrain <- as.matrix(exp(-0.048 * dtrain ^ 2))
kitest <- as.matrix(exp(-0.048 * ditest ^ 2))
## Example: Training from a set of instances with 1-NN (knn3) as base classifier.
gen.learner <- function(indexes, cls)
caret::knn3(x = xtrain[indexes,], y = cls, k = 1)
gen.pred <- function(model, indexes)
predict(model, xtrain[indexes,])
trControl_selfTrainingG1 <- list(gen.learner = gen.learner, gen.pred = gen.pred)
md1 <- train_generic(ytrain, method = "selfTrainingG", trControl = trControl_selfTrainingG1)
p1 <- predict(md1$model, xitest, type = "class")
table(p1, yitest)
confusionMatrix(p1, yitest)$overall[1]
## Example: Training from a distance matrix with 1-NN (oneNN) as base classifier.
dtrain <- as.matrix(proxy::dist(x = xtrain, method = "euclidean", by_rows = TRUE))
gen.learner <- function(indexes, cls) {
m <- SSLR::oneNN(y = cls)
attr(m, "tra.idxs") <- indexes
m
}
gen.pred <- function(model, indexes) {
tra.idxs <- attr(model, "tra.idxs")
d <- dtrain[indexes, tra.idxs]
prob <- predict(model, d, distance.weighting = "none")
prob
}
trControl_selfTrainingG2 <- list(gen.learner = gen.learner, gen.pred = gen.pred)
md2 <- train_generic(ytrain, method = "selfTrainingG", trControl = trControl_selfTrainingG2)
ditest <- proxy::dist(x = xitest, y = xtrain[md2$instances.index,],
method = "euclidean", by_rows = TRUE)
p2 <- predict(md2$model, ditest, type = "class")
table(p2, yitest)
confusionMatrix(p2, yitest)$overall[1]
# }
Run the code above in your browser using DataLab