# Create some compatible data (here, from iris data set):
# Data should be in 0 to 1 range if random values in that range are used
# as initial weights (the default method).
# Thus, LVQ expects data in 0 to 1 range, scale the (numeric) data...
DATA <- as.matrix(iris[1:4])
c_min <- apply(DATA, 2, FUN = "min")
c_max <- apply(DATA, 2, FUN = "max")
c_rng <- c_max - c_min
# min-max normalize each column to [0, 1]:
DATA <- sweep(DATA, 2, FUN = "-", c_min)
DATA <- sweep(DATA, 2, FUN = "/", c_rng)
NUM_VARIABLES <- ncol(DATA)
# create a vector of desired class ids (consecutive ids, starting from 0):
# (use an integer literal so CLASS stays integer-typed)
CLASS <- as.integer(iris$Species) - 1L
NUM_CLASSES <- length(unique(CLASS))
# avoid using data with NA or other special values:
# (anyNA() is the idiomatic, short-circuiting form of sum(is.na(x)) > 0)
if (anyNA(DATA))
  stop("NAs found in DATA", call. = FALSE)
if (anyNA(CLASS))
  stop("NAs found in CLASS", call. = FALSE)
# Example 1:
# (Note: the example uses DATA and CLASS variables defined earlier).
# use half of the data to train, the other half to evaluate how well LVQ was
# trained (interlaced half is used to select members of these data sets):
# (odd-numbered rows train the net, even-numbered rows evaluate it --
# equivalent to recycling c(TRUE, FALSE) / c(FALSE, TRUE) masks)
l1_train_rows <- seq(1, nrow(DATA), by = 2)
l1_test_rows <- seq(2, nrow(DATA), by = 2)
l1_train_dataset <- DATA[l1_train_rows, ]
l1_train_class <- CLASS[l1_train_rows]
l1_test_dataset <- DATA[l1_test_rows, ]
l1_test_class <- CLASS[l1_test_rows]
# now create the NN:
l1 <- new("LVQs")
# train it on the training half (100 training epochs):
l1$encode(l1_train_dataset, l1_train_class, 100)
# recall the held-out half (a simple check of how well the LVQ was trained):
l1_recalled_class_ids <- l1$recall(l1_test_dataset)
# show results (count of recalled ids matching the known test classes):
cat(
"Example 1 results: Correct ",
sum(l1_recalled_class_ids == l1_test_class),
"out of",
nrow(l1_test_dataset),
".\n"
)
# Example 2: (playing around with some optional settings)
# (Note: the example uses DATA, CLASS, NUM_CLASSES variables defined earlier).
# create the NN:
l2 <- new("LVQs")
# Optionally, the output layer could be expanded, e.g. use 2 nodes per each class:
# (must be set before encode(); gives each class two competing codebook vectors)
l2$set_number_of_nodes_per_class(2)
# Optionally, for experimentation negative reinforcement can be disabled:
# (winning nodes of the wrong class are then not pushed away from the input)
l2$disable_punishment()
# train it (third argument is the number of training epochs):
l2$encode(DATA, CLASS, 100)
# recall the same data (a simple check of how well the LVQ was trained):
l2_recalled_class_ids <- l2$recall(DATA)
# Done. Optional part for further examining results of training:
# collect the connection weights (codebook vector coordinates), number
# of rewards per node and corresponding class:
# get_weights() returns a flat vector; it is reshaped to one row per output
# node with ncol(DATA) columns (byrow = TRUE assumes each node's weights are
# stored contiguously -- TODO confirm against the LVQs implementation).
# The nested rep() builds the class-id column: each id in 0..NUM_CLASSES-1 is
# repeated once per node of that class, presumably matching the node order
# used internally (nodes grouped by class) -- verify against the module docs.
l2_codebook_vector_info <-
cbind(
matrix(l2$get_weights(),
ncol = ncol(DATA),
byrow = TRUE),
l2$get_number_of_rewards(),
rep(
0:(NUM_CLASSES - 1),
rep(l2$get_number_of_nodes_per_class(),
NUM_CLASSES)
)
)
# label the columns: one per input variable, plus reward count and class id:
colnames(l2_codebook_vector_info) <-
c(colnames(DATA), "Rewarded", "Class")
print(l2_codebook_vector_info)
# plot recalled classification:
# (plotting the DATA matrix shows the first two input variables only;
# point symbol (pch) encodes the class id each point was recalled as)
plot(
DATA,
pch = l2_recalled_class_ids,
main = "LVQ recalled clusters (LVQs module)",
xlim = c(-0.2, 1.2),
ylim = c(-0.2, 1.2)
)
# plot connection weights (a.k.a codebook vectors):
# the big circles are codebook vectors, (crossed-out if they were never used
# to assign a training data point to the correct class, i.e. never rewarded)
# only the first two weight coordinates are overlaid, matching the two
# plotted data variables; col shifts class ids into higher palette indices
# so codebook-vector colors differ from the data points:
points(
l2_codebook_vector_info[, 1:2],
cex = 4,
pch = ifelse(l2_codebook_vector_info[, "Rewarded"] > 0, 1, 13),
col = l2_codebook_vector_info[, "Class"] + 10
)
# show results (count of recalled ids matching the known classes):
cat(
"Example 2 results: Correct ",
sum(l2_recalled_class_ids == CLASS),
"out of",
nrow(DATA),
".\n"
)
# Example 3 (demonstrate 'setup' and some other methods it allows):
# (Note: uses DATA, CLASS, NUM_VARIABLES, NUM_CLASSES defined earlier).
# create the NN:
l3 <- new("LVQs")
l3_number_of_output_nodes_per_class <- 3
# setup the LVQ (input dimension, number of classes, nodes per class):
l3$setup(NUM_VARIABLES,
         NUM_CLASSES,
         l3_number_of_output_nodes_per_class)
l3$set_weight_limits(-0.5, 1.5)
# first coefficient rewards correct nodes; the second (negative, scaled by
# the relative frequency of class 0) presumably controls punishment strength
# -- confirm against the LVQs module documentation:
l3$set_encoding_coefficients(0.2, -sum(CLASS == 0) / length(CLASS))
# experiment with setting initial weights (codebook vectors) per output node;
# here, weights are set to the mean vector of the training set data for the
# class the output node corresponds to:
class_means <- aggregate(DATA, list(CLASS), FUN = mean)
# transpose and drop the grouping row, leaving a (variables x classes) matrix:
class_means <- t(class_means)[-1, ]
# stack one copy of the class-means matrix per output node of each class
# (replaces growing a matrix with rbind() inside a loop):
l3_initial_weights <-
  do.call(rbind,
          rep(list(class_means), l3_number_of_output_nodes_per_class))
l3$set_weights(as.vector(l3_initial_weights))
# now train it:
l3$encode(DATA, CLASS, 100)
# recall the same data (a simple check of how well the LVQ was trained);
# second recall() argument is 0 here -- presumably a minimum-rewards
# threshold for nodes to participate; verify against the module docs:
l3_recalled_class_ids <- l3$recall(DATA, 0)
# show results (count of recalled ids matching the known classes):
cat(
  "Example 3 results: Correct ",
  sum(l3_recalled_class_ids == CLASS),
  "out of",
  nrow(DATA),
  ".\n"
)
# Run the code above in your browser using DataLab