require("CMF")
# Create data for a circular setup with three matrices and three
# object sets of varying sizes.
X <- list()
D <- c(10, 20, 30)
inds <- matrix(0, nrow = 3, ncol = 2)
# Matrix 1 is between sets 1 and 2 and has continuous data
inds[1, ] <- c(1, 2)
X[[1]] <- matrix(
rnorm(D[inds[1, 1]] * D[inds[1, 2]], 0, 1),
nrow = D[inds[1, 1]]
)
# Matrix 2 is between sets 1 and 3 and has binary data
inds[2, ] <- c(1, 3)
X[[2]] <- matrix(
round(runif(D[inds[2, 1]] * D[inds[2, 2]], 0, 1)),
nrow = D[inds[2, 1]]
)
# Matrix 3 is between sets 2 and 3 and has count data
inds[3, ] <- c(2, 3)
X[[3]] <- matrix(
round(runif(D[inds[3, 1]] * D[inds[3, 2]], 0, 6)),
nrow = D[inds[3, 1]]
)
# Convert the data into the right format
triplets <- lapply(X, matrix_to_triplets)
# Missing entries correspond to missing rows in the triple representation
# so they can be removed from training data by simply taking a subset
# of the rows.
train <- list()
test <- list()
keep_for_training <- c(100, 200, 300)
for (m in 1:3) {
subset <- sample(nrow(triplets[[m]]), keep_for_training[m])
train[[m]] <- triplets[[m]][subset, ]
test[[m]] <- triplets[[m]][-subset, ]
}
# Learn the model with the correct likelihoods
K <- 4
likelihood <- c("gaussian", "bernoulli", "poisson")
opts <- getCMFopts()
opts$iter.max <- 500 # Less iterations for faster computation
model <- CMF(train, inds, K, likelihood, D, test = test, opts = opts)
# Check the predictions
# Note that the data created here has no low-rank structure,
# so we should not expect good accuracy.
print(test[[1]][1:3, ])
print(model$out[[1]][1:3, ])
# predictions for the test set using the previously learned model
out <- predictCMF(test, model)
print(out$out[[1]][1:3, ])
print(out$error[[1]])
# ...this should be the same as the output provided by CMF()
print(model$out[[1]][1:3, ])
Run the code above in your browser using DataLab