library(cmfrec)
### Simplest example
### Build a 10-row matrix holding the integers 1..50 (column-major) and
### blank out two cells: (row 2, col 1) and (row 8, col 3).
SeqMat <- matrix(seq_len(50), nrow = 10)
SeqMat[cbind(c(2, 8), c(1, 3))] <- NaN
### Fit the model on the incomplete matrix, then pass the same matrix to
### imputeX(); the result is left as a top-level expression so it prints.
m <- CMF(SeqMat, k = 1, lambda = 1e-10, nthreads = 1L, verbose = FALSE)
imputeX(m, SeqMat)
### Better example with multivariate normal data
### MASS is only used through the namespaced call MASS::mvrnorm(), so we
### check that it is installed without attaching it to the search path.
if (requireNamespace("MASS", quietly = TRUE)) {
  ### Generate random data, set some values as NA
  set.seed(1)
  n_rows <- 1000
  n_cols <- 5
  mu <- rnorm(n_cols)
  S <- matrix(rnorm(n_cols^2), nrow = n_cols)
  ### t(S) %*% S is symmetric positive semi-definite, i.e. usable as a
  ### covariance matrix for mvrnorm().
  S <- t(S) %*% S
  X <- MASS::mvrnorm(n_rows, mu, S)
  X_na <- X
  ### Logical mask of the cells to hide (each cell with probability 0.15).
  values_NA <- matrix(runif(n_rows * n_cols) < .15, nrow = n_rows)
  X_na[values_NA] <- NaN

  ### In the event that any column is fully missing, drop it from every
  ### matrix that is later indexed with the mask. X must be subset as
  ### well: otherwise X[values_NA] would silently recycle a mask that is
  ### narrower than X and compare the wrong cells.
  cols_remove <- colSums(is.na(X_na)) == n_rows
  if (any(cols_remove)) {
    X <- X[, !cols_remove, drop = FALSE]
    X_na <- X_na[, !cols_remove, drop = FALSE]
    values_NA <- values_NA[, !cols_remove, drop = FALSE]
  }

  ### Impute missing values with model
  model <- CMF(X_na, k = 3, lambda = c(0, 0, 1, 1, 1, 1),
               user_bias = FALSE,
               verbose = FALSE, nthreads = 1L)
  X_imputed <- imputeX(model, X_na)
  cat(sprintf("RMSE for imputed values w/model: %f\n",
              sqrt(mean((X[values_NA] - X_imputed[values_NA])^2))))

  ### Compare against simple mean imputation
  ### Column means over the observed entries only.
  X_means <- colMeans(X_na, na.rm = TRUE)
  X_imp_mean <- X_na
  ### col(values_NA)[values_NA] gives, for every hidden cell (in
  ### column-major order), the index of the column it belongs to, so each
  ### hidden cell receives the mean of its own column. Using the actual
  ### dimensions of the mask keeps this valid after columns were dropped.
  X_imp_mean[values_NA] <- X_means[col(values_NA)[values_NA]]
  cat(sprintf("RMSE for imputed values w/means: %f\n",
              sqrt(mean((X[values_NA] - X_imp_mean[values_NA])^2))))
}
### Run the code above in your browser using DataLab