# 1. Set parameters for data generation
# Use smaller dimensions for a quick example
n <- 50 # Number of samples
p <- 30 # Number of features
true_q <- 2 # True number of factors
# 2. Generate data from a Poisson factor model
set.seed(123) # For reproducibility
# Factor matrix (scores)
FF <- matrix(rnorm(n * true_q), nrow = n, ncol = true_q)
# Loading matrix
BB <- matrix(runif(p * true_q, min = -1, max = 1), nrow = p, ncol = true_q)
# Intercept term
a <- runif(p, min = 0, max = 1)
# Enforce identifiability for a unique generating model
FF0 <- add_identifiability(FF, BB, a)$H
BB0 <- add_identifiability(FF, BB, a)$B
alpha <- add_identifiability(FF, BB, a)$mu
# Calculate the mean matrix (lambda) with some noise
lambda <- exp(FF0 %*% t(BB0) + rep(1, n) %*% t(alpha) + matrix(rnorm(n*p, 0, 0.5), n, p))
# Generate the final count data matrix 'x'
x <- matrix(rpois(n * p, lambda = as.vector(lambda)), nrow = n, ncol = p)
# 3. Run multiDT to find the best number of factors
# Use small K and rmax for a quick example run
cv_results <- multiDT(x, K = 2, rmax = 4)
# 4. Print results and select the best 'r' based on the minimum TCV
print(cv_results$TCV)
best_r <- which.min(cv_results$TCV)
Run the code above in your browser using DataLab