## see wsjibm, congress109, and we8there for data examples
## Simulation Parameters
K <- 10
n <- 150
p <- 200
omega <- t(rdir(n, rep(1/K,K)))
theta <- rdir(K, rep(1/p,p))
## Simulated counts
Q <- omega%*%t(theta)
counts <- matrix(ncol=p, nrow=n)
totals <- rpois(n, 250)
for(i in 1:n){ counts[i,] <- rmultinom(1, size=totals[i], prob=Q[i,]) }
## Bayes Factor model selection (should choose K or nearby)
simselect <- topics(counts, K=5:15, tol=.01, verb=1)
print(simselect$K)
## MAP fit for given K=10
simfit <- topics(counts, K=K, verb=2)
## Adjust for label switching and plot the fit (color by topic)
toplab <- rep(0,K)
for(k in 1:K){ toplab[k] <- which.min(colSums(abs(simfit$omega-omega[,k]))) }
par(mfrow=c(1,2))
tpxcols <- matrix(rainbow(K), ncol=nrow(theta), byrow=TRUE)
plot(theta,simfit$theta[,toplab], ylab="fitted values", pch=21, bg=tpxcols)
plot(omega,simfit$omega[,toplab], ylab="fitted values", pch=21, bg=tpxcols)
title("True vs Fitted Values (color by topic)", outer=TRUE, line=-2)
## The S3 method plot functions
par(mfrow=c(1,2))
plot(simfit, lgd.K=2)
plot(simfit, type="resid")
Run the code above in your browser using DataLab