# NOT RUN {
#############################################
#############################################
## EXAMPLE ON SYNTHETIC DATA: ##
## Can change n, p, q, p.act, max.steps, ##
## and burnin below to reproduce the ##
## simulations from Section 5.1 of Bai ##
## and Ghosh ##
#############################################
#############################################
n <- 50
p <- 10
q <- 3
# Active number of predictors is 2
p.act <- 2
#############################
# Generate design matrix X. #
#############################
times <- 1:p
rho <- 0.5
H <- abs(outer(times, times, "-"))
V <- rho^H
mu <- rep(0, p)
# Rows of X are simulated from MVN(0,V)
X <- mvrnorm(n, mu, V)
# Center X
X <- scale(X, center=TRUE, scale=FALSE)
#########################################
# Generate true coefficient matrix B_0. #
#########################################
# Entries in nonzero rows are drawn from Unif[(-5,-0.5)U(0.5,5)]
B.act <- runif(p.act*q,-5,4)
disjoint <- function(x){
if(x <= -0.5)
return(x)
else
return(x+1)
}
B.act <- matrix(sapply(B.act, disjoint),p.act,q)
# Set rest of the rows equal to 0
B.true <- rbind(B.act,matrix(0,p-p.act,q))
B.true <- B.true[sample(1:p),] # permute the rows
########################################
# Generate true error covariance Sigma #
########################################
sigma.sq=2
times <- 1:q
H <- abs(outer(times, times, "-"))
Sigma <- sigma.sq * rho^H
###########################
# Generate noise matrix E #
###########################
mu <- rep(0,q)
E <- mvrnorm(n, mu, Sigma)
##############################
# Generate response matrix Y #
##############################
Y <- crossprod(t(X),B.true) + E
#########################################
# Run the MBSP model on synthetic data. #
#########################################
# For optimal estimation, change max.steps to 15,000
# and change burnin to be 5000
mbsp.model = mbsp.tpbn(X=X, Y=Y, max.steps = 2000, burnin=1000)
# }
# NOT RUN {
################################
# Compute MSE_est and MSE_pred #
################################
MSE.est <- sum((mbsp.model$B.est-B.true)^2)/(p*q)
MSE.est # Note that in the paper it is scaled by factor of 100
MSE.pred <- sum((crossprod(t(X), mbsp.model$B.est - B.true))^2)/(n*q)
MSE.pred # Note that in the paper it is scaled by a factor of 100
################################
# Compute the FDR, FNR, and MP #
################################
# Initialize vector for classification of predictors i = 1,...,p
predictor.classifications <- rep(0, p)
# Initialize vector for true classifications of predictors i = 1,...,p
true.classifications <- rep(0,p)
# True active predictors in B.True
true.active.indices <- which(rowSums(B.true) != 0)
# Rest true signals as 1
true.classifications[true.active.indices] <- 1
# Active predictors according to our estimates
predictor.classifications[mbsp.model$active.predictors] <- 1
# Keep track of false positives and false negatives
false.pos <- length(which(predictor.classifications != 0 & true.classifications == 0))
tot.pos <- length(which(predictor.classifications != 0))
false.neg <- length(which(predictor.classifications == 0 & true.classifications != 0))
tot.neg <- length(which(predictor.classifications == 0))
# Compute FDR, FNR, and MP
fdr.rate <- false.pos/tot.pos
fnr.rate <- false.neg/tot.neg
mis.prob <- (false.pos+false.neg)/p
fdr.rate
fnr.rate
mis.prob
# }
# NOT RUN {
# }
# NOT RUN {
# }
# NOT RUN {
#
#
#############################################
#############################################
## MBSP analysis of yeast cell cycle ##
## data set (Section 5.2 of Bai and Ghosh) ##
#############################################
#############################################
# Load yeast data set
data(yeast)
# Set seed
set.seed(12345)
# Design matrix X and response matrix Y
X <- yeast$x
X <- scale(X, center=TRUE, scale=FALSE)
Y <- yeast$y
Y <- scale(Y, center=TRUE, scale=FALSE)
# Make sure they are matrices
X <- matrix(X, nrow=nrow(X))
Y <- matrix(Y, nrow=nrow(Y))
###################################
# Run the MBSP model on the yeast #
# cell cycle data set #
###################################
mbsp.model = mbsp.tpbn(X=X,Y=Y)
# Display the active predictors (correspond to row indices in B)
mbsp.model$active.predictors
# Display names of four of the active TFs
colnames(yeast$x)[2]
colnames(yeast$x)[38]
colnames(yeast$x)[61]
colnames(yeast$x)[94]
# For horizontal axis (time)
time <- seq(from=0, to=119, by=7)
##############################################
# Open pdf to plot 4 of the active TFs. #
# This reproduces Figure 1 of Bai and Ghosh #
##############################################
pdf(file=file.path(tempdir(), "significantTFs.pdf"), width=5, height=5)
par(mfrow=c(2,2), mar=c(2,2,2,2))
plot(time, mbsp.model$B.est[2,], type="l", cex.axis = 0.5, xlab="",
main="ACE2", ylim=c(-0.6,0.6), ylab="")
abline(h=0)
lines(time, mbsp.model$lower.endpoints[2,], lty=2)
lines(time, mbsp.model$upper.endpoints[2,], lty=2)
plot(time, mbsp.model$B.est[38,], type="l", cex.axis = 0.5, xlab="",
main="HIR1", ylim=c(-0.6,0.6), ylab="")
abline(h=0)
lines(time, mbsp.model$lower.endpoints[38,], lty=2)
lines(time, mbsp.model$upper.endpoints[38,], lty=2)
plot(time, mbsp.model$B.est[61,], type="l", cex.axis = 0.5, xlab="",
main="NDD1", ylim=c(-0.6,0.6), ylab="")
abline(h=0)
lines(time, mbsp.model$lower.endpoints[61,], lty=2)
lines(time, mbsp.model$upper.endpoints[61,], lty=2)
plot(time, mbsp.model$B.est[94,], type="l", cex.axis = 0.5, xlab="",
main="SWI6", ylim=c(-0.6,0.6), ylab="")
abline(h=0)
lines(time, mbsp.model$lower.endpoints[94,], lty=2)
lines(time, mbsp.model$upper.endpoints[94,], lty=2)
dev.off()
# }
# NOT RUN {
# }
Run the code above in your browser using DataLab