# The examples below that are run by CRAN use n.cores=2 to limit the number
# of cores to two for CRAN check compliance.
# In practice you can simply omit this option to let bayesreg use as many
# as are available (which is usually total number of cores - 1)
# If you do not want to use multiple cores you can set parallel=F
# -----------------------------------------------------------------
# Example 1: Fitting linear models to data and generating credible intervals
X = 1:10;
y = c(-0.6867, 1.7258, 1.9117, 6.1832, 5.3636, 7.1139, 9.5668, 10.0593, 11.4044, 6.1677);
df = data.frame(X,y)
# Gaussian ridge
rv.L <- bayesreg(y~., df, model = "laplace", prior = "ridge", n.samples = 1e3, n.cores = 2)
# Plot the different estimates with credible intervals
plot(df$X, df$y, xlab="x", ylab="y")
yhat <- predict(rv.L, df, bayes.avg=TRUE)
lines(df$X, yhat[,1], col="blue", lwd=2.5)
lines(df$X, yhat[,3], col="blue", lwd=1, lty="dashed")
lines(df$X, yhat[,4], col="blue", lwd=1, lty="dashed")
yhat <- predict(rv.L, df, bayes.avg=TRUE, sum.stat = "median")
lines(df$X, yhat[,1], col="red", lwd=2.5)
legend(1,11,c("Posterior Mean (Bayes Average)","Posterior Median (Bayes Average)"),
lty=c(1,1),col=c("blue","red"),lwd=c(2.5,2.5), cex=0.7)
# -----------------------------------------------------------------
# Example 2: Predictive density for continuous data
X = 1:10;
y = c(-0.6867, 1.7258, 1.9117, 6.1832, 5.3636, 7.1139, 9.5668, 10.0593, 11.4044, 6.1677);
df = data.frame(X,y)
# Gaussian ridge
rv.G <- bayesreg(y~., df, model = "gaussian", prior = "ridge", n.samples = 1e3, n.cores = 2)
# Produce predictive density for X=2
df.tst = data.frame(y=seq(-7,12,0.01),X=2)
prob_noavg_mean <- predict(rv.G, df.tst, bayes.avg=FALSE, type="prob", sum.stat = "mean")
prob_noavg_med <- predict(rv.G, df.tst, bayes.avg=FALSE, type="prob", sum.stat = "median")
prob_avg <- predict(rv.G, df.tst, bayes.avg=TRUE, type="prob")
# Plot the density
plot(NULL, xlim=c(-7,12), ylim=c(0,0.14), xlab="y", ylab="p(y)")
lines(df.tst$y, prob_noavg_mean[,1],lwd=1.5)
lines(df.tst$y, prob_noavg_med[,1], col="red",lwd=1.5)
lines(df.tst$y, prob_avg[,1], col="green",lwd=1.5)
legend(-7,0.14,c("Mean (no averaging)","Median (no averaging)","Bayes Average"),
lty=c(1,1,1),col=c("black","red","green"),lwd=c(1.5,1.5,1.5), cex=0.7)
title('Predictive densities for X=2')
if (FALSE) {
# -----------------------------------------------------------------
# Example 3: Poisson (count) regression
X = matrix(rnorm(100*20),100,5)
b = c(0.5,-1,0,0,1)
nu = X%*%b + 1
y = rpois(lambda=exp(nu),n=length(nu))
df <- data.frame(X,y)
# Fit a Poisson regression
rv.pois = bayesreg(y~.,data=df, model="poisson", prior="hs", burnin=1e4, n.samples=1e4)
# Make a prediction for the first five rows
# By default this predicts the log-rate (i.e., the linear predictor)
predict(rv.pois,df[1:5,])
# This is the response (i.e., conditional mean of y)
exp(predict(rv.pois,df[1:5,]))
# Same as above ... compare to the actual targets
cbind(exp(predict(rv.pois,df[1:5,])), y[1:5])
# Slightly different as E[exp(x)]!=exp(E[x])
predict(rv.pois,df[1:5,], type="response", bayes.avg=TRUE)
# 99% credible interval for response
predict(rv.pois,df[1:5,], type="response", bayes.avg=TRUE, CI=99)
# -----------------------------------------------------------------
# Example 4: Logistic regression on spambase
data(spambase)
# bayesreg expects binary targets to be factors
spambase$is.spam <- factor(spambase$is.spam)
# First take a subset of the data (1/10th) for training, reserve the rest for testing
spambase.tr = spambase[seq(1,nrow(spambase),10),]
spambase.tst = spambase[-seq(1,nrow(spambase),10),]
# Fit a model using logistic horseshoe for 2,000 samples
rv <- bayesreg(is.spam ~ ., spambase.tr, model = "logistic", prior = "horseshoe", n.samples = 2e3)
# Summarise, sorting variables by their ranking importance
rv.s <- summary(rv,sort.rank=TRUE)
# Make predictions about testing data -- get class predictions and class probabilities
y_pred <- predict(rv, spambase.tst, type='class')
y_prob <- predict(rv, spambase.tst, type='prob')
# Check how well our predictions did by generating confusion matrix
table(y_pred, spambase.tst$is.spam)
# Calculate logarithmic loss on test data
y_prob <- predict(rv, spambase.tst, type='prob')
cat('Neg Log-Like for no Bayes average, posterior mean estimates: ', sum(-log(y_prob[,1])), '\n')
y_prob <- predict(rv, spambase.tst, type='prob', sum.stat="median")
cat('Neg Log-Like for no Bayes average, posterior median estimates: ', sum(-log(y_prob[,1])), '\n')
y_prob <- predict(rv, spambase.tst, type='prob', bayes.avg=TRUE)
cat('Neg Log-Like for Bayes average: ', sum(-log(y_prob[,1])), '\n')
}
Run the code above in your browser using DataLab