# generate data, based on an example in Friedman (1991)
#
# Arguments:
#   n      number of observations (rows) to simulate
#   p      number of uniform(0, 1) covariates; only the first five enter the
#          mean function, so p must be at least 5
#   sigma  multiplier applied to the standard normal noise added to the mean
#
# Returns a data.frame with covariate columns x.1, ..., x.p, the noise-free
# mean 'mu' and the noisy response 'y'.
gendat <- function(n=200L, p=10L, sigma=1) {
  # the mean function below indexes x[, 1] to x[, 5]; fail early with a clear
  # message instead of an opaque "subscript out of bounds"
  if (length(p) != 1L || is.na(p) || p < 5L) {
    stop("'p' must be at least 5: the mean function uses x[, 1] to x[, 5]",
         call. = FALSE)
  }
  x <- matrix(runif(n * p), n, p)
  mu <- 10*sin(pi*x[, 1] * x[, 2]) + 20*(x[, 3] - 0.5)^2 + 10*x[, 4] + 5*x[, 5]
  y <- mu + sigma * rnorm(n)
  data.frame(x=x, mu=mu, y=y)
}
# simulate a training set (default size) and a test set of 25 observations
train <- gendat()
test <- gendat(n=25)
# keep trees for later prediction based on new data
# NB '.' expands to all columns of train, which besides y also holds the
# noise-free mean mu; mu is removed as well, otherwise the model would be
# handed the very function it is supposed to learn
sampler <- create_sampler(
y ~ brt(~ . - y - mu, name="bart", keepTrees=TRUE),
sigma.mod=pr_invchisq(df=3, scale=var(train$y)),
data = train
)
# run the sampler: 2 chains, n.iter=700, thin=2, storing all components
sim <- MCMCsim(
  sampler,
  n.chain = 2,
  n.iter = 700,
  thin = 2,
  store.all = TRUE,
  verbose = FALSE
)
# summarize the draws (printed as well as stored)
(summ <- summary(sim))
# "Mean" column of the summary for the "bart" component against the true
# mean, with the identity line for reference
plot(train$mu, summ$bart[, "Mean"])
abline(0, 1)
# NB prediction is currently slow
# \donttest{
# predict for the test data, using 100 draw indices sampled without
# replacement from 1, ..., n_draws(sim)
pred <- predict(
  sim,
  newdata = test,
  iters = sample(seq_len(n_draws(sim)), 100),
  show.progress = FALSE
)
# summarize the predictions (printed as well as stored)
(summpred <- summary(pred))
# "Mean" column of the prediction summary against the true test mean, with
# the identity line for reference
plot(test$mu, summpred[, "Mean"])
abline(0, 1)
# }
# Run the code above in your browser using DataLab