# \donttest{
library(drf)
n = 500
p = 10
d = 3
# Generate training data
X = matrix(rnorm(n * p), nrow=n)
Y = matrix(rnorm(n * d), nrow=n)
Y[, 1] = Y[, 1] + X[, 1]
Y[, 2] = Y[, 2] * X[, 2]
Y[, 3] = Y[, 3] * X[, 1] + X[, 2]
# Fit DRF object
drf.forest = drf(X, Y, num.trees=100, num.threads=1)
# Generate test data
X_test = matrix(rnorm(10 * p), nrow=10)
out = predict(drf.forest, newdata=X_test)
# Compute E[Y_1 | X] for all data in X_test directly from
# the weights representing the estimated distribution
out$weights %*% out$y[,1]
out = predict(drf.forest, newdata=X_test,
functional='mean')
# Compute E[Y_1 | X] for all data in X_test using built-in functionality
out[,1]
out = predict(drf.forest, newdata=X_test,
functional='quantile',
quantiles=c(0.25, 0.75),
transformation=function(y){y[1] * y[2] * y[3]})
# Compute 25% and 75% quantiles of Y_1*Y_2*Y_3, conditionally on X = X_test[1, ]
out[1,,]
out = predict(drf.forest, newdata=X_test,
functional='cov',
transformation=function(y){matrix(1:6, nrow=2) %*% y})
# Compute 2x2 covariance matrix for (1*Y_1 + 3*Y_2 + 5*Y_3, 2*Y_1 + 4*Y_2 + 6*Y_3),
# conditionally on X = X_test[1, ]
out[1,,]
out = predict(drf.forest, newdata=X_test,
functional='custom',
custom.functional=function(y, w){c(sum(y[, 1] * w), sum(y[, 2] * w))})
# Compute E[Y_1, Y_2 | X] for all data in X_test by providing custom functional that
# computes it from the weights
out
## UNCERTAINTY WEIGHTS ######################################################
# Simulate Data that experiences both a mean as well as sd shift
set.seed(10)
n<-500
# Simulate from X
x1 <- runif(n,-1,1)
x2 <- runif(n,-1,1)
x3 <- x1+ runif(n,-1,1)
X0 <- matrix(runif(7*n,-1,1), nrow=n, ncol=7)
X <- cbind(x1,x2, x3, X0)
colnames(X)<-NULL
# Simulate dependent variable Y
Y <- as.matrix(rnorm(n,mean = 0.8*(x1 > 0), sd = 1 + 1*(x2 > 0)))
# Fit DRF with 50 CI groups, each 10 trees large. This results in 50 uncertainty weights
DRF <- drf(X=X, Y=Y,num.trees=500, min.node.size = 5, ci.group.size=500/50, num.threads=1)
# Obtain Test point
x<-matrix(c(0.2, 0.4, runif(8,-1,1)), nrow=1, ncol=10)
# (we only use 2 threads in this example due to cran limitations)
DRFpred<-predict(DRF, newdata=x, estimate.uncertainty=TRUE, num.threads=1)
## Sample from P_{Y| X=x}
Yxs<-Y[sample(1:n, size=n, replace = TRUE, DRFpred$weights[1,])]
# Calculate quantile prediction as weighted quantiles from Y
qx <- quantile(Yxs, probs = c(0.05,0.95))
# Calculate conditional mean prediction
mux <- mean(Yxs)
# True quantiles
q1<-qnorm(0.05, mean=0.8 * (x[1] > 0), sd=(1+(x[2] > 0)))
q2<-qnorm(0.95, mean=0.8 * (x[1] > 0), sd=(1+(x[2] > 0)))
mu<-0.8 * (x[1] > 0)
# Calculate uncertainty
alpha<-0.05
B<-nrow(DRFpred$weights.uncertainty[[1]])
qxb<-matrix(NaN, nrow=B, ncol=2)
muxb<-matrix(NaN, nrow=B, ncol=1)
for (b in 1:B){
Yxsb<-Y[sample(1:n, size=n, replace = TRUE, DRFpred$weights.uncertainty[[1]][b,])]
qxb[b,] <- quantile(Yxsb, probs = c(0.05,0.95))
muxb[b] <- mean(Yxsb)
}
CI.lower.q1 <- qx[1] - qnorm(1-alpha/2)*sqrt(var(qxb[,1]))
CI.upper.q1 <- qx[1] + qnorm(1-alpha/2)*sqrt(var(qxb[,1]))
CI.lower.q2 <- qx[2] - qnorm(1-alpha/2)*sqrt(var(qxb[,2]))
CI.upper.q2 <- qx[2] + qnorm(1-alpha/2)*sqrt(var(qxb[,2]))
CI.lower.mu <- mux - qnorm(1-alpha/2)*sqrt(var(muxb))
CI.upper.mu <- mux + qnorm(1-alpha/2)*sqrt(var(muxb))
hist(Yxs, prob=TRUE)
z<-seq(-6,7,by=0.01)
d<-dnorm(z, mean=0.8 * (x[1] > 0), sd=(1+(x[2] > 0)))
lines(z,d, col="darkred" )
abline(v=q1,col="darkred" )
abline(v=q2, col="darkred" )
abline(v=qx[1], col="darkblue")
abline(v=qx[2], col="darkblue")
abline(v=mu, col="darkred")
abline(v=mux, col="darkblue")
abline(v=CI.lower.q1, col="darkblue", lty=2)
abline(v=CI.upper.q1, col="darkblue", lty=2)
abline(v=CI.lower.q2, col="darkblue", lty=2)
abline(v=CI.upper.q2, col="darkblue", lty=2)
abline(v=CI.lower.mu, col="darkblue", lty=2)
abline(v=CI.upper.mu, col="darkblue", lty=2)
# }
Run the code above in your browser using DataLab