library(forestFloor)
library(randomForest)
library(rgl)
# Example: convolute two interacting feature contributions by their features.

# --- Simulate data ----
# Six standard-normal features; only X1..X4 enter the response, and X3/X4
# enter purely through their interaction.
obs <- 2500
vars <- 6
X <- data.frame(replicate(vars, rnorm(obs)))
Y <- X$X1^2 + 2 * sin(X$X2 * pi) + 8 * X$X3 * X$X4
Yerror <- 15 * rnorm(obs)

# Squared correlation between the clean and the noisy response
# (a relatively noisy system).
cor(Y, Y + Yerror)^2
Y <- Y + Yerror

# --- Grow a forest ----
# Remember to include the inbag record (keep.inbag = TRUE).
rfo <- randomForest(
  x = X,
  y = Y,
  keep.inbag = TRUE,
  ntree = 1000,
  sampsize = 800
)

# --- Obtain the forestFloor object from the fitted forest and its data ----
ff <- forestFloor(rfo, X)

# --- Convolute the interacting feature contributions by their features ----
# This helps to understand the relationship between X3, X4 and their
# contributions.
fc34_convoluted <- convolute_ff2(
  ff,
  Xi = 3:4,   # arguments for the wrapper
  FCi = 3:4,
  userArgs.kknn = alist(kernel = "gaussian", k = 25)  # arguments for train.kknn
)

# --- Plot the joined convolution ----
# Colour gradients are added to ease visualization. box.outliers squeezes all
# observations into a 2 std.dev box (univariately for a vector or matrix) and
# normalizes to [0;1].
plot3d(
  x = ff$X[, 3],
  y = ff$X[, 4],
  z = fc34_convoluted,
  main = "convolution of two feature contributions by their own vaiables",
  col = rgb(
    red = 0.7 * box.outliers(fc34_convoluted),
    green = 0.7 * box.outliers(ff$X[, 3]),
    blue = 0.7 * box.outliers(ff$X[, 4])
  )
)
# Run the code above in your browser using DataLab