## avoid testing of rgl 3D plot on headless non-windows OS
## users can disregard this sentence.
if(!interactive() && Sys.info()["sysname"]!="Windows") skip=TRUE
library(rgl)
library(randomForest)
library(forestFloor)
#simulate data
obs=1500
vars = 6
X = data.frame(replicate(vars,runif(obs)))*2-1
Y = with(X, X1*2 + 2*sin(X2*pi) + 3* (X3+X2)^2 )
Yerror = 1 * rnorm(obs)
var(Y)/var(Y+Yerror)
Y= Y+Yerror
#grow a forest, remember to include inbag
rfo=randomForest::randomForest(X,Y,
keep.inbag=TRUE,
ntree=1000,
replace=TRUE,
sampsize=500,
importance=TRUE)
#compute ff
ff = forestFloor(rfo,X)
#print forestFloor
print(ff)
#plot partial functions of most important variables first
Col=fcol(ff,1)
plot(ff,col=Col,orderByImportance=TRUE)
#the pure feature contributions
rgl::plot3d(ff$X[,2],ff$X[,3],apply(ff$FCmatrix[,2:3],1,sum),
#add some colour gradients to ease visualization
#box.outliers squese all observations in a 2 std.dev box
#univariately for a vector or matrix and normalize to [0;1]
col=fcol(ff,2,orderByImportance=FALSE))
#add grid convolution/interpolation
#make grid with current function
grid23 = convolute_grid(ff,Xi=2:3,userArgs.kknn= alist(k=25,kernel="gaus"),grid=50,zoom=1.2)
#apply grid on 3d-plot
rgl::persp3d(unique(grid23[,2]),unique(grid23[,3]),grid23[,1],alpha=0.3,
col=c("black","grey"),add=TRUE)
#anchor points of grid could be plotted also
rgl::plot3d(grid23[,2],grid23[,3],grid23[,1],alpha=0.3,col=c("black"),add=TRUE)
## and we se that their is almost no variance out of the surface, thus is FC2 and FC3
## well explained by the feature context of both X3 and X4
### next example show how to plot a 3D grid + feature contribution
## this 4D application is very experimental
#Make grid of three effects, 25^3 = 15625 anchor points
grid123 = convolute_grid(ff,
Xi=c(1:3),
FCi=c(1:3),
userArgs.kknn = alist(
k= 100,
kernel = "gaussian",
distance = 1),
grid=25,
zoom=1.2)
#Select a dimension to place in layers
uni2 = unique(grid123[,2]) #2 points to X1 and FC1
uni2=uni2[c(7,9,11,13,14,16,18)] #select some layers to visualize
## plotting any combination of X2 X3 in each layer(from red to green) having different value of X1
count = 0
add=FALSE
for(i in uni2) {
count = count +1
this34.plane = grid123[grid123[,2]==i,]
if (count==2) add=TRUE
# plot3d(ff$X[,1],ff$X[,2]
persp3d(unique(this34.plane[,3]),
unique(this34.plane[,4]),
this34.plane[,1], add=add,
col=rgb(count/length(uni2),1-count/length(uni2),0),alpha=0.1)
}
## plotting any combination of X1 X3 in each layer(from red to green) having different value of X2
uni3 = unique(grid123[,4]) #2 points to X1 and FC1
uni3=uni3[c(7,9,11,13,14,16,18)] #select some layers to visualize
count = 0
add=FALSE
for(i in uni3) {
count = count +1
this34.plane = grid123[grid123[,4]==i,]
if (count==2) add=TRUE
#plot3d(ff$X[,1],ff$X[,2])
persp3d(unique(this34.plane[,2]),
unique(this34.plane[,3]),
this34.plane[,1], add=add,
col=rgb(count/length(uni3),1-count/length(uni3),0),alpha=0.1)
}
Run the code above in your browser using DataLab