# NOT RUN {
# based on examples in the dismo package
# get predictor variables
library(dismo)
predictor.files <- list.files(path=paste(system.file(package="dismo"), '/ex', sep=''),
pattern='grd', full.names=TRUE)
predictors <- stack(predictor.files)
# subset based on Variance Inflation Factors
predictors <- subset(predictors, subset=c("bio5", "bio6",
"bio16", "bio17", "biome"))
predictors
predictors@title <- "predictors"
# presence points
presence_file <- paste(system.file(package="dismo"), '/ex/bradypus.csv', sep='')
pres <- read.table(presence_file, header=TRUE, sep=',')[,-1]
# the kfold function randomly assigns data to groups;
# groups are used as calibration (1/4) and training (3/4) data
groupp <- kfold(pres, 4)
pres_train <- pres[groupp != 1, ]
pres_test <- pres[groupp == 1, ]
# choose background points
background <- randomPoints(predictors, n=1000, extf=1.00)
colnames(background)=c('lon', 'lat')
groupa <- kfold(background, 4)
backg_train <- background[groupa != 1, ]
backg_test <- background[groupa == 1, ]
# formulae for random forest and generalized linear model
# compare with: ensemble.formulae(predictors, factors=c("biome"))
rfformula <- as.formula(pb ~ bio5+bio6+bio16+bio17)
glmformula <- as.formula(pb ~ bio5 + I(bio5^2) + I(bio5^3) +
bio6 + I(bio6^2) + I(bio6^3) + bio16 + I(bio16^2) + I(bio16^3) +
bio17 + I(bio17^2) + I(bio17^3) )
# fit four ensemble models (RF, GLM, BIOCLIM, DOMAIN)
ensemble.nofactors <- ensemble.calibrate.models(x=predictors, p=pres_train, a=backg_train,
pt=pres_test, at=backg_test,
species.name="Bradypus",
ENSEMBLE.tune=TRUE,
ENSEMBLE.min = 0.65,
MAXENT=0, MAXLIKE=0, GBM=0, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=0, GAM=0,
GAMSTEP=0, MGCV=0, MGCVFIX=0, EARTH=0, RPART=0, NNET=0, FDA=0,
SVM=0, SVME=0, GLMNET=0,
BIOCLIM.O=0, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=0,
Yweights="BIOMOD",
PLOTS=FALSE, evaluations.keep=TRUE, models.keep=TRUE,
RF.formula=rfformula,
GLM.formula=glmformula)
# with option models.keep, all model objects are saved in ensemble object
# the same slots can be used to replace model objects with new calibrations
ensemble.nofactors$models$RF
summary(ensemble.nofactors$models$GLM)
ensemble.nofactors$models$BIOCLIM
ensemble.nofactors$models$DOMAIN
ensemble.nofactors$models$formulae
# evaluations are kept in different slot
attributes(ensemble.nofactors$evaluations)
# fit four ensemble models (RF, GLM, BIOCLIM, DOMAIN) using default formulae
# variable 'biome' is not included as explanatory variable
# results are provided in a file in the 'outputs' subfolder of the working
# directory
ensemble.nofactors <- ensemble.calibrate.models(x=predictors,
p=pres_train, a=backg_train,
pt=pres_test, at=backg_test,
layer.drops="biome",
species.name="Bradypus",
ENSEMBLE.tune=TRUE,
ENSEMBLE.min=0.65,
SINK=TRUE,
MAXENT=0, MAXLIKE=0, GBM=0, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=0, GAM=0,
GAMSTEP=0, MGCV=0, MGCVFIX=0, EARTH=0, RPART=0, NNET=0, FDA=0,
SVM=0, SVME=0, GLMNET=0,
BIOCLIM.O=0, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=0,
Yweights="BIOMOD",
PLOTS=FALSE, evaluations.keep=TRUE,
formulae.defaults=TRUE)
# after fitting the individual algorithms (submodels),
# transform predictions with a probit link.
ensemble.nofactors <- ensemble.calibrate.models(x=predictors,
p=pres_train, a=backg_train,
pt=pres_test, at=backg_test,
layer.drops="biome",
species.name="Bradypus",
SINK=TRUE,
ENSEMBLE.tune=TRUE,
ENSEMBLE.min=0.65,
MAXENT=0, MAXLIKE=0, GBM=0, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=0, GAM=0,
GAMSTEP=0, MGCV=0, MGCVFIX=0, EARTH=0, RPART=0, NNET=0, FDA=0,
SVM=0, SVME=0, GLMNET=0,
BIOCLIM.O=0, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=0,
PROBIT=TRUE,
Yweights="BIOMOD", factors="biome",
PLOTS=FALSE, evaluations.keep=TRUE,
formulae.defaults=TRUE)
# Instead of providing presence and background locations, provide data.frames.
# Because 'biome' is a factor, RasterStack needs to be provided
# to check for levels in the Training and Testing data set.
TrainData1 <- prepareData(x=predictors, p=pres_train, b=backg_train,
factors=c("biome"), xy=FALSE)
TestData1 <- prepareData(x=predictors, p=pres_test, b=backg_test,
factors=c("biome"), xy=FALSE)
ensemble.factors1 <- ensemble.calibrate.models(x=predictors,
TrainData=TrainData1, TestData=TestData1,
p=pres_train, a=backg_train,
pt=pres_test, at=backg_test,
species.name="Bradypus",
SINK=TRUE,
ENSEMBLE.tune=TRUE,
ENSEMBLE.min=0.65,
MAXENT=0, MAXLIKE=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
GAMSTEP=1, MGCV=1, MGCVFIX=0, EARTH=1, RPART=1, NNET=1, FDA=1,
SVM=1, SVME=1, GLMNET=1,
BIOCLIM.O=1, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=1,
Yweights="BIOMOD", factors="biome",
PLOTS=FALSE, evaluations.keep=TRUE)
# compare different methods of calculating ensembles
ensemble.factors2 <- ensemble.calibrate.models(x=predictors,
TrainData=TrainData1, TestData=TestData1,
p=pres_train, a=backg_train,
pt=pres_test, at=backg_test,
species.name="Bradypus",
SINK=TRUE,
ENSEMBLE.tune=TRUE,
MAXENT=1, MAXLIKE=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
SVM=1, SVME=1, BIOCLIM.O=1, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=1,
ENSEMBLE.best=c(4:10), ENSEMBLE.exponent=c(1, 2, 3),
Yweights="BIOMOD", factors="biome",
PLOTS=FALSE, evaluations.keep=TRUE)
# test performance of different suitability models
# data are split in 4 subsets, each used once for evaluation
ensemble.nofactors2 <- ensemble.calibrate.weights(x=predictors,
p=pres, a=background, k=4,
species.name="Bradypus",
SINK=TRUE, PROBIT=TRUE,
MAXENT=1, MAXLIKE=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
SVM=1, SVME=1, BIOCLIM.O=1, BIOCLIM=1, DOMAIN=1, MAHAL=0, MAHAL01=1,
ENSEMBLE.tune=TRUE,
ENSEMBLE.best=0, ENSEMBLE.exponent=c(1, 2, 3),
ENSEMBLE.min=0.7,
Yweights="BIOMOD",
PLOTS=FALSE, formulae.defaults=TRUE)
ensemble.nofactors2$AUC.table
# test the result of leaving out one of the variables from the model
# note that positive differences indicate that the model without the variable
# has higher AUC than the full model
ensemble.variables <- ensemble.drop1(x=predictors,
p=pres, a=background, k=4,
species.name="Bradypus",
SINK=TRUE,
difference=TRUE,
VIF=TRUE, PROBIT=TRUE,
MAXENT=0, MAXLIKE=0, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
SVM=1, SVME=1, GLMNET=1,
BIOCLIM.O=0, BIOCLIM=0, DOMAIN=1, MAHAL=0, MAHAL01=1,
ENSEMBLE.tune=TRUE,
ENSEMBLE.best=0, ENSEMBLE.exponent=c(1, 2, 3),
ENSEMBLE.min=0.7,
Yweights="BIOMOD", factors="biome")
ensemble.variables
# use function ensemble.VIF to select a subset of variables
# factor variables are not handled well by the function
# and therefore factors are removed
# however, one can check for factors with car::vif through
# the ensemble.calibrate.models function
# VIF.analysis$var.drops can be used as input for ensemble.calibrate.models or
# ensemble.calibrate.weights
predictors <- stack(predictor.files)
predictors <- subset(predictors, subset=c("bio1", "bio5", "bio6", "bio8",
"bio12", "bio16", "bio17", "biome"))
VIF.analysis <- ensemble.VIF(predictors, factors="biome")
VIF.analysis
# }
Run the code above in your browser using DataLab