## Not run:
# # based on examples in the dismo package
#
# # get predictor variables
# library(dismo)
# predictor.files <- list.files(path=paste(system.file(package="dismo"), '/ex', sep=''),
# pattern='grd', full.names=TRUE)
# predictors <- stack(predictor.files)
# # subset based on Variance Inflation Factors
# predictors <- subset(predictors, subset=c("bio5", "bio6",
# "bio16", "bio17", "biome"))
# predictors
# predictors@title <- "base"
#
# # presence points
# presence_file <- paste(system.file(package="dismo"), '/ex/bradypus.csv', sep='')
# pres <- read.table(presence_file, header=TRUE, sep=',')[,-1]
#
# # the kfold function randomly assigns data to groups;
# # group 1 is used as testing (1/5) data and the other groups as training (4/5) data
# groupp <- kfold(pres, 5)
# pres_train <- pres[groupp != 1, ]
# pres_test <- pres[groupp == 1, ]
#
# # choose background points
# ext <- extent(-90, -32, -33, 23)
# background <- randomPoints(predictors, n=1000, ext=ext, extf=1.00)
# colnames(background)=c('lon', 'lat')
# groupa <- kfold(background, 5)
# backg_train <- background[groupa != 1, ]
# backg_test <- background[groupa == 1, ]
#
# # formulae for random forest and generalized linear model
# # compare with: ensemble.formulae(predictors, factors=c("biome"))
#
# rfformula <- as.formula(pb ~ bio5+bio6+bio16+bio17)
#
# glmformula <- as.formula(pb ~ bio5 + I(bio5^2) + I(bio5^3) +
# bio6 + I(bio6^2) + I(bio6^3) + bio16 + I(bio16^2) + I(bio16^3) +
# bio17 + I(bio17^2) + I(bio17^3) )
#
# # fit four ensemble models (RF, GLM, BIOCLIM, DOMAIN)
# ensemble.nofactors <- ensemble.test(x=predictors, p=pres_train, a=backg_train,
# pt=pres_test, at=backg_test,
# species.name="Bradypus",
# MAXENT=0, GBM=0, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=0, GAM=0,
# GAMSTEP=0, MGCV=0, MGCVFIX=0,EARTH=0, RPART=0, NNET=0, FDA=0,
# SVM=0, SVME=0, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# Yweights="BIOMOD", factors="biome",
# PLOTS=FALSE, evaluations.keep=TRUE,
# RF.formula=rfformula,
# GLM.formula=glmformula)
#
# # fit four ensemble models (RF, GLM, BIOCLIM, DOMAIN) using default formulae
# # variable 'biome' is not included as an explanatory variable
# # results are provided in a file in the 'outputs' subfolder of the working
# # directory
# ensemble.nofactors <- ensemble.test(x=predictors,
# p=pres_train, a=backg_train,
# pt=pres_test, at=backg_test,
# layer.drops="biome",
# species.name="Bradypus",
# SINK=TRUE,
# MAXENT=0, GBM=0, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=0, GAM=0,
# GAMSTEP=0, MGCV=0, MGCVFIX=0, EARTH=0, RPART=0, NNET=0, FDA=0,
# SVM=0, SVME=0, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# Yweights="BIOMOD", factors="biome",
# PLOTS=FALSE, evaluations.keep=TRUE,
# formulae.defaults=TRUE)
#
# # after fitting the individual algorithms (submodels),
# # transform predictions with a probit link.
# ensemble.nofactors <- ensemble.test(x=predictors,
# p=pres_train, a=backg_train,
# pt=pres_test, at=backg_test,
# layer.drops="biome",
# species.name="Bradypus",
# SINK=TRUE,
# ENSEMBLE.min=0.6,
# MAXENT=0, GBM=0, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=0, GAM=0,
# GAMSTEP=0, MGCV=0, MGCVFIX=0, EARTH=0, RPART=0, NNET=0, FDA=0,
# SVM=0, SVME=0, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# PROBIT=TRUE,
# Yweights="BIOMOD", factors="biome",
# PLOTS=FALSE, evaluations.keep=TRUE,
# formulae.defaults=TRUE)
#
#
# # instead of providing presence and background locations, provide data.frames
# # because 'biome' is a factor, RasterStack and extent need to be provided
# # to check for levels in the Training and Testing data set
# TrainData1 <- prepareData(x=predictors, p=pres_train, b=backg_train,
# factors=c("biome"), xy=FALSE)
# TestData1 <- prepareData(x=predictors, p=pres_test, b=backg_test,
# factors=c("biome"), xy=FALSE)
# ensemble.factors1 <- ensemble.test(x=predictors, ext=ext,
# TrainData=TrainData1, TestData=TestData1,
# p=pres_train, a=backg_train,
# pt=pres_test, at=backg_test,
# species.name="Bradypus",
# SINK=TRUE,
# MAXENT=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
# GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
# SVM=1, SVME=1, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# Yweights="BIOMOD", factors="biome",
# PLOTS=FALSE, evaluations.keep=TRUE)
#
# # compare different methods of calculating ensembles
# ensemble.factors2 <- ensemble.test(x=predictors, ext=ext,
# TrainData=TrainData1, TestData=TestData1,
# p=pres_train, a=backg_train,
# pt=pres_test, at=backg_test,
# species.name="Bradypus",
# SINK=TRUE,
# MAXENT=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
# GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
# SVM=1, SVME=1, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# ENSEMBLE.best=c(4:10), ENSEMBLE.exponent=c(1, 2, 4, 6, 8),
# Yweights="BIOMOD", factors="biome",
# PLOTS=FALSE, evaluations.keep=TRUE)
#
# # test performance of different suitability models
# # data are split into 4 subsets, each used once for evaluation
# ensemble.nofactors2 <- ensemble.test.splits(x=predictors, ext=ext,
# p=pres, a=background, k=4,
# layer.drops=c("biome"),
# species.name="Bradypus",
# SINK=TRUE,
# MAXENT=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
# GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
# SVM=1, SVME=1, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# ENSEMBLE.best=0, ENSEMBLE.exponent=c(1, 2, 4, 6, 8),
# ENSEMBLE.min=0.7,
# Yweights="BIOMOD", factors="biome",
# PLOTS=FALSE, formulae.defaults=TRUE,
# GBMSTEP.learning.rate=0.002)
# ensemble.nofactors2
#
# # test the result of leaving out one of the variables from the model
# # note that positive differences indicate that the model without the variable
# # has higher AUC than the full model
# ensemble.variables <- ensemble.drop1(x=predictors, ext=ext,
# p=pres, a=background, k=5,
# layer.drops=c("bio6", "bio1", "bio12"),
# species.name="Bradypus",
# SINK=TRUE,
# difference=TRUE,
# VIF=TRUE,
# MAXENT=0, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1,
# GAMSTEP=1, MGCV=1, MGCVFIX=1, EARTH=1, RPART=1, NNET=1, FDA=1,
# SVM=1, SVME=1, BIOCLIM=0, DOMAIN=0, MAHAL=0,
# ENSEMBLE.best=0, ENSEMBLE.exponent=c(1, 2, 4, 6, 8),
# ENSEMBLE.min=0.7,
# Yweights="BIOMOD", factors="biome",
# GBMSTEP.learning.rate=0.002)
# ensemble.variables
#
# ## End(Not run)
# Run the code above in your browser using DataLab