ensemble.test.splits
, ensemble.test
and ensemble.raster
.
ensemble.batch(x = NULL, xn = c(x), ext = NULL, species.presence = NULL, species.absence = NULL, presence.min = 20, an = 1000, excludep = FALSE, CIRCLES.at = FALSE, CIRCLES.d = 100000, k.splits = 4, k.test = 0, n.ensembles = 1, SINK = FALSE, RASTER.format = "raster", RASTER.datatype = "INT2S", RASTER.NAflag = -32767, KML.out = FALSE, KML.maxpixels = 100000, KML.blur = 10, models.save = FALSE, threshold.method = "spec_sens", threshold.sensitivity = 0.9, threshold.PresenceAbsence = FALSE, ENSEMBLE.best = 0, ENSEMBLE.min = 0.7, ENSEMBLE.exponent = 1, ENSEMBLE.weight.min = 0.05, input.weights = NULL, MAXENT = 1, GBM = 1, GBMSTEP = 1, RF = 1, GLM = 1, GLMSTEP = 1, GAM = 1, GAMSTEP = 1, MGCV = 1, MGCVFIX = 0, EARTH = 1, RPART = 1, NNET = 1, FDA = 1, SVM = 1, SVME = 1, BIOCLIM = 1, DOMAIN = 1, MAHAL = 1, PROBIT = FALSE, AUC.weights = TRUE, Yweights = "BIOMOD", layer.drops = NULL, factors = NULL, dummy.vars = NULL, formulae.defaults = TRUE, maxit = 100, MAXENT.a = NULL, MAXENT.an = 10000, MAXENT.BackData = NULL, MAXENT.path = paste(getwd(), "/models/maxent", sep=""), GBM.formula = NULL, GBM.n.trees = 2001, GBMSTEP.gbm.x = 2:(1 + raster::nlayers(x)), GBMSTEP.tree.complexity = 5, GBMSTEP.learning.rate = 0.005, GBMSTEP.bag.fraction = 0.5, GBMSTEP.step.size = 100, RF.formula = NULL, RF.ntree = 751, RF.mtry = floor(sqrt(raster::nlayers(x))), GLM.formula = NULL, GLM.family = binomial(link = "logit"), GLMSTEP.steps = 1000, STEP.formula = NULL, GLMSTEP.scope = NULL, GLMSTEP.k = 2, GAM.formula = NULL, GAM.family = binomial(link = "logit"), GAMSTEP.steps = 1000, GAMSTEP.scope = NULL, GAMSTEP.pos = 1, MGCV.formula = NULL, MGCV.select = FALSE, MGCVFIX.formula = NULL, EARTH.formula = NULL, EARTH.glm = list(family = binomial(link = "logit"), maxit = maxit), RPART.formula = NULL, RPART.xval = 50, NNET.formula = NULL, NNET.size = 8, NNET.decay = 0.01, FDA.formula = NULL, SVM.formula = NULL, SVME.formula = NULL, MAHAL.shape = 1)
ensemble.mean(RASTER.species.name = "Species001", RASTER.stack.name = "base", positive.filters = c("grd", "_ENSEMBLE_"), negative.filters = c("xml"), RASTER.format = "raster", RASTER.datatype = "INT2S", RASTER.NAflag = -32767, KML.out = FALSE, KML.maxpixels = 100000, KML.blur = 10, p = NULL, a = NULL, pt = NULL, at = NULL, threshold = -1, threshold.method = "spec_sens", threshold.sensitivity = 0.9, threshold.PresenceAbsence = FALSE)
ensemble.plot(RASTER.species.name = "Species001", RASTER.stack.name = "base", plot.method = "suitability", dev.new.width = 7, dev.new.height = 7, main = paste(RASTER.species.name, " ", plot.method, " for ", RASTER.stack.name, sep=""), positive.filters = c("grd","_MEAN_"), negative.filters = c("xml"), p=NULL, a=NULL, threshold = -1, threshold.method = "spec_sens", threshold.sensitivity = 0.9, threshold.PresenceAbsence = FALSE, abs.breaks = 6, pres.breaks = 6, maptools.boundaries = TRUE, maptools.col = "dimgrey", ...)
stack
) containing all layers to calibrate an ensemble. xn
and the selection of background points to a sub-region of x
, typically provided as c(lonmin, lonmax, latmin, latmax); see also predict
, randomPoints
and extent
randomPoints
in case argument a
or species.absence
is missing TRUE
) that presence points will be excluded from the background points; see also randomPoints
TRUE
, then new background points that will be used for evaluating the suitability models will be selected (randomPoints
) in circular neighbourhoods (created with circles
) around presence locations (p and pt). circles
) around presence locations (p and pt). k=5
results in 4/5 of presence and background points to be used for calibrating the models, and 1/5 of presence and background points to be used for evaluating the models). See also kfold
. ensemble.test.splits
step in batch processing. See also kfold
. k=5
results in 4/5 of presence and background points to be used for calibrating the models, and 1/5 of presence and background points to be used for evaluating the models). See also kfold
. TRUE
). The name of the file is based on the species names. If a file already exists, then results are appended. See also sink
. writeFormats
and writeRaster
. dataType
and writeRaster
. writeRaster
. FALSE
, then no kml layers (layers that can be shown in Google Earth) are produced. If TRUE
, then kml files will be saved in a subfolder 'kml'. KML
. KML.blur^2
, which may help avoid blurring of isolated pixels. See also KML
. TRUE
). The filename will be species.name
with extension .models
; this file will be saved in subfolder of models
. When loading this file, model results will be available as ensemble.models
. spec_sens
(highest sum of the true positive rate and the true negative rate), kappa
(highest kappa value), no_omission
(highest threshold that corresponds to no omission), prevalence
(modeled prevalence is closest to observed prevalence) and equal_sens_spec
(equal true positive rate and true negative rate). See threshold
. Options specific to the BiodiversityR implementation are: threshold.mean
(resulting in calculating the mean value of spec_sens
, equal_sens_spec
and prevalence
) and threshold.min
(resulting in calculating the minimum value of spec_sens
, equal_sens_spec
and prevalence
). threshold.method = 'sensitivity'
. See threshold
. TRUE
calculate thresholds with the PresenceAbsence package. See optimal.thresholds
. ensemble.strategy
is called internally to determine weights for the ensemble model. ensemble.strategy
is called internally to determine weights for the ensemble model. ENSEMBLE.min
typically refers to input AUC values. NULL
then values provided by parameters such as MAXENT
and GBM
will be used. As an alternative, the output from ensemble.test.splits
can be used. maxent
). (Only weights > 0 will be used.) gbm
). (Only weights > 0 will be used.) gbm.step
). (Only weights > 0 will be used.) randomForest
). (Only weights > 0 will be used.) glm
). (Only weights > 0 will be used.) stepAIC
). (Only weights > 0 will be used.) gam
). (Only weights > 0 will be used.) step.gam
). (Only weights > 0 will be used.) gam
). (Only weights > 0 will be used.) gam
) will be fitted among ensemble earth
). (Only weights > 0 will be used.) rpart
). (Only weights > 0 will be used.) nnet
). (Only weights > 0 will be used.) fda
). (Only weights > 0 will be used.) ksvm
). (Only weights > 0 will be used.) svm
). (Only weights > 0 will be used.) bioclim
). (Only weights > 0 will be used.) domain
). (Only weights > 0 will be used.) mahal
). (Only weights > 0 will be used.) TRUE
, then subsequently to the fitting of the individual algorithm (e.g. maximum entropy or GAM) a generalized linear model (glm
) with probit link family=binomial(link="probit")
will be fitted to transform the predictions, using the previous predictions as explanatory variable. This transformation results in all model predictions being probability estimates. TRUE
, then use the average of the AUC for the different submodels in the different crossvalidation runs as weights for the 'full' ensemble model. See ensemble.test.splits
for details. "BIOMOD"
results in equal weighting of all presence and all background cases, "equal"
results in equal weighting of all cases. The user can supply a vector of weights with length equal to the number of cases in the calibration data set. x
. See also addLayer
. prepareData
TRUE
). See also ensemble.formulae
. glm.control
, gam.control
, gam.control
and nnet
. maxent
), typically available in 2-column (lon, lat) dataframe; see also prepareData
and extract
. Ignored if MAXENT.BackData
is provided. randomPoints
in case argument MAXENT.a
is missing. When used with the ensemble.batch
function, the same background locations will be used for each of the species runs; this implies that for each species, presence locations are not excluded from the background data for this function. maxent
). When used with the ensemble.batch
function, the same background locations will be used for each of the cross-validation runs; this is based on the assumption that a large number (~10000) of background locations are used. maxent
gbm
gbm
gbm.step
gbm.step
gbm.step
gbm.step
gbm.step
randomForest
randomForest
randomForest
glm
glm
stepAIC
stepAIC
stepAIC
stepAIC
gam
gam
step.gam
step.gam
gam
TRUE
, then the smoothing parameter estimation that is part of fitting can completely remove terms from the model; see also gam
earth
rpart
rpart.control
nnet
nnet
nnet
fda
ksvm
svm
mahal
. See details section. prepareData
and extract
prepareData
and extract
prepareData
prepareData
and extract
p
and absence a
locations. suitability
plots suitability maps, presence
plots presence-absence maps and count
plots count maps (count of number of algorithms or number of ensembles predicting presence). dev.new
). If < 0, then no new graphics device is opened. dev.new
). If < 0, then no new graphics device is opened. suitability
mapping). suitability
mapping). TRUE
, then plot approximate country boundaries wrld_simpl
wrld_simpl
plot
. ensemble.test.splits
, ensemble.test
and ensemble.raster
. ensemble.test.splits
results in a cross-validation procedure whereby the data set is split into calibration and testing subsets and the best weights for the ensemble model are determined (including the possibility for weights = 0).
ensemble.test
is the step whereby models are calibrated using all the available presence data.
ensemble.raster
is the final step whereby raster layers are produced for the ensemble model.
Function ensemble.mean
results in raster layers that are based on the summary of several ensemble layers: the new ensemble has probability values that are the mean of the probabilities of the different raster layers, the presence-absence threshold is derived for this new ensemble layer, whereas the count reflects the number of ensemble layers where presence was predicted. Note the assumption that input probabilities are scaled between 0 and 1000 (as the output from ensemble.raster
), whereas thresholds are based on actual probabilities (scaled between 0 and 1).
Function ensemble.plot
plots suitability, presence-absence or count maps. In the case of suitability maps, the presence-absence threshold needs to be provided, as suitabilities smaller than the threshold will be coloured red to orange, whereas suitabilities larger than the threshold will be coloured light blue to dark blue.
ensemble.test.splits
, ensemble.test
, ensemble.raster
## Not run:
# # based on examples in the dismo package
#
# # get predictor variables
# library(dismo)
# predictor.files <- list.files(path=paste(system.file(package="dismo"), '/ex', sep=''),
# pattern='grd', full.names=TRUE)
# predictors <- stack(predictor.files)
# # subset based on Variance Inflation Factors
# predictors <- subset(predictors, subset=c("bio5", "bio6",
# "bio16", "bio17", "biome"))
# predictors
# predictors@title <- "base"
#
# # presence points
# presence_file <- paste(system.file(package="dismo"), '/ex/bradypus.csv', sep='')
# pres <- read.table(presence_file, header=TRUE, sep=',')
# pres[,1] <- rep("Bradypus", nrow(pres))
#
# # choose background points
# ext <- extent(-90, -32, -33, 23)
# background <- randomPoints(predictors, n=1000, ext=ext, extf = 1.00)
#
# # north and south for new predictions (as if new climates)
# ext2 <- extent(-90, -32, 0, 23)
# predictors2 <- crop(predictors, y=ext2)
# predictors2@title <- "north"
#
# ext3 <- extent(-90, -32, -33, 0)
# predictors3 <- crop(predictors, y=ext3)
# predictors3@title <- "south"
#
# # fit 3 ensembles with batch processing, choosing the best ensemble model based on the
# # average AUC of 4-fold split of calibration and testing data
# # final models use all available presence data and average weights determined by the
# # ensemble.test.splits function (called internally)
# # batch processing can handle several species by using 3-column species.presence and
# # species.absence data sets
# # note that these calculations can take a while
#
# ensemble.nofactors <- ensemble.batch(x=predictors, ext=ext,
# xn=c(predictors2, predictors3),
# species.presence=pres,
# species.absence=background,
# k.splits=4, k.test=0,
# n.ensembles=3,
# SINK=TRUE,
# layer.drops=c("biome"),
# ENSEMBLE.best=0, ENSEMBLE.exponent=c(1, 2, 4, 6, 8),
# ENSEMBLE.min=0.7,
# MAXENT=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1, GAMSTEP=0, MGCV=1,
# EARTH=1, RPART=1, NNET=1, FDA=1, SVM=1, SVME=1, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# Yweights="BIOMOD",
# formulae.defaults=TRUE)
#
# # summaries for the 3 ensembles for the species
# # summaries are based on files in folders ensemble, ensemble/presence and
# # ensemble/count
#
# ensemble.mean(RASTER.species.name="Bradypus", RASTER.stack.name="base",
# p=pres, a=background,
# KML.out=T)
#
# # plot mean suitability
# plot1 <- ensemble.plot(RASTER.species.name="Bradypus", RASTER.stack.name="base",
# plot.method="suitability",
# p=pres, a=background, abs.breaks=4, pres.breaks=9)
# plot1
#
# ## End(Not run)
Run the code above in your browser using DataLab