ensemble.test.splits, ensemble.test and ensemble.raster.
ensemble.batch(x = NULL, xn = c(x), ext = NULL, species.presence = NULL, species.absence = NULL, presence.min = 20, an = 1000, excludep = FALSE, CIRCLES.at = FALSE, CIRCLES.d = 100000, k.splits = 4, k.test = 0, n.ensembles = 1, SINK = FALSE, RASTER.format = "raster", RASTER.datatype = "INT2S", RASTER.NAflag = -32767, KML.out = FALSE, KML.maxpixels = 100000, KML.blur = 10, models.save = FALSE, threshold.method = "spec_sens", threshold.sensitivity = 0.9, threshold.PresenceAbsence = FALSE, ENSEMBLE.best = 0, ENSEMBLE.min = 0.7, ENSEMBLE.exponent = 1, ENSEMBLE.weight.min = 0.05, input.weights = NULL, MAXENT = 1, GBM = 1, GBMSTEP = 1, RF = 1, GLM = 1, GLMSTEP = 1, GAM = 1, GAMSTEP = 1, MGCV = 1, MGCVFIX = 0, EARTH = 1, RPART = 1, NNET = 1, FDA = 1, SVM = 1, SVME = 1, BIOCLIM = 1, DOMAIN = 1, MAHAL = 1, PROBIT = FALSE, AUC.weights = TRUE, Yweights = "BIOMOD", layer.drops = NULL, factors = NULL, dummy.vars = NULL, formulae.defaults = TRUE, maxit = 100, MAXENT.a = NULL, MAXENT.an = 10000, MAXENT.BackData = NULL, MAXENT.path = paste(getwd(), "/models/maxent", sep=""), GBM.formula = NULL, GBM.n.trees = 2001, GBMSTEP.gbm.x = 2:(1 + raster::nlayers(x)), GBMSTEP.tree.complexity = 5, GBMSTEP.learning.rate = 0.005, GBMSTEP.bag.fraction = 0.5, GBMSTEP.step.size = 100, RF.formula = NULL, RF.ntree = 751, RF.mtry = floor(sqrt(raster::nlayers(x))), GLM.formula = NULL, GLM.family = binomial(link = "logit"), GLMSTEP.steps = 1000, STEP.formula = NULL, GLMSTEP.scope = NULL, GLMSTEP.k = 2, GAM.formula = NULL, GAM.family = binomial(link = "logit"), GAMSTEP.steps = 1000, GAMSTEP.scope = NULL, GAMSTEP.pos = 1, MGCV.formula = NULL, MGCV.select = FALSE, MGCVFIX.formula = NULL, EARTH.formula = NULL, EARTH.glm = list(family = binomial(link = "logit"), maxit = maxit), RPART.formula = NULL, RPART.xval = 50, NNET.formula = NULL, NNET.size = 8, NNET.decay = 0.01, FDA.formula = NULL, SVM.formula = NULL, SVME.formula = NULL, MAHAL.shape = 1)
ensemble.mean(RASTER.species.name = "Species001", RASTER.stack.name = "base", positive.filters = c("grd", "_ENSEMBLE_"), negative.filters = c("xml"), RASTER.format = "raster", RASTER.datatype = "INT2S", RASTER.NAflag = -32767, KML.out = FALSE, KML.maxpixels = 100000, KML.blur = 10, p = NULL, a = NULL, pt = NULL, at = NULL, threshold = -1, threshold.method = "spec_sens", threshold.sensitivity = 0.9, threshold.PresenceAbsence = FALSE)
ensemble.plot(RASTER.species.name = "Species001", RASTER.stack.name = "base", plot.method = "suitability", dev.new.width = 7, dev.new.height = 7, main = paste(RASTER.species.name, " ", plot.method, " for ", RASTER.stack.name, sep=""), positive.filters = c("grd","_MEAN_"), negative.filters = c("xml"), p=NULL, a=NULL, threshold = -1, threshold.method = "spec_sens", threshold.sensitivity = 0.9, threshold.PresenceAbsence = FALSE, abs.breaks = 6, pres.breaks = 6, maptools.boundaries = TRUE, maptools.col = "dimgrey", ...)stack) containing all layers to calibrate an ensemble. xn and the selection of background points to a sub-region of x, typically provided as c(lonmin, lonmax, latmin, latmax); see also predict, randomPoints and extent randomPoints in case argument a or species.absence is missing TRUE) that presence points will be excluded from the background points; see also randomPoints TRUE, then new background points that will be used for evaluating the suitability models will be selected (randomPoints) in circular neighbourhoods (created with circles) around presence locations (p and pt). circles) around presence locations (p and pt). k=5 results in 4/5 of presence and background points to be used for calibrating the models, and 1/5 of presence and background points to be used for evaluating the models). See also kfold. ensemble.test.splits step in batch processing. See also kfold. k=5 results in 4/5 of presence and background points to be used for calibrating the models, and 1/5 of presence and background points to be used for evaluating the models). See also kfold. TRUE). The name of file is based on species names. In case a file already exists, then results are appended. See also sink. writeFormats and writeRaster. dataType and writeRaster. writeRaster. FALSE, then no kml layers (layers that can be shown in Google Earth) are produced. If TRUE, then kml files will be saved in a subfolder 'kml'. KML. KML.blur^2, which may help avoid blurring of isolated pixels. 
See also KML. TRUE). The filename will be species.name with extension .models; this file will be saved in subfolder of models. When loading this file, model results will be available as ensemble.models. spec_sens (highest sum of the true positive rate and the true negative rate), kappa (highest kappa value), no_omission (highest threshold that corresponds to no omission), prevalence (modeled prevalence is closest to observed prevalence) and equal_sens_spec (equal true positive rate and true negative rate). See threshold. Options specific to the BiodiversityR implementation are: threshold.mean (resulting in calculating the mean value of spec_sens, equal_sens_spec and prevalence) and threshold.min (resulting in calculating the minimum value of spec_sens, equal_sens_spec and prevalence). threshold.method = 'sensitivity'. See threshold. TRUE calculate thresholds with the PresenceAbsence package. See optimal.thresholds. ensemble.strategy is called internally to determine weights for the ensemble model. ensemble.strategy is called internally to determine weights for the ensemble model. ENSEMBLE.min typically refers to input AUC values. NULL then values provided by parameters such as MAXENT and GBM will be used. As an alternative, the output from ensemble.test.splits can be used. maxent). (Only weights > 0 will be used.) gbm). (Only weights > 0 will be used.) gbm.step). (Only weights > 0 will be used.) randomForest). (Only weights > 0 will be used.) glm). (Only weights > 0 will be used.) stepAIC). (Only weights > 0 will be used.) gam). (Only weights > 0 will be used.) step.gam). (Only weights > 0 will be used.) gam). (Only weights > 0 will be used.) gam) will be fitted among ensemble earth). (Only weights > 0 will be used.) rpart). (Only weights > 0 will be used.) nnet). (Only weights > 0 will be used.) fda). (Only weights > 0 will be used.) ksvm). (Only weights > 0 will be used.) svm). (Only weights > 0 will be used.) bioclim). (Only weights > 0 will be used.) domain). 
(Only weights > 0 will be used.) mahal). (Only weights > 0 will be used.) TRUE, then subsequently to the fitting of the individual algorithm (e.g. maximum entropy or GAM) a generalized linear model (glm) with probit link family=binomial(link="probit") will be fitted to transform the predictions, using the previous predictions as explanatory variable. This transformation results in all model predictions to be probability estimates. TRUE, then use the average of the AUC for the different submodels in the different crossvalidation runs as weights for the 'full' ensemble model. See ensemble.test.splits for details. "BIOMOD" results in equal weighting of all presence and all background cases, "equal" results in equal weighting of all cases. The user can supply a vector of weights similar to the number of cases in the calibration data set. x. See also addLayer. prepareData TRUE). See also ensemble.formulae. glm.control, gam.control, gam.control and nnet. maxent), typically available in 2-column (lon, lat) dataframe; see also prepareData and extract. Ignored if MAXENT.BackData is provided. randomPoints in case argument MAXENT.a is missing. When used with the ensemble.batch function, the same background locations will be used for each of the species runs; this implies that for each species, presence locations are not excluded from the background data for this function. maxent). When used with the ensemble.batch function, the same background locations will be used for each of the cross-validation runs; this is based on the assumption that a large number (~10000) of background locations are used. maxent gbm gbm gbm.step gbm.step gbm.step gbm.step gbm.step randomForest randomForest randomForest glm glm stepAIC stepAIC stepAIC stepAIC gam gam step.gam step.gam gam TRUE, then the smoothing parameter estimation that is part of fitting can completely remove terms from the model; see also gam earth rpart rpart.control nnet nnet nnet fda ksvm svm mahal. See details section. 
prepareData and extract prepareData and extract prepareData prepareData and extract p and absence a locations. suitability plots suitability maps, presence plots presence-absence maps and count plots count maps (count of number of algorithms or number of ensembles predicting presence). dev.new). If < 0, then no new graphics device is opened. dev.new). If < 0, then no new graphics device is opened. suitability mapping). suitability mapping). TRUE, then plot approximate country boundaries wrld_simpl wrld_simpl plot. ensemble.test.splits, ensemble.test and ensemble.raster. ensemble.test.splits results in a cross-validation procedure whereby the data set is split in calibration and testing subsets and the best weights for the ensemble model are determined (including the possibility for weights = 0).
ensemble.test is the step whereby models are calibrated using all the available presence data.
ensemble.raster is the final step whereby raster layers are produced for the ensemble model.
Function ensemble.mean results in raster layers that are based on the summary of several ensemble layers: the new ensemble has probability values that are the mean of the probabilities of the different raster layers, the presence-absence threshold is derived for this new ensemble layer, whereas the count reflects the number of ensemble layers where presence was predicted. Note the assumption that input probabilities are scaled between 0 and 1000 (as the output from ensemble.raster), whereas thresholds are based on actual probabilities (scaled between 0 and 1).
Function ensemble.plot plots suitability, presence-absence or count maps. In the case of suitability maps, the presence-absence threshold needs to be provided, as suitabilities smaller than the threshold will be coloured red to orange, whereas suitabilities larger than the threshold will be coloured light blue to dark blue.
ensemble.test.splits, ensemble.test, ensemble.raster
## Not run:
# # based on examples in the dismo package
#
# # get predictor variables
# library(dismo)
# predictor.files <- list.files(path=paste(system.file(package="dismo"), '/ex', sep=''),
# pattern='grd', full.names=TRUE)
# predictors <- stack(predictor.files)
# # subset based on Variance Inflation Factors
# predictors <- subset(predictors, subset=c("bio5", "bio6",
# "bio16", "bio17", "biome"))
# predictors
# predictors@title <- "base"
#
# # presence points
# presence_file <- paste(system.file(package="dismo"), '/ex/bradypus.csv', sep='')
# pres <- read.table(presence_file, header=TRUE, sep=',')
# pres[,1] <- rep("Bradypus", nrow(pres))
#
# # choose background points
# ext <- extent(-90, -32, -33, 23)
# background <- randomPoints(predictors, n=1000, ext=ext, extf = 1.00)
#
# # north and south for new predictions (as if new climates)
# ext2 <- extent(-90, -32, 0, 23)
# predictors2 <- crop(predictors, y=ext2)
# predictors2@title <- "north"
#
# ext3 <- extent(-90, -32, -33, 0)
# predictors3 <- crop(predictors, y=ext3)
# predictors3@title <- "south"
#
# # fit 3 ensembles with batch processing, choosing the best ensemble model based on the
# # average AUC of 4-fold split of calibration and testing data
# # final models use all available presence data and average weights determined by the
# # ensemble.test.splits function (called internally)
# # batch processing can handle several species by using 3-column species.presence and
# # species.absence data sets
# # note that these calculations can take a while
#
# ensemble.nofactors <- ensemble.batch(x=predictors, ext=ext,
# xn=c(predictors2, predictors3),
# species.presence=pres,
# species.absence=background,
# k.splits=4, k.test=0,
# n.ensembles=3,
# SINK=TRUE,
# layer.drops=c("biome"),
# ENSEMBLE.best=0, ENSEMBLE.exponent=c(1, 2, 4, 6, 8),
# ENSEMBLE.min=0.7,
# MAXENT=1, GBM=1, GBMSTEP=0, RF=1, GLM=1, GLMSTEP=1, GAM=1, GAMSTEP=0, MGCV=1,
# EARTH=1, RPART=1, NNET=1, FDA=1, SVM=1, SVME=1, BIOCLIM=1, DOMAIN=1, MAHAL=0,
# Yweights="BIOMOD",
# formulae.defaults=TRUE)
#
# # summaries for the 3 ensembles for the species
# # summaries are based on files in folders ensemble, ensemble/presence and
# # ensemble/count
#
# ensemble.mean(RASTER.species.name="Bradypus", RASTER.stack.name="base",
# p=pres, a=background,
# KML.out=T)
#
# # plot mean suitability
# plot1 <- ensemble.plot(RASTER.species.name="Bradypus", RASTER.stack.name="base",
# plot.method="suitability",
# p=pres, a=background, abs.breaks=4, pres.breaks=9)
# plot1
#
# ## End(Not run)
Run the code above in your browser using DataLab