# \donttest{
# ---------------------------------------------------------------
# 1. multidetect for general data analysis using the iris data
# ---------------------------------------------------------------
# Start from the built-in iris data; outliers are added for testing only.
irisdata1 <- iris

# Four artificial setosa records carrying extreme values and NAs, laid out
# with the same columns as iris so they can be appended directly.
rowsOutNA1 <- data.frame(
  Sepal.Length = c(344, NA, NA, NA),
  Sepal.Width  = c(34, 45, 544, NA),
  Petal.Length = c(584, 5, 554, NA),
  Petal.Width  = c(575, 4554, 474, NA),
  Species      = rep("setosa", 4)
)
dfinal <- rbind(irisdata1, rowsOutNA1)

# Keep only the setosa records and the two columns used for detection.
setosadf <- dfinal[
  dfinal[["Species"]] %in% "setosa",
  c("Sepal.Width", "Species")
]
# Run the listed outlier detection methods on Sepal.Width for the single
# setosa group.
setosa_outlier_detection <- multidetect(
  data = setosadf,
  var = "Sepal.Width",
  multiple = FALSE, # one species
  methods = c(
    "adjbox", "iqr", "hampel", "jknife", "seqfences", "mixediqr",
    "distboxplot", "semiqr", "zscore", "logboxplot", "medianrule"
  ),
  silence_true_errors = FALSE,
  missingness = 0.1,
  sdm = FALSE,
  na.inform = TRUE
)
# ---------------------------------------------------------------
# 2. All species
# ---------------------------------------------------------------
# Same methods as for the single-species run, now applied per group, with
# the groups taken from the Species column.
multspp_outlier_detection <- multidetect(
  data = dfinal,
  var = "Sepal.Width",
  multiple = TRUE, # for multiple species or groups
  var_col = "Species",
  methods = c(
    "adjbox", "iqr", "hampel", "jknife", "seqfences", "mixediqr",
    "distboxplot", "semiqr", "zscore", "logboxplot", "medianrule"
  ),
  silence_true_errors = FALSE,
  missingness = 0.1,
  sdm = FALSE,
  na.inform = TRUE
)

ggoutliers(multspp_outlier_detection)
# ---------------------------------------------------------------
# 3. multidetect for environmental data
# ---------------------------------------------------------------
# Species data
data("abdata")

# Area of interest
danube <- system.file("extdata", "danube.shp.zip", package = "specleanr")
db <- sf::st_read(danube, quiet = TRUE)

# Raster layers shipped with the package
worldclim <- terra::rast(
  system.file("extdata", "worldclim.tiff", package = "specleanr")
)

# Extract the raster values at the species records inside the bounding box.
abpred <- pred_extract(
  data = abdata,
  raster = worldclim,
  lat = "decimalLatitude",
  lon = "decimalLongitude",
  colsp = "species",
  bbox = db,
  minpts = 10,
  list = TRUE,
  merge = FALSE
)

# Detect outliers in bio6, leaving the coordinate columns out.
about_df <- multidetect(
  data = abpred,
  multiple = FALSE,
  var = "bio6",
  output = "outlier",
  exclude = c("x", "y"),
  methods = c(
    "zscore", "adjbox", "iqr", "semiqr", "hampel", "kmeans",
    "logboxplot", "lof", "iforest", "mahal", "seqfences"
  )
)

ggoutliers(about_df)
# ---------------------------------------------------------------
# 4. For multiple species in species distribution models
# ---------------------------------------------------------------
data("efidata")
data("jdsdata")

# Harmonise the two datasets into one table.
matchdata <- match_datasets(
  datasets = list(jds = jdsdata, efi = efidata),
  lats = "lat",
  lons = "lon",
  species = c("speciesname", "scientificName"),
  date = c("Date", "sampling_date"),
  country = c("JDS4_site_ID")
)

# Extract data
rdata <- pred_extract(
  data = matchdata,
  raster = worldclim,
  lat = "decimalLatitude",
  lon = "decimalLongitude",
  colsp = "species",
  bbox = db,
  minpts = 10,
  list = TRUE,
  merge = FALSE
)

# Detect outliers in bio6 across the extracted species data.
multspout_df <- multidetect(
  data = rdata,
  multiple = TRUE,
  var = "bio6",
  output = "outlier",
  exclude = c("x", "y"),
  methods = c(
    "zscore", "adjbox", "iqr", "semiqr", "hampel", "kmeans",
    "logboxplot", "lof", "iforest", "mahal", "seqfences"
  )
)

ggoutliers(multspout_df, "Anguilla anguilla")
# ---------------------------------------------------------------
# 5. Use optimal ranges as a method
#    Create species ranges
# ---------------------------------------------------------------
# The maximum temperature of "Thymallus thymallus" is made up so that the
# species appears among the outliers.
optdata <- data.frame(
  species = c("Phoxinus phoxinus", "Thymallus thymallus"),
  mintemp = c(6, 1.6),
  maxtemp = c(20, 8.6),
  meantemp = c(8.69, 8.4), # ecoparam
  direction = c("greater", "greater")
)

ttdata <- rdata["Thymallus thymallus"]

# Even with a single species, set multiple = TRUE because the data is
# picked from the pred_extract() output.
thymallus_out_ranges <- multidetect(
  data = ttdata,
  multiple = TRUE,
  var = "bio1",
  output = "outlier",
  exclude = c("x", "y"),
  methods = c(
    "zscore", "adjbox", "iqr", "semiqr", "hampel", "kmeans",
    "logboxplot", "lof", "iforest", "mahal", "seqfences", "optimal"
  ),
  optpar = list(
    optdf = optdata,
    optspcol = "species",
    mincol = "mintemp",
    maxcol = "maxtemp"
  )
)

ggoutliers(thymallus_out_ranges)
# }
# Run the code above in your browser using DataLab