# Example workflow: harmonise country names in the beesFlagged example data,
# fill in missing countries from the jbd_CfC_chunker() output, and strip
# unwanted characters. Only runs when the optional 'rnaturalearthdata'
# package is available.
if (requireNamespace("rnaturalearthdata", quietly = TRUE)) {
  library("dplyr")
  data(beesFlagged)

  # Directory handed to jbd_CfC_chunker() via its `path` argument
  HomePath <- tempdir()

  # Tibble of common issues in country names and their replacements.
  # `problem` and `fix` are parallel vectors: element i of `fix` replaces
  # element i of `problem`.
  commonProblems <- dplyr::tibble(
    problem = c(
      "U.S.A.", "US", "USA", "usa", "UNITED STATES",
      "United States", "U.S.A", "MX", "CA", "Bras.", "Braz.", "Brasil",
      "CNMI", "USA TERRITORY: PUERTO RICO"
    ),
    fix = c(
      "United States of America", "United States of America",
      "United States of America", "United States of America",
      "United States of America", "United States of America",
      "United States of America", "Mexico", "Canada", "Brazil", "Brazil",
      "Brazil", "Northern Mariana Islands", "Puerto Rico"
    )
  )

  # Introduce a known problem ("Brasil", listed in commonProblems) so the
  # cleaning step below has something to correct.
  beesFlagged <- beesFlagged %>%
    dplyr::mutate(
      country = stringr::str_replace_all(country, "Brazil", "Brasil")
    )

  # Apply the problem -> fix table to the data
  beesFlagged_out <- countryNameCleanR(
    data = beesFlagged,
    commonProblems = commonProblems
  )

  # Process the records in chunks; only conditions of class "warning" are
  # silenced, errors still propagate.
  countryOutput <- suppressWarnings(
    jbd_CfC_chunker(
      data = beesFlagged_out,
      lat = "decimalLatitude",
      lon = "decimalLongitude",
      country = "country",
      # How many rows to process at a time
      stepSize = 1000000,
      # Start row
      chunkStart = 1,
      path = HomePath,
      scale = "medium"
    ),
    classes = "warning"
  )

  # Left join the chunker output back onto the data by record identifier
  beesFlagged_out <- beesFlagged_out %>%
    dplyr::left_join(countryOutput, by = "database_id") %>%
    # Keep the original country where present; otherwise take the value
    # from countryOutput
    dplyr::mutate(country = dplyr::coalesce(country.x, country.y)) %>%
    # Remove the now redundant country columns
    dplyr::select(!c(country.x, country.y)) %>%
    # Put the merged column back at the front
    dplyr::relocate(country) %>%
    # Remove duplicates if they arose from the join
    dplyr::distinct()

  # Remove illegal characters: every "[", "]" and "?" in each value.
  # str_replace_all() is required here; str_replace() would only strip the
  # first match and leave e.g. "[Brazil]" as "Brazil]".
  beesFlagged_out$country <- stringr::str_replace_all(
    beesFlagged_out$country,
    pattern = "\\[|\\]|\\?",
    replacement = ""
  )
} # END if require
# Run the code above in your browser using DataLab