corrections <- data.frame(
  bad  = c("foubar", "foobr", "fubar", "unknown", ".missing"),
  good = c("foobar", "foobar", "foobar", ".na", "missing"),
  stringsAsFactors = FALSE
)
corrections
# create some fake data
my_data <- c(letters[1:5], sample(corrections$bad[-5], 10, replace = TRUE))
my_data[sample(6:15, 2)] <- NA # with missing elements
match_vec(my_data, corrections)
# You can use regular expressions to simplify your list
corrections <- data.frame(
  bad  = c(".regex f[ou][^m].+?r$", "unknown", ".missing"),
  good = c("foobar", ".na", "missing"),
  stringsAsFactors = FALSE
)
# You can also set a default value
corrections_with_default <- rbind(corrections, c(bad = ".default", good = "unknown"))
corrections_with_default
# a warning will be issued about the data that were converted
match_vec(my_data, corrections_with_default)
# use warn_default = FALSE if you are absolutely sure you don't want it.
match_vec(my_data, corrections_with_default, warn_default = FALSE)
# The function will give you a warning if the dictionary does not
# match the data
match_vec(letters, corrections)
# This can be used for translating survey output
words <- data.frame(
  option_code = c(".regex ^[yY][eE]?[sS]?",
                  ".regex ^[nN][oO]?",
                  ".regex ^[uU][nN]?[kK]?",
                  ".missing"),
  option_name = c("Yes", "No", ".na", "Missing"),
  stringsAsFactors = FALSE
)
match_vec(c("Y", "Y", NA, "No", "U", "UNK", "N"), words)
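# Illustrative check (assumes the documented behaviour of the ".na" keyword):
# options recoded with ".na" become true missing values in the output
res <- match_vec(c("Y", "Y", NA, "No", "U", "UNK", "N"), words)
is.na(res)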