## ------------------------------------------------
## Method `DataCleaner$clean_file`
## ------------------------------------------------
# ---- Environment setup: begin ----
# Verbosity of the information messages; the same value is handed to
# both the EnvManager and the DataCleaner below
ve <- 0
# Folder that will hold all the files. A name would be created in the
# current directory; NULL implies tempdir will be used
fn <- NULL
# Default files that ship with the package and are needed by this example
rf <- "test.txt"
# Build the EnvManager and fetch the required files. The value returned
# by setup_env is used below as the directory containing those files
em <- EnvManager$new(ve = ve, rp = "./")
ed <- em$setup_env(rf, fn)
# ---- Environment setup: end ----
# Path of the input test file. Note that `fn` is reused here: it no
# longer holds the folder name passed to setup_env above
fn <- paste0(ed, "/test.txt")
# Path the cleaned copy of the test file is written to
cfn <- paste0(ed, "/test-clean.txt")
# Options for the cleaner: only the output file is set
dc_opts <- list(output_file = cfn)
# Create the DataCleaner for the test file and clean it
dc <- DataCleaner$new(fn, dc_opts, ve = ve)
dc$clean_file()
# Tear down the test environment. Comment out the line below to keep
# the generated files around for inspection
em$td_env()
## ------------------------------------------------
## Method `DataCleaner$clean_lines`
## ------------------------------------------------
# Verbosity of the information messages
ve <- 0
# Create the DataCleaner; no input file is needed because the lines are
# passed in directly
dc <- DataCleaner$new(ve = ve)
# Sample input lines
l <- c(
  "If you think I'm wrong, send me a link to where it's happened",
  "We're about 90percent done with this room",
  "This isn't how I wanted it between us.",
  "Almost any cute breed can become ornamental",
  "Once upon a time there was a kingdom with a castle",
  "That's not a thing any of us are granted'",
  "Why are you being so difficult? she asks."
)
# Expected results, listed for reference only: nothing below compares
# them with the actual output
res <- c(
  "if you think wrong send me a link to where its happened",
  "were about percent done with this room",
  "this how i wanted it between us",
  "almost any cute breed can become ornamental",
  "once upon a time there was a kingdom with a castle",
  "thats not a thing any of us are granted",
  "why are you being so difficult she asks"
)
# Clean the sample lines
cl <- dc$clean_lines(l)
# Show the cleaned lines
print(cl)
# Run the code above in your browser using DataLab