# Assemble a synthetic two-part test dataset ("clean" and "biased").

# Clean part: 1000 points whose longitude and latitude are drawn uniformly
# and kept at full precision.
clean <- data.frame(
  dataset = rep("clean", times = 1000),
  decimalLongitude = runif(1000, min = -43, max = -40),
  decimalLatitude = runif(1000, min = -13, max = -10)
)

# Biased part: 500 values rounded to one decimal place, 300 rounded to whole
# numbers and 200 left unrounded -- generated first for longitude, then for
# latitude.
bias.long <- c(
  round(runif(500, min = -42, max = -40), digits = 1),
  round(runif(300, min = -42, max = -40)),
  runif(200, min = -42, max = -40)
)
bias.lat <- c(
  round(runif(500, min = -12, max = -10), digits = 1),
  round(runif(300, min = -12, max = -10)),
  runif(200, min = -12, max = -10)
)
bias <- data.frame(
  dataset = rep("biased", times = 1000),
  decimalLongitude = bias.long,
  decimalLatitude = bias.lat
)

# Stack both parts into one data frame, clean rows first.
test <- rbind(clean, bias)
# Example usage. Wrapped in `if (FALSE)` so it is never executed when the
# file is sourced; run the statements inside manually.
if (FALSE) {
  # Run clean_dataset on the combined data.
  flags <- clean_dataset(test)

  # Check problems: plot the longitudes of the rows of `test` whose dataset
  # name matches the row names of the selected rows of `flags`.
  # NOTE(review): assumes the row names of `flags` are the dataset names --
  # confirm against the return value of clean_dataset.
  # `%in%` is used instead of `==` because `==` recycles the right-hand side
  # element-wise and picks the wrong rows when it holds more than one name.

  # Datasets where `flags$summary` is TRUE (clean).
  hist(test[
    test$dataset %in% rownames(flags[flags$summary, ]),
    "decimalLongitude"
  ])

  # Datasets where `flags$summary` is FALSE (biased).
  hist(test[
    test$dataset %in% rownames(flags[!flags$summary, ]),
    "decimalLongitude"
  ])
}
# Run the code above in your browser using DataLab