# Example data: a2, b2, c2 and d contain NA values; a1, b1 and c1 do not.
d <- data.frame(
  a1 = seq_len(10),
  a2 = c(seq_len(9), NA),
  b1 = "b",
  b2 = NA,
  c1 = rep(c(TRUE, FALSE), times = 5),
  c2 = rep(c(TRUE, NA), times = 5),
  d = rep(c(TRUE, FALSE, NA), times = c(4, 4, 2))
)

# Threshold of 1: drop only columns that are constant,
# once with .na_rm = FALSE and once with .na_rm = TRUE.
remove_almost_constant(
  d,
  .threshold = 1,
  .na_rm = FALSE
)
remove_almost_constant(
  d,
  .threshold = 1,
  .na_rm = TRUE
)

# Threshold of 0.5: drop columns whose majority value
# occurs in >= 50% of rows, again under both .na_rm settings.
remove_almost_constant(
  d,
  .threshold = 0.5,
  .na_rm = FALSE
)
remove_almost_constant(
  d,
  .threshold = 0.5,
  .na_rm = TRUE
)

# Only the columns from a1 through b2 are checked here.
remove_almost_constant(
  d,
  a1:b2,
  .threshold = 0.5,
  .na_rm = TRUE
)
# Run the code above in your browser using DataLab