# Toy data: 18 rows spread over the seven groups "v1", ..., "v7".
# Group labels; group sizes are 1, 2, 2, 2, 3, 4 and 4 rows.
v1 <- rep(paste0("v", 1:7), times = c(1, 2, 2, 2, 3, 4, 4))
# Numeric values, one line per group (each group sums to 100).
num <- c(
  100,             # v1
  90, 10,          # v2
  80, 20,          # v3
  70, 30,          # v4
  50, 25, 25,      # v5
  40, 20, 20, 20,  # v6
  25, 25, 25, 25   # v7
)
# Weights, one per row, laid out in the same group order as num.
sweight <- c(
  1,
  2, 1,
  2, 1,
  2, 1,
  2, 1, 1,
  2, 1, 1, 1,
  2, 1, 1, 1
)
# Column names are taken from the variable names.
d <- data.frame(v1, num, sweight)
# Basic use: dominance rule specified with n and k
SuppressDominantCells(
  d,
  n = c(1, 2), k = c(80, 70),
  numVar = "num",
  formula = ~ v1 - 1
)
# Same as above, with n left out
SuppressDominantCells(d, k = c(80, 70), numVar = "num", formula = ~ v1 - 1)
# p% rule specified with pPercent
SuppressDominantCells(d, pPercent = 7, numVar = "num", formula = ~ v1 - 1)

# With weights
SuppressDominantCells(
  d,
  n = c(1, 2), k = c(80, 70),
  numVar = "num",
  dimVar = "v1",
  sWeightVar = "sweight"
)

# Overwriting some parameters in the default spec
SuppressDominantCells(
  d,
  n = c(1, 2), k = c(80, 70),
  numVar = "num",
  dimVar = "v1",
  sWeightVar = "sweight",
  domWeightMethod = "tauargus"
)

# Dominance rule and few-contributors rule used together.
# Compare the second call (maxN = 4) with the first (maxN = 3).
SuppressDominantCells(
  d,
  n = c(1, 2), k = c(80, 70),
  numVar = "num",
  formula = ~ v1 - 1,
  primary = c(DominanceRule, NContributorsRule),
  maxN = 3,
  allDominance = TRUE
)
SuppressDominantCells(
  d,
  n = c(1, 2), k = c(80, 70),
  numVar = "num",
  formula = ~ v1 - 1,
  primary = c(DominanceRule, NContributorsRule),
  maxN = 4,
  allDominance = TRUE
)
# Example data treated as microdata; only the first four columns are kept
d2 <- SSBtoolsData("d2")[1:4]
# Add a reproducible, non-negative numeric variable
set.seed(123)
d2$v <- rnorm(nrow(d2))^2

# Hierarchical region variables are detected automatically
# -> same output column
SuppressDominantCells(
  data = d2,
  n = c(1, 2), k = c(70, 95),
  numVar = "v",
  dimVar = c("region", "county", "k_group"),
  allDominance = TRUE
)

# Formula interface. Hierarchical variables are still detected automatically.
SuppressDominantCells(
  data = d2,
  n = c(1, 2), k = c(70, 95),
  numVar = "v",
  formula = ~ main_income * k_group + region + county - k_group
)

# Hierarchies created manually: the number of "@" gives the level of each code
ml <- data.frame(
  levels = c("@", "@@", "@@@", "@@@", "@@@", "@@"),
  codes = c("Total", "not_assistance", "other", "pensions", "wages",
            "assistance")
)
SuppressDominantCells(
  data = d2,
  n = c(1, 2), k = c(70, 95),
  numVar = "v",
  hierarchies = list(main_income = ml, k_group = "Total_Norway")
)
# p% rule combined with contributorVar
SuppressDominantCells(
  data = SSBtoolsData("magnitude1"),
  numVar = "value",
  dimVar = c("sector4", "geo"),
  contributorVar = "company",
  pPercent = 10,
  allDominance = TRUE
)

# Formula input, with FormulaSelection applied to the result afterwards
output <- SuppressDominantCells(
  data = SSBtoolsData("magnitude1"),
  numVar = "value",
  formula = ~ sector2 * geo + sector4 * eu,
  contributorVar = "company",
  k = c(80, 99)
)
FormulaSelection(output, ~ sector2 * geo)

# Similar to the example in the documentation of tables_by_formulas, but with
# SuppressDominantCells as the table function, using the pPercent and
# contributorVar parameters.
tables_by_formulas(
  SSBtoolsData("magnitude1"),
  table_fun = SuppressDominantCells,
  table_formulas = list(
    table_1 = ~ region * sector2,
    table_2 = ~ region1:sector4 - 1,
    table_3 = ~ region + sector4 - 1
  ),
  substitute_vars = list(region = c("geo", "eu"), region1 = "eu"),
  collapse_vars = list(sector = c("sector2", "sector4")),
  dominanceVar = "value",
  pPercent = 10,
  contributorVar = "company"
)
# dummy_aggregate parameters (da_fun, da_vars, da_args) combined with an
# extra primary rule: a cell becomes primary if the maximum input value
# exceeds 60% of the cell value.
SuppressDominantCells(
  data = SSBtoolsData("magnitude1"),
  dominanceVar = "value",
  formula = ~ sector2 * geo + sector4 * eu,
  contributorVar = "company",
  pPercent = 3,
  primary = c(
    MagnitudeRule,
    function(..., da_out, num) da_out[[1]] / num[[1]] > 0.6
  ),
  da_fun = c(mAx = function(x) suppressWarnings(max(x))),
  da_vars = c(mAx = "value"),
  da_args = list(name_sep = "__")
)

# More advanced use of the dummy_aggregate parameters.
# The default primary function (MagnitudeRule) is removed. Instead, a cell
# becomes primary if
#   - the maximum input value exceeds 70% of the cell value, or
#   - the number of contributions from a single company exceeds 55% of the
#     total number of contributions.
# The default preAggregate is changed to speed things up.
SuppressDominantCells(
  data = SSBtoolsData("magnitude1")[c(1:3, 1:20), ],
  dominanceVar = "value",
  formula = ~ sector2 * geo + sector4 * eu,
  primary = function(..., da_out, num, freq) {
    value_dominated <- da_out$value_max / num$value > 0.7
    company_dominated <- da_out$company_freq_max / freq > 0.55
    value_dominated | company_dominated
  },
  da_fun = c(max = max, freq_max = function(x) max(table(x))),
  da_vars = c(max = "value", freq_max = "company"),
  # TRUE here, since the default is FALSE without contributorVar
  preAggregate = TRUE,
  # Not needed, since preAggregate is used and there is no contributorVar
  extraAggregate = FALSE,
  singletonMethod = "none"
)
# Run the code above in your browser using DataLab