protectLinkedTables: protect sdcProblem-class
objects that have common cells

Description

protectLinkedTables
can be used to protect
tables that have common cells. It is of course required
that after the anonymization process has finished, all
common cells have the same anonymization state in both
tables.

Usage

protectLinkedTables(objectA, objectB, commonCells, method, ...)
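Arguments

objectA: an sdcProblem-class object.
objectB: an sdcProblem-class object.
commonCells: a list describing the cells that objectA and objectB
have in common. For each variable that
has one or more common codes in both tables, a list
element needs to be specified.
method: the protection method to apply. The methods listed are:
HITAS
HYPERCUBE
OPT
...: further arguments (the original description was lost in
extraction); for details see protectTable.

Value

safeObj-class objects; in the example below the result is indexed as
result[[1]] and result[[2]], one per input table.

See also

protectTable

Examples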
# Example: protecting two linked tables that share common cells.
# The sdcTable package must be attached, because makeProblem(),
# changeCellStatus() and protectLinkedTables() are called unqualified.

# load micro data for further processing
# system.file() resolves the file inside the installed package directly and,
# with mustWork = TRUE, fails with a clear error if it is missing. Matching
# "sdcTable" against searchpaths() could hit zero or several entries and
# produce an invalid path.
fn <- system.file("data", "microData2.RData", package = "sdcTable",
                  mustWork = TRUE)
# load() returns the name of the restored object; get() fetches its value
microData <- get(load(fn))

# table1: defined by variables 'gender' and 'ecoOld'
microData1 <- microData[, c(2, 3, 5)]
# table2: defined by variables 'region', 'gender' and 'ecoNew'
microData2 <- microData[, c(1, 2, 4, 5)]

# we need to create information on the hierarchies
# (column 'h' holds the hierarchy level as a run of '@', column 'l' the code)
# variable 'region': exists only in microData2
dim.region <- data.frame(
  h = c("@", "@@", "@@"),
  l = c("Tot", "R1", "R2")
)
# variable 'gender': exists in both datasets
dim.gender <- data.frame(
  h = c("@", "@@", "@@"),
  l = c("Tot", "m", "f")
)
# variable 'ecoOld': exists only in microData1
dim.ecoOld <- data.frame(
  h = c("@", "@@", "@@@", "@@@", "@@", "@@@", "@@@"),
  l = c("Tot", "A", "Aa", "Ab", "B", "Ba", "Bb")
)
# variable 'ecoNew': exists only in microData2
dim.ecoNew <- data.frame(
  h = c("@", "@@", "@@@", "@@@", "@@@", "@@", "@@@", "@@@", "@@@"),
  l = c("Tot", "C", "Ca", "Cb", "Cc", "D", "Da", "Db", "Dc")
)

# creating objects holding information on dimensions
dimList1 <- list(gender = dim.gender, ecoOld = dim.ecoOld)
dimList2 <- list(region = dim.region, gender = dim.gender, ecoNew = dim.ecoNew)

# creating input objects for further processing. For details have a look at
# the help page of makeProblem().
problem1 <- makeProblem(
  data = microData1, dimList = dimList1,
  dimVarInd = c(1, 2), numVarInd = 3
)
problem2 <- makeProblem(
  data = microData2, dimList = dimList2,
  dimVarInd = c(1, 2, 3), numVarInd = 4
)

# the cell specified by gender=='Tot' and ecoOld=='A'
# is one of the common cells! -> we mark it as primary suppression
problem1 <- changeCellStatus(
  problem1,
  characteristics = c("Tot", "A"),
  varNames = c("gender", "ecoOld"),
  rule = "u", verbose = FALSE
)

# the cell specified by region=='Tot' and gender=='f' and ecoNew=='C'
# is one of the common cells! -> we mark it as primary suppression
problem2 <- changeCellStatus(
  problem2,
  characteristics = c("Tot", "f", "C"),
  varNames = c("region", "gender", "ecoNew"),
  rule = "u", verbose = FALSE
)

# specifying input to define common cells: one list element per linked
# variable, holding (1) the variable name in 'problem1', (2) the variable
# name in 'problem2', (3) the common codes in 'problem1' and, if the codes
# differ between the tables, (4) the corresponding codes in 'problem2'
commonCells <- list(
  # variable "gender": equal characteristics on both datasets
  # -> keyword 'ALL'
  v.gender = list("gender", "gender", "ALL"),
  # variables ecoOld and ecoNew: 'A' and 'B' in 'ecoOld' ('problem1')
  # correspond to 'C' and 'D' in 'ecoNew' ('problem2')
  v.eco = list("ecoOld", "ecoNew", c("A", "B"), c("C", "D"))
)

# protect the linked data
result <- protectLinkedTables(
  problem1, problem2, commonCells,
  method = "HITAS", verbose = TRUE
)

# having a look at the results
result.tab1 <- result[[1]]
result.tab2 <- result[[2]]
summary(result.tab1)
summary(result.tab2)
Run the code above in your browser using DataLab