# NOT RUN {
# Import data.
fname <- system.file("extdata", "finndiane.txt", package = "Numero")
dataset <- read.delim(file = fname)
# Create new versions for testing.
dsA <- dataset[1:250, c("INDEX","AGE","MALE","uALB")]
dsB <- dataset[151:300, c("INDEX","AGE","MALE","uALB","CHOL")]
dsC <- dataset[201:500, c("INDEX","AGE","MALE","DIAB_RETINO")]
# Select all rows.
results <- numero.clean(a = dsA, b = dsB, c = dsC, identity = "INDEX")
cat("\n\nNo selection:\n")
print(nrow(results$a))
print(nrow(results$b))
print(nrow(results$c))
# Select all rows and expanded for all identities.
results <- numero.clean(a = dsA, b = dsB, c = dsC, identity = "INDEX",
select = "union")
cat("\n\nUnion:\n")
print(nrow(results$a))
print(nrow(results$b))
print(nrow(results$c))
# Select only rows that are shared between all datasets.
results <- numero.clean(a = dsA, b = dsB, c = dsC, identity = "INDEX",
select = "intersection")
cat("\n\nIntersection:\n")
print(nrow(results$a))
print(nrow(results$b))
print(nrow(results$c))
# Select only rows with a unique INDEX ('dsB' has none).
results <- numero.clean(a = dsA, b = dsB, c = dsC, identity = "INDEX",
select = "exclusion")
cat("\n\nExclusion:\n")
print(nrow(results$a))
print(nrow(results$b))
print(nrow(results$c))
# Add extra identification information.
dsA$GROUP <- "A"
dsB$GROUP <- "B"
dsC$GROUP <- "C"
# Select rows with a unique identifier.
results <- numero.clean(a = dsA, b = dsB, c = dsC,
identity = c("GROUP","INDEX"),
select = "exclusion")
cat("\n\nMulti-identities:\n")
print(nrow(results$a))
print(nrow(results$b))
print(nrow(results$c))
# }
Run the code above in your browser using DataLab