# NOT RUN {
# Example: link Bloom-filter-encoded records with the PPRL package.
# The unqualified calls below assume PPRL is attached (library(PPRL)).

# load test data
# The file is tab-separated and read without a header row, so columns are
# named V1, V2, ...; every column is read as character.
testFile <- system.file("extdata", "testdata.csv", package = "PPRL",
                        mustWork = TRUE)
testData <- read.csv(testFile, header = FALSE, sep = "\t",
                     colClasses = "character")

# create Bloom Filters
# Column V1 supplies the record IDs and column V7 the values to encode.
# Argument names are spelled out in full instead of relying on partial
# matching (`q`, `l`).
testDataBF <- CreateBF(ID = testData$V1, testData$V7,
                       k = 20, padding = 1, qgram = 2, lenBloom = 1000,
                       password = "(H]$6Uh*-Z204q")

# The three SelectSimilarityFunctionBF() calls below are alternatives: each
# one overwrites `lBF`, so as written only the last (sorted neighbourhood)
# configuration is passed to BloomFilterLinkage(). Keep the one you need.

# define bloom filter column in data and select similarity function and
# threshold using multibit trees
lBF <- SelectSimilarityFunctionBF("CLKs", "CLKs", method = "mtan",
                                  threshold = 0.85, symdex = TRUE,
                                  leaflimit = 3, cores = 1)
# or
# define bloom filter column in data and select similarity function and
# threshold using canopy clustering
lBF <- SelectSimilarityFunctionBF("CLKs", "CLKs", method = "CCtan",
                                  threshold = 0.85, looseThreshold = 0.7,
                                  tightThreshold = 0.8)
# or
# define bloom filter column in data and select similarity function and
# threshold using sorted neighbourhood
lBF <- SelectSimilarityFunctionBF("CLKs", "CLKs", method = "SNtan",
                                  threshold = 0.85, windowSize = 5)

# calculate result (in this example data is linked to itself)
linked <- BloomFilterLinkage(testDataBF$ID, testDataBF,
                             testDataBF$ID, testDataBF,
                             blocking = NULL, similarity = lBF)
# }
# Run the code above in your browser using DataLab