## align sequence of letters
smith_waterman("Joske Vermeulen", "Jiske Vermoelen")
smith_waterman("Joske Vermeulen", "Ik zoek naar Jiske Versmoelen, waar is die te vinden")
smith_waterman("Joske", "Jiske")
smith_waterman("Joske", "Jiske",
similarity = function(x, y) ifelse(x == y | (x == "o" & y == "i"), 2L, -1L))
## align sequence of words
a <- "The answer is blowin' in the wind."
b <- "As the Bob Dylan song says, the answer is blowing in the wind."
smith_waterman(a, b)
smith_waterman(a, b, type = "characters")
smith_waterman(a, b, type = "words")
smith_waterman(a, b, type = "words", similarity = function(x, y) adist(x, y))
smith_waterman(a, b, type = "words",
tokenizer = function(x) unlist(strsplit(x, "[[:space:]]")))
x <- smith_waterman(a, b, type = "words")
x$b$tokens[x$b$alignment$from:x$b$alignment$to]
# examples on aligning text files
a <- system.file(package = "text.alignment", "extdata", "example1.txt")
a <- readLines(a)
a <- paste(a, collapse = "\n")
b <- system.file(package = "text.alignment", "extdata", "example2.txt")
b <- readLines(b)
b <- paste(b, collapse = "\n")
smith_waterman(a, b, type = "characters")
smith_waterman(a, b, type = "words")
smith_waterman("Gistel Hof", b, type = "characters")
smith_waterman("Bailiestraat", b, type = "characters")
smith_waterman("Lange rei", b, type = "characters")
# examples on extracting where elements were found
x <- smith_waterman("Lange rei", b)
x$b$tokens[x$b$alignment$from:x$b$alignment$to]
as.data.frame(x)
as.data.frame(x, alignment_id = "alignment-ab")
x <- lapply(c("Lange rei", "Gistel Hof", NA, "Test"), FUN = function(a){
x <- smith_waterman(a, b)
x <- as.data.frame(x)
x
})
x <- do.call(rbind, x)
x
Run the code above in your browser using DataLab