## ---------------------------------------------------------------------
  ## hasLetterAt(): test for given letters at given positions
  ## ---------------------------------------------------------------------
  seqs <- DNAStringSet(c("AAACGT", "AACGT", "ACGT", "TAGGA"))
  hasLetterAt(seqs, "AAAAAA", at=1:6)

  ## Typical question hasLetterAt() can answer: "which elements of 'seqs'
  ## carry an A at position 2 together with a G at position 4?"
  ## We keep the rows whose sum equals 2, the number of letters queried.
  a2_g4 <- hasLetterAt(seqs, "AG", at=c(2, 4))
  both_hit <- rowSums(a2_g4) == 2
  which(both_hit)

  ## Another one: "how many probes on the drosophila2 chip show T, G, T, A
  ## at positions 2, 4, 13 and 20, respectively?"
  library(drosophila2probe)
  probe_seqs <- DNAStringSet(drosophila2probe)
  tgta_hits <- hasLetterAt(probe_seqs, "TGTA", at=c(2, 4, 13, 20))
  all_four <- rowSums(tgta_hits) == 4
  sum(all_four)

  ## And: "what is the probability of an A at position 25 given that there
  ## is one at position 13?"
  ## Column 1 queries A at position 13; columns 2 to 5 query A, C, G and T
  ## at position 25.
  base_hits <- hasLetterAt(probe_seqs, "AACGT", at=c(13, 25, 25, 25, 25))
  a_at_13 <- base_hits[ , 1]
  n_a_at_13 <- sum(a_at_13)
  sum(a_at_13 & base_hits[ , 2]) / n_a_at_13
  ## Same conditional probability for each of the other bases at
  ## position 25:
  sum(a_at_13 & base_hits[ , 3]) / n_a_at_13  # C
  sum(a_at_13 & base_hits[ , 4]) / n_a_at_13  # G
  sum(a_at_13 & base_hits[ , 5]) / n_a_at_13  # T
  ## ?nucleotideFrequencyAt describes another way to get those results.
  ## ---------------------------------------------------------------------
  ## neditAt() / isMatchingAt() / which.isMatchingAt()
  ## ---------------------------------------------------------------------
  target <- DNAString("GTATA")
  pat <- "AT"

  ## "AT" is found in "GTATA" at position 3 (exact match)
  neditAt(pat, target, at=3)
  isMatchingAt(pat, target, at=3)
  ## ... whereas it is not found at position 1 ('at' left unspecified)
  neditAt(pat, target)
  isMatchingAt(pat, target)
  ## ... unless 1 mismatching letter is tolerated (inexact match)
  isMatchingAt(pat, target, max.mismatch=1)

  ## Trying 6 different starting positions while tolerating 1 mismatching
  ## letter gives 3 matches
  starts_0_5 <- 0:5
  isMatchingAt(pat, target, at=starts_0_5, max.mismatch=1)

  ## No match when fixed=FALSE is not given
  starts_1_4 <- 1:4
  neditAt("NT", target, at=starts_1_4)
  isMatchingAt("NT", target, at=starts_1_4)
  ## Treating N as an ambiguity (fixed=FALSE) yields 2 matches
  neditAt("NT", target, at=starts_1_4, fixed=FALSE)
  isMatchingAt("NT", target, at=starts_1_4, fixed=FALSE)
  ## A non-zero max.mismatch can be combined with fixed=FALSE
  neditAt("NCA", target, at=starts_0_5, fixed=FALSE)
  isMatchingAt("NCA", target, at=starts_0_5, max.mismatch=1, fixed=FALSE)

  ## A longer list of candidate positions: it runs from 10 down to -10 on
  ## a 7-letter subject, and also holds an NA and a repeated 6. The
  ## logical result is used to subset the candidates, then the same query
  ## is sent to which.isMatchingAt(), without and with follow.index=TRUE.
  candidate_starts <- c(10:-10, NA, 6)
  target <- DNAString("ACGTGCA")
  matched <- isMatchingAt("CAT", target, at=candidate_starts, max.mismatch=1)
  candidate_starts[matched]
  which.isMatchingAt("CAT", target, at=candidate_starts, max.mismatch=1)
  which.isMatchingAt("CAT", target, at=candidate_starts, max.mismatch=1,
                     follow.index=TRUE)
  ## ---------------------------------------------------------------------
  ## WITH INDELS
  ## ---------------------------------------------------------------------
  haystack <- BString("ABCDEFxxxCDEFxxxABBCDE")
  pat <- "ABCDEF"

  ## Number of edits at position 9, first without and then with indels
  neditAt(pat, haystack, at=9)
  neditAt(pat, haystack, at=9, with.indels=TRUE)
  ## Matching at position 9 with indels, max.mismatch set to 1 and then 2
  isMatchingAt(pat, haystack, at=9, max.mismatch=1, with.indels=TRUE)
  isMatchingAt(pat, haystack, at=9, max.mismatch=2, with.indels=TRUE)

  ## Same without/with indels comparison at position 17
  neditAt(pat, haystack, at=17)
  neditAt(pat, haystack, at=17, with.indels=TRUE)
  ## ... and with the position given as an ending position (22) instead
  neditEndingAt(pat, haystack, ending.at=22)
  neditEndingAt(pat, haystack, ending.at=22, with.indels=TRUE)
## Run the code above in your browser using DataLab