## Build a small example corpus: four short documents, split into sentences
text <- c("A B C", "D E F. G H I", "A D", "GGG")
tc <- create_tcorpus(
  text,
  doc_id = c("a", "b", "c", "d"),
  split_sentences = TRUE
)
tc$tokens

## Two queries at once; the text before '#' in each string is the query label
hits <- search_contexts(
  tc,
  c("query label# A AND B", 'second query# (A AND Q) OR ("D E") OR I')
)
hits          ## print shows number of hits
hits$hits     ## hits is a list; hits$hits is a data.frame with the matched contexts
summary(hits) ## summary gives hits per query

## sentence level: same queries, with context_level = "sentence"
hits <- search_contexts(
  tc,
  c("query label# A AND B", 'second query# (A AND Q) OR ("D E") OR I'),
  context_level = "sentence"
)
hits$hits     ## data.frame with the matched contexts for the sentence-level search
# \donttest{
## query language examples

## single term
search_contexts(tc, "A")$hits

## wildcards
search_contexts(tc, "G*")$hits   ## wildcard * at the end of the term
search_contexts(tc, "*G")$hits   ## wildcard * at the start of the term
search_contexts(tc, "G*G")$hits  ## wildcard * in the middle of the term
search_contexts(tc, "G?G")$hits  ## wildcard ?
search_contexts(tc, "G?")$hits   ## wildcard ? (no hits)

## boolean operators: AND, OR, NOT, with parentheses for grouping
search_contexts(tc, "A AND B")$hits
search_contexts(tc, "A AND D")$hits
search_contexts(tc, "A AND (B OR D)")$hits
search_contexts(tc, "A NOT B")$hits
search_contexts(tc, "A NOT (B OR D)")$hits
## sequence search (adjacent words)
search_contexts(tc, '"A B"')$hits
search_contexts(tc, '"A C"')$hits ## no hit, because not adjacent
search_contexts(tc, '"A (B OR D)"')$hits ## can contain nested OR
## cannot contain nested AND or NOT!!
search_contexts(tc, '<A B>')$hits ## can also use <> instead of "".

## proximity search (using ~ flag)
search_contexts(tc, '"A C"~5')$hits ## A AND C within a 5 word window
search_contexts(tc, '"A C"~1')$hits ## no hit, because A and C more than 1 word apart
search_contexts(tc, '"A (B OR D)"~5')$hits ## can contain nested OR
search_contexts(tc, '"A <B C>"~5')$hits ## can contain nested sequence (must use <>)
search_contexts(tc, '<A <B C>>~5')$hits ## (<> is always OK, but cannot nest quotes in quotes)
## cannot contain nested AND or NOT!!
## case sensitive search (using ~s flag)
search_contexts(tc, "g")$hits           ## normally case insensitive
search_contexts(tc, "g~s")$hits         ## ~s flag makes a single term case sensitive
search_contexts(tc, "(a OR g)~s")$hits  ## ~s applies to everything between parentheses (both terms lowercase)
search_contexts(tc, "(a OR G)~s")$hits  ## same, but with G in upper case
search_contexts(tc, '"a b"~s')$hits     ## ~s applies to everything between quotes (lowercase sequence)
search_contexts(tc, '"A B"~s')$hits     ## same, but with the sequence in upper case
# }
## Run the code above in your browser using DataLab