Search - Find terms located in columns of a data frame.
boolean_search - Conducts a Boolean search for terms/strings within a
character vector.
%bs% - Binary operator version of boolean_search.
Search(dataframe, term, column.name = NULL, max.distance = 0.02, ...)
boolean_search(text.var, terms, ignore.case = TRUE, values = FALSE,
exclude = NULL, apostrophe.remove = FALSE, char.keep = NULL,
digit.remove = FALSE)
text.var %bs% terms
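... - Other arguments passed to agrep.
terms - The search terms, arranged in a single string with AND (use AND
or && to connect terms together) and OR (use OR or || to allow for
searches of either set of terms).
ignore.case - logical. If TRUE case is ignored.
apostrophe.remove - logical. If TRUE removes apostrophes from the text
before examining.
char.keep - termco attempts to auto detect characters to keep based on
the elements in match.list.
digit.remove - logical. If TRUE strips digits from the text before
counting. termco attempts to auto detect if digits should be retained
based on the elements in match.list.
Search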
- Returns the rows of the data frame that
match the search term.
boolean_search - Returns the values (or indices) of a vector of strings
that match given terms.
See also: trans_context, termco
## Dataframe search:
## Build a sample data frame: the first 32 names of `islands` placed beside
## the columns of `mtcars`. The outer parentheses print the result as well.
(SampDF <- data.frame("islands"=names(islands)[1:32],mtcars, row.names=NULL))
## Third argument is column.name: restrict the search to the "islands" column.
Search(SampDF, "Cuba", "islands")
Search(SampDF, "New", "islands")
## column.name left at its default (NULL). The first call uses the default
## max.distance (0.02); the second sets max.distance = 0 for comparison.
Search(SampDF, "Ho")
Search(SampDF, "Ho", max.distance = 0)
Search(SampDF, "Axel Heiberg")
## A numeric term: default max.distance, then max.distance = 0, then
## max.distance = 0 restricted to the "qsec" column.
Search(SampDF, 19) #too much tolerance in max.distance
Search(SampDF, 19, max.distance = 0)
Search(SampDF, 19, "qsec", max.distance = 0)
## Boolean search:
## The terms are supplied as one string; OR / || and AND / && join them.
## The spaces inside " I " are part of the search term.
boolean_search(DATA$state, " I ORliar&&stinks")
## values=TRUE vs. the default (values = FALSE) -- presumably the matching
## strings rather than their indices; confirm against the function's docs.
boolean_search(DATA$state, " I &&.", values=TRUE)
boolean_search(DATA$state, " I OR.", values=TRUE)
boolean_search(DATA$state, " I &&.")
## Exclusion:
## Same OR search run twice: first without, then with the `exclude` argument.
boolean_search(DATA$state, " I ||.", values=TRUE)
boolean_search(DATA$state, " I ||.", exclude = c("way", "truth"), values=TRUE)
## From stackoverflow: http://stackoverflow.com/q/19640562/1000343
## Collapse the lookup values in z$z into a single "OR"-separated terms
## string, then use the boolean_search result to subset the rows of `dat`.
dat <- data.frame(x = c("Doggy", "Hello", "Hi Dog", "Zebra"), y = 1:4)
z <- data.frame(z =c("Hello", "Dog"))
dat[boolean_search(dat$x, paste(z$z, collapse = "OR")), ]
## Binary operator version
## Same subsetting as above, written with the %bs% operator.
dat[dat$x %bs% paste(z$z, collapse = "OR"), ]
## Passing to `trans_context`
## Store the boolean_search result (case-sensitive here: ignore.case = FALSE)
## and hand it to trans_context through its `inds` argument.
inds <- boolean_search(DATA.SPLIT$state, " I&&.|| I&&!", ignore.case = FALSE)
with(DATA.SPLIT, trans_context(state, person, inds=inds))
## Build the terms string by collapsing `negation.words` with " || " and
## wrapping it in spaste(); the outer parentheses also print `inds2`.
(inds2 <- boolean_search(raj$dialogue, spaste(paste(negation.words,
collapse = " || "))))
trans_context(raj$dialogue, raj$person, inds2)
Run the code above in your browser using DataLab