#' The features of two objects are compared: usually a partition defining a
#' corpus of interest and a partition defining a reference corpus.
#' The most important purpose is term extraction.
#' @rdname features-method
setMethod("features", "cooccurrences", function(x, y, included = FALSE, method = "ll", mc = TRUE, verbose = TRUE) {
  # Compare the cooccurrence counts of `x` (corpus of interest) against those
  # of `y` (reference corpus) and apply the requested statistical test(s).
  #
  # Arguments:
  #   x, y      cooccurrences objects; their `stat` data.tables are joined on
  #             the columns "a_<pAttribute>" and "b_<pAttribute>".
  #   included  TRUE if the counts of `x` are contained in `y`; in that case the
  #             size and the counts of `x` are subtracted from those of `y`.
  #   method    character vector with the names of the test functions to apply;
  #             each is called with the result object as `.Object` and its
  #             return value replaces the result object.
  #   mc        accepted for interface compatibility; not used by this method.
  #   verbose   whether to emit progress messages.
  #
  # Returns a compCooccurrences object whose `stat` slot holds the matched
  # counts (`count_coi`, `count_ref`) plus whatever the test functions add.

  # A scalar decision, so plain if/else rather than the vectorised ifelse().
  sizeRef <- if (isTRUE(included)) y@partitionSize - x@partitionSize else y@partitionSize
  newObject <- new(
    "compCooccurrences",
    encoding = x@encoding, included = included, corpus = x@corpus,
    sizeCoi = x@partitionSize, sizeRef = sizeRef,
    stat = data.table()
  )

  # The pAttribute slot is only filled when both inputs agree; on a mismatch
  # the user is warned and the slot keeps its prototype value.
  if (!identical(x@pAttribute, y@pAttribute)) {
    warning("BEWARE: cooccurrences objects are not based on the same pAttribute!")
  } else {
    newObject@pAttribute <- x@pAttribute
  }

  if (isTRUE(verbose)) message("... preparing tabs for matching")
  keys <- unlist(lapply(c("a", "b"), function(ab) paste(ab, x@pAttribute, sep = "_")))
  # NOTE: setkeyv() works by reference, i.e. the stat tables of the input
  # objects are re-keyed (and thereby re-sorted) as a side effect.
  setkeyv(x@stat, keys)
  setkeyv(y@stat, keys)
  # Keyed join: one row per row of x@stat; columns coming from x@stat that
  # clash with columns of y@stat get the prefix "i.". Rows of x@stat without
  # a match in y@stat carry NA in the columns taken from y@stat.
  MATCH <- y@stat[x@stat]

  # Remove columns not needed. Only columns actually present are dropped, in
  # a single by-reference operation, so absent columns cause no warnings.
  colsToDrop <- c(
    "ll", "i.ll", "exp_window", "i.exp_window", "rank_ll", "i.rank_ll",
    "size_window", "i.size_window", "count_a", "i.count_a", "count_b", "i.count_b",
    "exp_partition", "i.exp_partition"
  )
  colsPresent <- intersect(colsToDrop, colnames(MATCH))
  if (length(colsPresent) > 0L) MATCH[, (colsPresent) := NULL]

  setnames(MATCH, old = c("count_ab", "i.count_ab"), new = c("count_ref", "count_coi"))
  # If x is part of y, the reference counts must not include the counts of x.
  if (isTRUE(included)) {
    MATCH[, "count_ref" := MATCH[["count_ref"]] - MATCH[["count_coi"]]]
  }
  newObject@stat <- MATCH

  # Apply every requested test in turn; each call receives the object as
  # updated by the previous one.
  for (how in method) {
    if (isTRUE(verbose)) message("... statistical test: ", how)
    newObject <- do.call(how, args = list(.Object = newObject))
  }
  newObject
})