These methods should be used to get or set values of text objects
generated by functions like readCorpus.
# S4 method for kRp.corpus
taggedText(obj)
# S4 method for kRp.corpus
taggedText(obj) <- value
# S4 method for kRp.corpus
doc_id(obj, has_id = NULL)
# S4 method for kRp.corpus
describe(obj, doc_id = NULL, simplify = TRUE, ...)
# S4 method for kRp.corpus
describe(obj, doc_id = NULL, ...) <- value
# S4 method for kRp.corpus
language(obj)
# S4 method for kRp.corpus
language(obj) <- value
# S4 method for kRp.corpus
hasFeature(obj, feature = NULL)
# S4 method for kRp.corpus
hasFeature(obj, feature) <- value
# S4 method for kRp.corpus
feature(obj, feature, doc_id = NULL)
# S4 method for kRp.corpus
feature(obj, feature) <- value
# S4 method for kRp.corpus
corpusReadability(obj, doc_id = NULL)
# S4 method for kRp.corpus
corpusReadability(obj) <- value
corpusTm(obj)
# S4 method for kRp.corpus
corpusTm(obj)
corpusTm(obj) <- value
# S4 method for kRp.corpus
corpusTm(obj) <- value
corpusMeta(obj, meta = NULL, fail = TRUE)
# S4 method for kRp.corpus
corpusMeta(obj, meta = NULL, fail = TRUE)
corpusMeta(obj, meta = NULL) <- value
# S4 method for kRp.corpus
corpusMeta(obj, meta = NULL) <- value
# S4 method for kRp.corpus
corpusHyphen(obj, doc_id = NULL)
# S4 method for kRp.corpus
corpusHyphen(obj) <- value
# S4 method for kRp.corpus
corpusLexDiv(obj, doc_id = NULL)
# S4 method for kRp.corpus
corpusLexDiv(obj) <- value
# S4 method for kRp.corpus
corpusFreq(obj)
# S4 method for kRp.corpus
corpusFreq(obj) <- value
# S4 method for kRp.corpus
corpusCorpFreq(obj)
# S4 method for kRp.corpus
corpusCorpFreq(obj) <- value
corpusHierarchy(obj, ...)
# S4 method for kRp.corpus
corpusHierarchy(obj)
corpusHierarchy(obj) <- value
# S4 method for kRp.corpus
corpusHierarchy(obj) <- value
corpusFiles(obj, paths = FALSE, ...)
# S4 method for kRp.corpus
corpusFiles(obj, paths = FALSE)
corpusFiles(obj) <- value
# S4 method for kRp.corpus
corpusFiles(obj) <- value
corpusDocTermMatrix(obj, ...)
# S4 method for kRp.corpus
corpusDocTermMatrix(obj)
corpusDocTermMatrix(obj, terms = NULL, case.sens = NULL, tfidf = NULL) <- value
# S4 method for kRp.corpus
corpusDocTermMatrix(obj, terms = NULL, case.sens = NULL,
tfidf = NULL) <- value
# S4 method for kRp.corpus
corpusStopwords(obj)
# S4 method for kRp.corpus
corpusStopwords(obj) <- value
# S4 method for kRp.corpus
diffText(obj, doc_id = NULL)
# S4 method for kRp.corpus
diffText(obj) <- value
# S4 method for kRp.corpus
originalText(obj)
is.corpus(obj)
# S4 method for kRp.corpus,ANY,ANY,ANY
[(x, i, j, ..., drop = TRUE)
# S4 method for kRp.corpus,ANY,ANY,ANY
[(x, i, j, ...) <- value
# S4 method for kRp.corpus
[[(x, i, doc_id = NULL, ...)
# S4 method for kRp.corpus
[[(x, i, doc_id = NULL, ...) <- value
# S4 method for kRp.corpus
tif_as_tokens_df(tokens)
tif_as_corpus_df(corpus)
# S4 method for kRp.corpus
tif_as_corpus_df(corpus)
An object of class kRp.corpus
.
A new value to replace the current with.
A character vector with doc_ids to look for in the object. The return value
is then a logical vector of the same length,
indicating if the respective id was found or not.
A character vector to limit the scope to one or more particular document IDs.
If TRUE and the result is a list of length 1, return the list element.
Additional arguments to pass through, depending on the method.
Character string naming the object feature to look for.
If not NULL, the meta
list entry of the given name.
Logical,
whether the method should fail with an error if meta
was not found.
If set to FALSE, returns invisible(NULL) instead.
Logical,
indicates for corpusFiles()
whether full paths should be returned, or just the actual file name.
A character string defining the tokens
used for calculating the matrix.
Stored in object's meta data slot.
Logical, whether terms were counted case-sensitively. Stored in object's meta data slot.
Logical,
use TRUE
if the term frequency–inverse document frequency (tf-idf)
values were calculated instead of absolute frequency.
Stored in object's meta data slot.
See obj
.
Defines the row selector ([
) or the name to match ([[
) in the tokens slot.
Defines the column selector in the tokens slot.
See [
.
An object of class kRp.corpus
.
An object of class kRp.corpus
.
taggedText()
returns the tokens
slot.
describe()
returns the desc
slot.
hasFeature()
returns TRUE
or FALSE,
depending on whether the requested feature is present or not.
feature()
returns the list entry of the feat_list
slot for the requested feature.
corpusReadability()
returns the list of kRp.readability
objects.
corpusTm()
returns the VCorpus
object.
corpusMeta()
returns the list with meta information.
corpusHyphen()
returns the list of kRp.hyphen
objects.
corpusLexDiv()
returns the list of kRp.TTR
objects.
corpusFiles()
returns the character vector of file names of the object.
corpusFreq()
returns the frequency analysis data from the feat_list
slot.
corpusCorpFreq()
returns the kRp.corp.freq
object of the feat_list
slot.
corpusHierarchy()
returns the corpus' hierarchy structure.
corpusDocTermMatrix()
returns the sparse document term matrix of the feat_list
slot.
corpusStopwords()
returns the number of stopwords found in each text (if analyzed) from the feat_list
slot.
diffText()
returns the diff
element of the feat_list
slot.
originalText()
regenerates the original text before text transformations and returns it as a data frame.
[
/[[
can be used as a shortcut to index the results of taggedText()
.
tif_as_corpus_df()
returns the whole corpus in a single TIF[1] compliant
data.frame.
tif_as_tokens_df()
returns the tokens
slot in a TIF[1] compliant
data.frame, i.e., doc_id
is not a factor but a character vector.
[1] Text Interchange Formats (https://github.com/ropensci/tif)
# NOT RUN {
# use readCorpus() to create an object of class kRp.corpus
# code is only run when the english language package can be loaded
if(require("koRpus.lang.en", quietly = TRUE)){
myCorpus <- readCorpus(
dir=file.path(
path.package("tm.plugin.koRpus"), "examples", "corpus", "Winner", "Wikipedia_new"
),
# use tokenize() so examples run without a TreeTagger installation
tagger="tokenize",
lang="en"
)
taggedText(myCorpus)
corpusMeta(myCorpus, "note") <- "an interesting read!"
# export object to TIF compliant data frame
myCorpus_df <- tif_as_corpus_df(myCorpus)
} else {}
# }
Run the code above in your browser using DataLab