# NOT RUN {
# Example: encode a new structural attribute ("foo") on a temporary copy of
# the REUTERS corpus found in the extdata directory of the RcppCWB package.

# library() stops with an error if the package is missing, whereas require()
# would only return FALSE and let the script fail later on.
library(RcppCWB)

# Target locations for the throwaway copy, both below the session tempdir().
tmp_root <- normalizePath(tempdir(), winslash = "/")
registry_tmp <- file.path(tmp_root, "cwb", "registry", fsep = "/")
data_dir_tmp <- file.path(
  tmp_root, "cwb", "indexed_corpora", "reuters",
  fsep = "/"
)

# Copy registry file and data directory so that the installed corpus is not
# the one being written to.
corpus_copy(
  corpus = "REUTERS",
  registry_dir = system.file(
    package = "RcppCWB", "extdata", "cwb", "registry"
  ),
  data_dir = system.file(
    package = "RcppCWB", "extdata", "cwb", "indexed_corpora", "reuters"
  ),
  registry_dir_new = registry_tmp,
  data_dir_new = data_dir_tmp
)

# Size of the existing s-attribute "id" in the copied corpus.
no_strucs <- cl_attribute_size(
  corpus = "REUTERS",
  attribute = "id", attribute_type = "s",
  registry = registry_tmp
)

# Struc indices are passed zero-based (0 .. no_strucs - 1). seq_len() yields
# an empty vector for a size of 0, whereas 0L:(no_strucs - 1L) would produce
# the indices 0 and -1.
struc_ids <- seq_len(no_strucs) - 1L
cpos_list <- lapply(
  struc_ids,
  function(i) {
    cl_struc2cpos(
      corpus = "REUTERS", struc = i, s_attribute = "id",
      registry = registry_tmp
    )
  }
)

# One row per struc; presumably left and right corpus position of the region
# (TODO confirm against the cl_struc2cpos documentation).
cpos_matrix <- do.call(rbind, cpos_list)

# Encode the new s-attribute "foo" on the same regions, using the row number
# (as character) as the value of each region.
s_attribute_encode(
  values = as.character(seq_len(nrow(cpos_matrix))),
  data_dir = data_dir_tmp,
  s_attribute = "foo",
  corpus = "REUTERS",
  region_matrix = cpos_matrix,
  method = "R",
  registry_dir = registry_tmp,
  encoding = "latin1",
  verbose = TRUE,
  delete = TRUE
)

# Read the freshly encoded values back for all strucs.
cl_struc2str(
  "REUTERS",
  struc = seq_len(nrow(cpos_matrix)) - 1L,
  s_attribute = "foo",
  registry = registry_tmp
)

# Remove the temporary registry and data directory again.
unlink(registry_tmp, recursive = TRUE)
unlink(data_dir_tmp, recursive = TRUE)
# Example: query the s-attribute "id" straight from the data directory of the
# REUTERS corpus located in the extdata directory of the RcppCWB package.
reuters_data_dir <- system.file(
  "extdata", "cwb", "indexed_corpora", "reuters",
  package = "RcppCWB"
)

# Result of s_attribute_get_values() for "id".
avs <- s_attribute_get_values(data_dir = reuters_data_dir, s_attribute = "id")

# Result of s_attribute_get_regions() for "id".
rng <- s_attribute_get_regions(data_dir = reuters_data_dir, s_attribute = "id")
# Example: merge two tables of annotated regions. Each row holds the
# boundaries of a region (cpos_left, cpos_right) and the value assigned to it.
x <- data.frame(
  cpos_left = c(1L, 5L, 10L, 20L, 25L),
  cpos_right = c(2L, 5L, 12L, 21L, 27L),
  value = c("ORG", "LOC", "ORG", "PERS", "ORG"),
  stringsAsFactors = FALSE
)

# Integer literals throughout, so the cpos columns have the same type as in x.
# Mixing plain numbers and L-suffixed integers inside c() would silently
# coerce the whole column to double.
y <- data.frame(
  cpos_left = c(5L, 11L, 20L, 25L, 30L),
  cpos_right = c(5L, 12L, 22L, 27L, 33L),
  value = c("LOC", "ORG", "ORG", "ORG", "ORG"),
  stringsAsFactors = FALSE
)

s_attribute_merge(x, y)
# }
# Run the code above in your browser using DataCamp Workspace