# Example: Concept Class Analysis (CoCA) on a sample speech.

# Load the bundled sample word embeddings and the sample text.
data(ft_wv_sample, jfk_speech)

# Minimal preprocessing: lowercase the sentences, then replace every run of
# punctuation characters with a single space.
jfk_speech$sentence <- gsub(
  "[[:punct:]]+", " ",
  tolower(jfk_speech$sentence)
)

# Build the document-term matrix from the sentence text, keyed by sentence id.
dtm <- dtm_builder(jfk_speech, sentence, sentence_id)

# Anchor pairs for two semantic directions; each row names a term to add
# and a term to subtract.
gen <- data.frame(add = "woman", subtract = "man")
die <- data.frame(add = "alive", subtract = "die")

# Turn each anchor set into a direction using the sample embeddings,
# then stack both directions row-wise.
gen_dir <- get_direction(anchors = gen, wv = ft_wv_sample)
die_dir <- get_direction(anchors = die, wv = ft_wv_sample)
sem_dirs <- rbind(gen_dir, die_dir)

# Run CoCA on the DTM with the stacked directions and show the result.
classes <- CoCA(
  dtm = dtm,
  wv = ft_wv_sample,
  directions = sem_dirs,
  filter_sig = TRUE,
  filter_value = 0.05,
  zero_action = "drop"
)
print(classes)
# Run the code above in your browser using DataLab