# Two sentences to split, and one identifier per sentence
x <- c(
  "Nor rain, wind, thunder, fire are my daughters.",
  "When my information changes, I alter my conclusions."
)
id <- c("ws", "jmk")

# The split is done at each word (any run of punctuation and/or spaces)
string_split2df(x, "[[:punct:] ]+")

# Same split, this time passing the identifiers through `id`
string_split2df(x, "[[:punct:] ]+", id = id)

# NOTE:
# - `data` is the second argument
# - if `data` is missing, `split` implicitly takes the second position
# - e.g. the calls above did not have to spell out `split = "[[:punct:] ]+"`

#
# Formula interface: text on the left, identifier on the right
#
base <- data.frame(text = x, my_id = id)
string_split2df(text ~ my_id, base, "[[:punct:] ]+")

#
# Two or more identifiers
#
base <- mtcars
base$carname <- rownames(mtcars)

# A message is displayed here since the identifiers are not unique
string_split2df(carname ~ am + gear + carb, base, " +")

# Adds the position of the words and removes the message
string_split2df(
  carname ~ am + gear + carb, base, " +",
  id_unik = FALSE, add.pos = TRUE
)
# Run the code above in your browser using DataLab