## All Citations
## `rm_citation()` is provided by qdapRegex; attach it up front so these
## examples run top-to-bottom outside of the package's own example runner.
library(qdapRegex)

## Sample sentences mixing in-text citations, e.g. "Bunn (2005)", with
## parenthetical ones, e.g. "(Baer, 2005)".
x <- c(
  "Hello World (V. Raptor, 1986) bye",
  "Narcissism is not dead (Rinker, 2014)",
  "The R Core Team (2014) has many members.",
  paste(
    "Bunn (2005) said, \"As for elegance, R is refined, tasteful, and",
    "beautiful. When I grow up, I want to marry R.\""
  ),
  "It is wrong to blame ANY tool for our own shortcomings (Baer, 2005).",
  "Wickham's (in press) Tidy Data should be out soon.",
  "Rinker's (n.d.) dissertation not so much.",
  "I always consult xkcd comics for guidance (Foo, 2012; Bar, 2014).",
  "Uwe Ligges (2007) says, \"RAM is cheap and thinking hurts\""
)

## Default call: remove the citations from each element.
rm_citation(x)

## Extract the citations instead of removing them.
rm_citation(x, extract = TRUE)

## Substitute a placeholder for every citation.
rm_citation(x, replacement = "[CITATION HERE]")

## Tabulate the extracted citations and convert the sorted counts to a
## data frame with columns named "citation" and "count".
qdapTools::vect2df(
  sort(table(unlist(rm_citation(x, extract = TRUE)))),
  "citation", "count"
)
## In-Text
## Same extraction as above, but restricted to the "@rm_citation2" pattern.
rm_citation(x, pattern = "@rm_citation2", extract = TRUE)

## Parenthetical
## Same extraction, restricted to the "@rm_citation3" pattern.
rm_citation(x, pattern = "@rm_citation3", extract = TRUE)
## Mining Citation
## Attach the packages before any of their functions are called (the
## original attached them only after `url_dl()` and `read_docx()` had
## already been used). Attachment order is kept as in the original.
library(qdapTools)
library(ggplot2)
library(qdap)

## Fetch the document, then read it by its bare file name and print the
## text. NOTE(review): this needs network access and presumably writes the
## file into the current working directory -- confirm before running.
url_dl("http://umlreading.weebly.com/uploads/2/5/2/5/25253346/whole_language_timeline-updated.docx")
(txt <- read_docx("whole_language_timeline-updated.docx"))

txt <- rm_non_ascii(txt)

## Split the text on elements matching the regex "References";
## `parts[[1]]` is presumably the body text preceding the reference list.
parts <- split_vector(txt, split = "References", include = TRUE, regex = TRUE)
parts[[1]]

## Citations found in the body after collapsing it with `unbag()`.
rm_citation(unbag(parts[[1]]), extract = TRUE)[[1]]

## By line
rm_citation(parts[[1]], extract = TRUE)

## Frequency
## Count each citation, sort the counts in decreasing order, and swap the
## two resulting columns so "citation" comes first. `decreasing = TRUE`
## is spelled out instead of a positional `T`, which can be reassigned.
cites <- list2df(
  sort(
    table(rm_citation(unbag(parts[[1]]), extract = TRUE)),
    decreasing = TRUE
  ),
  "freq", "citation"
)[2:1]
left_just(cites)
## Distribution of citations (find locations and then plot)
## Collapse the body text once; it is the same for every citation, so there
## is no need to rebuild it inside the loop.
body_text <- unbag(parts[[1]])

## For each citation in the first column of `cites`, locate every occurrence
## in the collapsed text and record one row per occurrence. The per-citation
## data frames are then stacked into `cite_locs`.
cite_locs <- do.call(rbind, lapply(cites[[1]], function(cite) {
  ## `fixed = TRUE` searches for the citation as a literal string rather
  ## than interpreting it as a regular expression.
  hits <- gregexpr(cite, body_text, fixed = TRUE)[[1]]
  data.frame(
    citation = cite,
    ## Widen each match by 5 characters on either side (presumably so the
    ## plotted segments are easier to see).
    start = hits - 5,
    end = hits + 5 + attr(hits, "match.length")
  )
}))
## Plot every citation occurrence as a horizontal segment: the x axis is the
## character position within the collapsed body text, the y axis is the
## citation itself.
## NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
## widths; `size` is kept here so the example also runs on older versions.
ggplot(cite_locs) +
  geom_segment(
    aes(x = start, xend = end, y = citation, yend = citation),
    size = 3,
    color = "yellow"
  ) +
  xlab("Duration") +
  ## Span the x axis from 0 to the text length plus a 25-character margin,
  ## with no extra expansion padding.
  scale_x_continuous(
    expand = c(0, 0),
    limits = c(0, nchar(unbag(parts[[1]])) + 25)
  ) +
  theme_grey() +
  ## Dark theme: black backgrounds with grey grid lines, border and text.
  theme(
    panel.grid.major = element_line(color = "grey20"),
    panel.grid.minor = element_line(color = "grey20"),
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.border = element_rect(colour = "grey50", fill = NA, size = 1),
    axis.text = element_text(color = "grey50"),
    axis.title = element_text(color = "grey50")
  )