# Example workflow: normalize cited references, inspect the variants that
# were merged, and compare unique-citation counts before and after.
# The `if (FALSE)` guard means nothing below executes when this file is
# sourced; run the body by hand in an interactive session instead.
if (FALSE) {
  # Read the sample export (fetched from a URL) into a data frame
  file <- "https://www.bibliometrix.org/datasets/savedrecs.txt"
  M <- convert2df(file, dbsource = "wos", format = "plaintext")

  # Run citation matching with a threshold of 0.85
  results <- applyCitationMatching(M, threshold = 0.85)

  # First 20 rows of the post-normalization summary
  head(results$summary, n = 20)

  # Collect every raw CR string that maps to the first canonical citation
  top_citation <- results$summary$CR_canonical[1]
  variants <- subset(results$full_data, CR_canonical == top_citation)
  unique(variants$CR)

  # Keep the raw references as CR_orig and attach the normalized ones,
  # joining on the SR key
  M_normalized <- M %>%
    rename(CR_orig = CR) %>%
    left_join(results$CR_normalized, by = "SR")

  # Count distinct raw citations: split each CR field on ";", flatten,
  # trim whitespace, and count the distinct values via table()
  original_citations <- M$CR %>%
    strsplit(split = ";") %>%
    unlist() %>%
    trimws() %>%
    table() %>%
    length()
  normalized_citations <- nrow(results$summary)

  # Report the before/after counts and their difference
  cat("Original unique citations:", original_citations, "\n")
  cat("After normalization:", normalized_citations, "\n")
  cat("Duplicates found:", original_citations - normalized_citations, "\n")

  # Feed the normalized data frame into the downstream citation analysis
  CR_analysis <- citations(M_normalized, field = "article", sep = ";")
}
# Run the code above in your browser using DataLab