## ----setup, include = FALSE---------------------------------------------------
# Global knitr chunk options. Chunks are only evaluated when at least one of
# the example data directories exists in the working directory.
knitr::opts_chunk$set(
  collapse = TRUE,
  eval = any(dir.exists(c(
    "working_example_data", "benchmark_data", "new_benchmark_data",
    "topic_data", "valid_data", "new_stage_data"
  ))),
  comment = "#>",
  warning = FALSE,
  fig.width = 6,
  fig.height = 6
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
#install.packages("CiteSource")
library(CiteSource)

## -----------------------------------------------------------------------------
# Collect the RIS exports in topic_data. The pattern is anchored with `$` so
# that only names ending in ".ris" match (not e.g. "x.ris.bak").
citation_files <- list.files(
  path = "topic_data",
  pattern = "\\.ris$",
  full.names = TRUE
)

# One label per file. list.files() returns names in alphabetical order, so the
# labels are listed alphabetically as well.
# NOTE(review): assumes read_citations() pairs labels with files by position --
# confirm against the CiteSource documentation.
source_labels <- c("crimjust", "lens", "psycinfo", "pubmed", "scopus")

# Fail early with a clear message if the files and labels cannot line up.
stopifnot(
  "number of .ris files in topic_data must match the number of source labels" =
    length(citation_files) == length(source_labels)
)

citations <- read_citations(
  citation_files,
  cite_sources = source_labels,
  tag_naming = "best_guess"
)

## ----results = FALSE, message=FALSE, warning=FALSE----------------------------
unique_citations <- dedup_citations(citations)
n_unique <- count_unique(unique_citations)
source_comparison <- compare_sources(unique_citations, comp_type = "sources")

## -----------------------------------------------------------------------------
plot_source_overlap_heatmap(source_comparison)

## -----------------------------------------------------------------------------
plot_source_overlap_heatmap(source_comparison, plot_type = "percentages")

## -----------------------------------------------------------------------------
plot_source_overlap_upset(source_comparison, decreasing = c(TRUE, TRUE))

## -----------------------------------------------------------------------------
plot_contributions(n_unique, center = TRUE)

## -----------------------------------------------------------------------------
# Rows of `counts` flagged `unique` for one source, joined to the matching rows
# of `records` on `duplicate_id`.
#
# source_name: single string compared against the `cite_source` column.
# counts:      data frame with `cite_source`, `unique` and `duplicate_id`.
# records:     data frame with a `duplicate_id` column.
#
# Columns present in both inputs (other than `duplicate_id`) receive dplyr's
# default ".x" / ".y" suffixes; the faceted plot below relies on
# `cite_source.x`.
unique_records_for <- function(source_name,
                               counts = n_unique,
                               records = unique_citations) {
  counts |>
    # `.env$` pins source_name to the function argument, so a column of the
    # same name in `counts` can never shadow it.
    dplyr::filter(cite_source == .env$source_name, unique == TRUE) |>
    dplyr::inner_join(records, by = "duplicate_id")
}

unique_lens <- unique_records_for("lens")
unique_psycinfo <- unique_records_for("psycinfo")
unique_pubmed <- unique_records_for("pubmed")
unique_crimjust <- unique_records_for("crimjust")
unique_scopus <- unique_records_for("scopus")

## -----------------------------------------------------------------------------
# Journals of the records found only in Scopus, most frequent first.
scopus_journals <- unique_scopus |>
  dplyr::count(journal, name = "count", sort = TRUE)

# head() returns at most 10 rows; `[1:10, ]` would pad a shorter table with
# all-NA rows.
knitr::kable(utils::head(scopus_journals, 10))

## ----results=FALSE------------------------------------------------------------
# Number of Scopus-only records per publication year.
unique_scopus |>
  dplyr::count(year, name = "count") |>
  ggplot2::ggplot(ggplot2::aes(year, count, group = 1)) +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::xlab("Publication year") +
  ggplot2::ylab("Unique records")

## ----message=FALSE------------------------------------------------------------
# NOTE(review): unique_crimjust is built above but is not included here --
# confirm whether leaving it out of the comparison is intentional.
all_unique <- dplyr::bind_rows(
  unique_scopus,
  unique_lens,
  unique_pubmed,
  unique_psycinfo
)

# Same per-year count as above, one facet per source.
all_unique |>
  dplyr::count(cite_source.x, year, name = "count") |>
  ggplot2::ggplot(ggplot2::aes(year, count, group = 1)) +
  ggplot2::geom_line() +
  ggplot2::geom_point() +
  ggplot2::facet_wrap(~ cite_source.x) +
  ggplot2::xlab("Publication year") +
  ggplot2::ylab("Unique records")

## -----------------------------------------------------------------------------
#export_csv(unique_citations, filename = "unique-by-source.csv", separate = "cite_source")
#export_ris(unique_citations, filename = "unique_citations.ris", source_field = "DB", label_field = "N1")
#export_bib(unique_citations, filename = "unique_citations.bib", include = c("sources", "labels", "strings"))
#reimport_csv("unique-by-source.csv")