SportMiner is a comprehensive toolkit for mining, analyzing, and visualizing scientific literature in sport science domains. It provides an end-to-end workflow from data retrieval to publication-ready visualizations.
# From CRAN
install.packages("SportMiner")
# Development version from GitHub
devtools::install_github("praveenmaths89/SportMiner", subdir = "SportMiner")
library(SportMiner)
# 1. Set your Scopus API key
sm_set_api_key("your_key_here")
# 2. Search for papers
papers <- sm_search_scopus(
query = 'TITLE-ABS-KEY("sport science" AND "machine learning")',
max_count = 100
)
# 3. Preprocess text
processed <- sm_preprocess_text(papers)
# 4. Create document-term matrix
dtm <- sm_create_dtm(processed)
# 5. Find optimal number of topics
k_selection <- sm_select_optimal_k(dtm, k_range = seq(5, 20, by = 5))
# 6. Train topic model
lda_model <- sm_train_lda(dtm, k = k_selection$optimal_k)
# 7. Visualize results
sm_plot_topic_terms(lda_model, n_terms = 10)
sm_plot_topic_frequency(lda_model, dtm)
# 8. Create keyword network
sm_keyword_network(papers, min_cooccurrence = 2)
# Compare LDA, STM, and CTM
comparison <- sm_compare_models(dtm, k = 10)
# View metrics
print(comparison$metrics)
#> model coherence exclusivity combined_score
#> 1 LDA 0.542 0.678 0.321
#> 2 STM 0.589 0.712 0.854
#> 3 CTM 0.521 0.645 -0.175
# Recommendation
print(comparison$recommendation)
#> [1] "STM"
papers$doc_id <- paste0("doc_", seq_len(nrow(papers)))
sm_plot_topic_trends(
model = lda_model,
dtm = dtm,
metadata = papers,
year_filter = 2015:2025
)
library(ggplot2)
# All plots use theme_sportminer() by default
p <- sm_plot_topic_frequency(lda_model, dtm)
# Customize further
p + labs(
title = "Your Custom Title",
subtitle = "Based on N papers"
) + theme_sportminer(base_size = 14, grid = FALSE)
Set the API key in your .Renviron file:
usethis::edit_r_environ()
# Add this line:
# SCOPUS_API_KEY=your_key_here
See the package vignette for detailed usage:
vignette("getting-started", package = "SportMiner")
SportMiner adheres to strict CRAN standards:
- tryCatch()
- message() and warning(), not cat() or print()
- .data pronoun from rlang to avoid R CMD check NOTEs

All plots use theme_sportminer(), which provides:
For bug reports and feature requests, please contact the package maintainer.
If you use SportMiner in your research, please cite:
citation("SportMiner")
MIT © 2026 Praveen D Chougale and Usha Ananthakumar
This package builds on the excellent work of: