## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval = FALSE-------------------------------------------------------------
# # if (!require("remotes", quietly = TRUE))
# #     install.packages("remotes")
# #
# # remotes::install_github("statdivlab/radEmu")

## ----setup, message = FALSE---------------------------------------------------
library(magrittr)
library(dplyr)
library(ggplot2)
library(stringr)
library(radEmu)

## ----message = FALSE----------------------------------------------------------
library(parallel)

## -----------------------------------------------------------------------------
# load in sample data
data("wirbel_sample")
# set group to be a factor with levels CTR for control and CRC for cancer
wirbel_sample$Group <- factor(wirbel_sample$Group, levels = c("CTR","CRC"))
# load in abundance data 
data("wirbel_otu")
# save mOTU names
mOTU_names <- colnames(wirbel_otu)
# consider taxa in the following genera
chosen_genera <- c("Eubacterium", "Faecalibacterium", "Fusobacterium", "Porphyromonas")
# get taxonomy information from mOTU names
mOTU_name_df <- data.frame(name = mOTU_names) %>% 
  mutate(base_name = stringr::str_remove(mOTU_names, "unknown ") %>%
                      stringr::str_remove("uncultured ")) %>%
  mutate(genus_name = stringr::word(base_name, 1))
# restrict to names in chosen genera
restricted_mOTU_names <- mOTU_name_df %>%
  filter(genus_name %in% chosen_genera) %>%
  pull(name)
# pull out observations from a chinese study within the meta-analysis
ch_study_obs <- which(wirbel_sample$Country %in% c("CHI"))
# make count matrix for chosen samples and genera
small_Y <- wirbel_otu[ch_study_obs, restricted_mOTU_names]
# check for samples with only zero counts
sum(rowSums(small_Y) == 0) # no samples have a count sum of 0 
# check for genera with only zero counts
sum(colSums(small_Y) == 0) # one category has a count sum of 0
# remove the one genus with only zero counts
category_to_rm <- which(colSums(small_Y) == 0)
small_Y <- small_Y[, -category_to_rm]

## -----------------------------------------------------------------------------
ch_fit <- emuFit(formula = ~ Group, 
                 data = wirbel_sample[ch_study_obs, ],
                 Y = small_Y,
                 run_score_tests = FALSE) 

## -----------------------------------------------------------------------------
mOTU_to_test <- which(str_detect(restricted_mOTU_names, "7116"))
ch_fit$B %>% rownames
covariate_to_test <- which("GroupCRC" == ch_fit$B %>% rownames)
robust_score <- emuFit(formula = ~ Group,
                       data = wirbel_sample[ch_study_obs, ],
                       fitted_model = ch_fit,
                       refit = FALSE,
                       test_kj = data.frame(k = covariate_to_test, 
                                            j = mOTU_to_test), 
                       Y = small_Y)
robust_score$coef$pval[mOTU_to_test]

## ----eval = FALSE-------------------------------------------------------------
# ncores <- parallel::detectCores() - 1
# ncores

## ----eval = FALSE-------------------------------------------------------------
# emuTest <- function(category) {
#   score_res <- emuFit(formula = ~ Group,
#                        data = wirbel_sample[ch_study_obs, ],
#                        fitted_model = ch_fit,
#                        refit = FALSE,
#                        test_kj = data.frame(k = covariate_to_test,
#                                             j = category),
#                        Y = small_Y)
#   return(score_res)
# }

## ----eval = FALSE-------------------------------------------------------------
# if (.Platform$OS.type != "windows" & !identical(Sys.getenv("GITHUB_ACTIONS"), "true")) {
#   # run if we are on a Mac or Linux machine
#   score_res <- mclapply(1:5,
#                       emuTest,
#                       mc.cores = ncores)
# } else {
#   # don't run if we are on a Windows machine, or if testing with GitHub actions
#   score_res <- NULL
# }

## ----eval = FALSE-------------------------------------------------------------
# if (!is.null(score_res)) {
#   c(score_res[[1]]$coef$pval[1], ## robust score test p-value for the first taxon
#   score_res[[2]]$coef$pval[2]) ## robust score test p-value for the second taxon
# }

## ----eval = FALSE-------------------------------------------------------------
# if (!is.null(score_res)) {
#   full_score <- sapply(1:length(score_res),
#                        function(x) score_res[[x]]$coef$score_stat[x])
#   full_pval <- sapply(1:length(score_res),
#                       function(x) score_res[[x]]$coef$pval[x])
#   full_coef <- ch_fit$coef %>%
#     dplyr::select(-score_stat, -pval) %>%
#     filter(category_num %in% 1:5) %>%
#     mutate(score_stat = full_score,
#            pval = full_pval)
#   full_coef
# }