## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----setup, message = FALSE---------------------------------------------------
library(pepdiff)
library(dplyr)

## ----generate-data------------------------------------------------------------
set.seed(789)

n_peptides <- 40
n_reps <- 5  # 5 reps per condition = 20 observations per peptide

peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides))
genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1])

# Step 1: Create base abundance for each peptide
peptide_info <- tibble(

  peptide = peptides,
  gene_id = genes,
  pep_num = 1:n_peptides,
  base = rgamma(n_peptides, shape = 6, rate = 0.6)
)

# Step 2: Create the experimental design
design <- expand.grid(
  peptide = peptides,
  treatment = c("ctrl", "trt"),
  timepoint = c("early", "late"),
  bio_rep = 1:n_reps,
  stringsAsFactors = FALSE
)

# Step 3: Join design with peptide info and apply effects
# Using heavier-tailed noise to justify ART
sim_data <- design %>%
  left_join(peptide_info, by = "peptide") %>%
  mutate(
    # Group A (peptides 1-10): Treatment effect only
    # trt is 3-fold higher than ctrl, same at both timepoints
    trt_effect = ifelse(pep_num <= 10 & treatment == "trt", 3, 1),

    # Group B (peptides 11-20): Timepoint effect only
    # late is 3-fold higher than early, same for both treatments
    time_effect = ifelse(pep_num > 10 & pep_num <= 20 & timepoint == "late", 3, 1),

    # Group C (peptides 21-30): Interaction
    # Treatment works at late (4-fold) but NOT at early
    int_effect = case_when(
      pep_num > 20 & pep_num <= 30 & treatment == "trt" & timepoint == "late" ~ 4,
      TRUE ~ 1
    ),

    # Group D (peptides 31-40): No effect
    # All effect multipliers are 1

    # Add heavy-tailed noise (occasional extreme values)
    # This is why we might prefer ART over GLM
    extreme = ifelse(runif(n()) < 0.05, runif(n(), 2, 4), 1),

    # Final abundance with Gamma noise and occasional extremes
    value = rgamma(n(), shape = 10, rate = 10 / (base * trt_effect * time_effect * int_effect)) * extreme
  ) %>%
  select(peptide, gene_id, treatment, timepoint, bio_rep, value)

# Import
temp_file <- tempfile(fileext = ".csv")
write.csv(sim_data, temp_file, row.names = FALSE)

dat <- read_pepdiff(
  temp_file,
  id = "peptide",
  gene = "gene_id",
  value = "value",
  factors = c("treatment", "timepoint"),
  replicate = "bio_rep"
)

dat

## ----run-art, message = FALSE-------------------------------------------------
results_art <- compare(
  dat,
  compare = "treatment",
  ref = "ctrl",
  method = "art"
)

results_art

## ----view-results-------------------------------------------------------------
results_art$results %>%
  mutate(
    pep_num = as.numeric(gsub("PEP_", "", peptide)),
    group = case_when(
      pep_num <= 10 ~ "A: Treatment only",
      pep_num <= 20 ~ "B: Timepoint only",
      pep_num <= 30 ~ "C: Interaction",
      TRUE ~ "D: No effect"
    )
  ) %>%
  group_by(group) %>%
  summarise(
    n_significant = sum(significant),
    n_peptides = n(),
    median_fc = round(median(fold_change, na.rm = TRUE), 2)
  )

## ----plot-art-results, fig.height = 6-----------------------------------------
plot(results_art)

## ----art-stratified, message = FALSE------------------------------------------
results_strat <- compare(
  dat,
  compare = "treatment",
  ref = "ctrl",
  within = "timepoint",
  method = "art"
)

results_strat

## ----view-stratified----------------------------------------------------------
results_strat$results %>%
  mutate(
    pep_num = as.numeric(gsub("PEP_", "", peptide)),
    group = case_when(
      pep_num <= 10 ~ "A: Treatment only",
      pep_num <= 20 ~ "B: Timepoint only",
      pep_num <= 30 ~ "C: Interaction",
      TRUE ~ "D: No effect"
    )
  ) %>%
  group_by(group, timepoint) %>%
  summarise(
    n_significant = sum(significant),
    median_fc = round(median(fold_change, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  tidyr::pivot_wider(
    names_from = timepoint,
    values_from = c(n_significant, median_fc)
  )

## ----compare-methods, message = FALSE-----------------------------------------
results_glm <- compare(dat, compare = "treatment", ref = "ctrl", method = "glm")

## ----compare-significant------------------------------------------------------
# Compare significant calls
comparison <- tibble(
  peptide = results_glm$results$peptide,
  glm_sig = results_glm$results$significant,
  art_sig = results_art$results$significant,
  glm_pval = round(results_glm$results$p_value, 4),
  art_pval = round(results_art$results$p_value, 4)
)

# Agreement
cat("Both significant:", sum(comparison$glm_sig & comparison$art_sig), "\n")
cat("GLM only:", sum(comparison$glm_sig & !comparison$art_sig), "\n")
cat("ART only:", sum(!comparison$glm_sig & comparison$art_sig), "\n")
cat("Neither:", sum(!comparison$glm_sig & !comparison$art_sig), "\n")

## ----disagreements------------------------------------------------------------
# Peptides where methods disagree
disagreements <- comparison %>%
  filter(glm_sig != art_sig) %>%
  arrange(pmin(glm_pval, art_pval))

if (nrow(disagreements) > 0) {
  print(disagreements)
} else {
  cat("No disagreements between methods\n")
}

## ----art-diagnostics----------------------------------------------------------
results_art$diagnostics %>%
  head()

## ----check-convergence--------------------------------------------------------
n_converged <- sum(results_art$diagnostics$converged)
n_failed <- sum(!results_art$diagnostics$converged)

cat("Converged:", n_converged, "\n")
cat("Failed:", n_failed, "\n")

## ----results-structure--------------------------------------------------------
glimpse(results_art$results)

## ----stratified-structure-----------------------------------------------------
glimpse(results_strat$results)

## ----export-results, eval = FALSE---------------------------------------------
# # All results
# write.csv(results_art$results, "art_results.csv", row.names = FALSE)
# 
# # Significant hits only
# results_art$results %>%
#   filter(significant) %>%
#   write.csv("art_significant.csv", row.names = FALSE)
# 
# # Stratified results
# write.csv(results_strat$results, "art_stratified.csv", row.names = FALSE)

## ----cleanup, include = FALSE-------------------------------------------------
unlink(temp_file)

