## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----setup, message = FALSE---------------------------------------------------
library(pepdiff)
library(dplyr)

## ----generate-data------------------------------------------------------------
set.seed(456)

n_peptides <- 40
n_reps <- 5  # 5 reps per condition = 20 observations per peptide

peptides <- paste0("PEP_", sprintf("%03d", 1:n_peptides))
genes <- paste0("GENE_", LETTERS[((1:n_peptides - 1) %% 26) + 1])

# Step 1: Create base abundance for each peptide
# Each peptide gets ONE base value shared across all its observations
peptide_info <- tibble(
  peptide = peptides,
  gene_id = genes,
  pep_num = 1:n_peptides,
  base = rgamma(n_peptides, shape = 6, rate = 0.6)  # Random baseline abundance
)

# Step 2: Create the experimental design (all combinations)
# 40 peptides × 2 treatments × 2 timepoints × 5 reps = 800 observations
design <- expand.grid(
  peptide = peptides,
  treatment = c("ctrl", "trt"),
  timepoint = c("0h", "24h"),
  bio_rep = 1:n_reps,
  stringsAsFactors = FALSE
)

# Step 3: Join design with peptide info and apply effects
sim_data <- design %>%
  left_join(peptide_info, by = "peptide") %>%
  mutate(
    # Group A (peptides 1-10): Treatment effect only
    # trt is 3-fold higher than ctrl, same at both timepoints
    trt_effect = ifelse(pep_num <= 10 & treatment == "trt", 3, 1),

    # Group B (peptides 11-20): Timepoint effect only
    # 24h is 3-fold higher than 0h, same for both treatments
    # NO treatment effect - ctrl and trt should be identical
    time_effect = ifelse(pep_num > 10 & pep_num <= 20 & timepoint == "24h", 3, 1),

    # Group C (peptides 21-30): Interaction
    # Treatment works at 24h (4-fold) but NOT at 0h
    int_effect = case_when(
      pep_num > 20 & pep_num <= 30 & treatment == "trt" & timepoint == "24h" ~ 4,
      TRUE ~ 1
    ),

    # Group D (peptides 31-40): No effect
    # All effect multipliers are 1 (no change)

    # Final abundance = base × all effects, with Gamma noise
    value = rgamma(n(), shape = 15, rate = 15 / (base * trt_effect * time_effect * int_effect))
  ) %>%
  select(peptide, gene_id, treatment, timepoint, bio_rep, value)

# Import
temp_file <- tempfile(fileext = ".csv")
write.csv(sim_data, temp_file, row.names = FALSE)

dat <- read_pepdiff(
  temp_file,
  id = "peptide",
  gene = "gene_id",
  value = "value",
  factors = c("treatment", "timepoint"),
  replicate = "bio_rep"
)

dat

## ----basic-glm----------------------------------------------------------------
results <- compare(
  dat,
  compare = "treatment",
  ref = "ctrl",
  method = "glm"
)

results

## ----view-results-------------------------------------------------------------
results$results %>%
  mutate(
    pep_num = as.numeric(gsub("PEP_", "", peptide)),
    group = case_when(
      pep_num <= 10 ~ "A: Treatment only",
      pep_num <= 20 ~ "B: Timepoint only",
      pep_num <= 30 ~ "C: Interaction",
      TRUE ~ "D: No effect"
    )
  ) %>%
  group_by(group) %>%
  summarise(
    n_significant = sum(significant),
    n_peptides = n(),
    median_fc = round(median(fold_change, na.rm = TRUE), 2)
  )

## ----plot-glm-results, fig.height = 6-----------------------------------------
plot(results)

## ----stratified---------------------------------------------------------------
results_stratified <- compare(
  dat,
  compare = "treatment",
  ref = "ctrl",
  within = "timepoint",
  method = "glm"
)

results_stratified

## ----view-stratified----------------------------------------------------------
results_stratified$results %>%
  mutate(
    pep_num = as.numeric(gsub("PEP_", "", peptide)),
    group = case_when(
      pep_num <= 10 ~ "A: Treatment only",
      pep_num <= 20 ~ "B: Timepoint only",
      pep_num <= 30 ~ "C: Interaction",
      TRUE ~ "D: No effect"
    )
  ) %>%
  group_by(group, timepoint) %>%
  summarise(
    n_significant = sum(significant),
    median_fc = round(median(fold_change, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  tidyr::pivot_wider(
    names_from = timepoint,
    values_from = c(n_significant, median_fc)
  )

## ----check-interaction--------------------------------------------------------
# Compare main vs stratified for all groups
main_summary <- results$results %>%
  mutate(
    pep_num = as.numeric(gsub("PEP_", "", peptide)),
    group = case_when(
      pep_num <= 10 ~ "A", pep_num <= 20 ~ "B",
      pep_num <= 30 ~ "C", TRUE ~ "D"
    )
  ) %>%
  group_by(group) %>%
  summarise(main_fc = round(median(fold_change, na.rm = TRUE), 2))

strat_summary <- results_stratified$results %>%
  mutate(
    pep_num = as.numeric(gsub("PEP_", "", peptide)),
    group = case_when(
      pep_num <= 10 ~ "A", pep_num <= 20 ~ "B",
      pep_num <= 30 ~ "C", TRUE ~ "D"
    )
  ) %>%
  group_by(group, timepoint) %>%
  summarise(fc = round(median(fold_change, na.rm = TRUE), 2), .groups = "drop") %>%
  tidyr::pivot_wider(names_from = timepoint, values_from = fc)

left_join(main_summary, strat_summary, by = "group")

## ----check-diagnostics--------------------------------------------------------
# Check diagnostics
n_converged <- sum(results$diagnostics$converged)
n_failed <- sum(!results$diagnostics$converged)

cat("Converged:", n_converged, "\n")
cat("Failed:", n_failed, "\n")

## ----view-failed, eval = FALSE------------------------------------------------
# # View failed peptides (if any)
# results$diagnostics %>%
#   filter(!converged) %>%
#   select(peptide, converged)

## ----model-diag---------------------------------------------------------------
# Deviance: lower is better fit (relative to other peptides)
results$diagnostics %>%
  arrange(desc(deviance)) %>%
  head()

## ----results-structure--------------------------------------------------------
# Main effect results
glimpse(results$results)

## ----stratified-structure-----------------------------------------------------
# Stratified results - note the timepoint column
glimpse(results_stratified$results)

## ----export-results, eval = FALSE---------------------------------------------
# # All results
# write.csv(results$results, "glm_results.csv", row.names = FALSE)
# 
# # Significant hits only
# results$results %>%
#   filter(significant) %>%
#   write.csv("significant_hits.csv", row.names = FALSE)
# 
# # Stratified results (one row per peptide per timepoint)
# write.csv(results_stratified$results, "stratified_results.csv", row.names = FALSE)

## ----cleanup, include = FALSE-------------------------------------------------
unlink(temp_file)

