## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)

## ----eval=FALSE---------------------------------------------------------------
# # From GitHub
# devtools::install_github("danmaclean/peppwR")

## ----setup--------------------------------------------------------------------
library(peppwR)

## ----generate-data------------------------------------------------------------
set.seed(42)

# Generate peptide-level parameters
n_peptides <- 100
n_per_group <- 4

peptide_params <- tibble::tibble(
  peptide_id = paste0("pep_", sprintf("%04d", 1:n_peptides)),
  # Shape parameter: 1.5-5 (lower = more skewed)
  shape = runif(n_peptides, 1.5, 5),
  # Rate parameter: scaled to give realistic abundances
  rate = runif(n_peptides, 0.01, 0.1)
)

# Generate observations for each peptide
pilot_data <- peptide_params |>
  dplyr::rowwise() |>
  dplyr::mutate(
    data = list(tibble::tibble(
      condition = rep(c("control", "treatment"), each = n_per_group),
      replicate = rep(1:n_per_group, 2),
      abundance = rgamma(n_per_group * 2, shape = shape, rate = rate)
    ))
  ) |>
  dplyr::ungroup() |>
  dplyr::select(peptide_id, data) |>
  tidyr::unnest(data)

# Preview the data
head(pilot_data, 12)

## ----fit-distributions--------------------------------------------------------
fits <- fit_distributions(
  pilot_data,
  id = "peptide_id",
  group = "condition",
  value = "abundance",
  distributions = "continuous"
)

print(fits)

## ----plot-fits, fig.height=6--------------------------------------------------
plot(fits)

## ----density-overlay, eval=FALSE----------------------------------------------
# # Visual check: does the fitted distribution match the data?
# plot_density_overlay(fits, n_overlay = 4)

## ----qq-plots, eval=FALSE-----------------------------------------------------
# # Goodness-of-fit assessment via QQ plots
# plot_qq(fits, n_plots = 4)

## ----sample-size-sanity-check-------------------------------------------------
set.seed(42)

# Generate data with larger sample size - 30 per group
large_n_data <- tibble::tibble(
  peptide_id = "test_peptide",
  condition = rep(c("control", "treatment"), each = 30),
  replicate = rep(1:30, 2),
  abundance = rgamma(60, shape = 3, rate = 0.05)  # True distribution: Gamma
)

large_fits <- fit_distributions(
  large_n_data,
  id = "peptide_id",
  group = "condition",
  value = "abundance",
  distributions = "continuous"
)

print(large_fits)

## ----aggregate-sample-size----------------------------------------------------
result_n <- power_analysis(
  distribution = "gamma",
  params = list(shape = 2, rate = 0.05),
  effect_size = 2,
  target_power = 0.8,
  find = "sample_size",
  n_sim = 1000
)

print(result_n)

## ----plot-aggregate-sample-size, fig.height=4---------------------------------
plot(result_n)

## ----aggregate-power----------------------------------------------------------
result_power <- power_analysis(
  distribution = "gamma",
  params = list(shape = 2, rate = 0.05),
  effect_size = 2,
  n_per_group = 6,
  find = "power",
  n_sim = 1000
)

print(result_power)

## ----per-peptide-analysis-----------------------------------------------------
result_pp <- power_analysis(
  fits,
  effect_size = 2,
  target_power = 0.8,
  find = "sample_size",
  n_sim = 500
)

print(result_pp)

## ----plot-per-peptide, fig.height=4-------------------------------------------
plot(result_pp)