## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment  = "#>",
  eval     = FALSE
)

## ----setup-data---------------------------------------------------------------
# library(ukbflow)
# 
# # ops_toy(scenario = "association") returns a pre-derived analysis-ready table:
# # covariates already as factors, bmi_cat / tdi_cat binned, and two outcomes
# # (dm_* and htn_*) with status / date / timing / followup columns in place.
# dt <- ops_toy(scenario = "association")
# dt <- dt[dm_timing != 1L]   # incident analysis: exclude prevalent DM cases

## ----assoc-coxph--------------------------------------------------------------
# # Crude + age-sex adjusted (automatic); p21022 and p31 auto-detected
# res <- assoc_coxph(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = c("p20116_i0", "bmi_cat")
# )

## ----assoc-coxph-full---------------------------------------------------------
# # Add a Fully adjusted model
# res <- assoc_coxph(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = "p20116_i0",
#   covariates   = c("bmi_cat", "tdi_cat", "p1558_i0",
#                    paste0("p22009_a", 1:10))
# )

## ----assoc-logistic-----------------------------------------------------------
# res <- assoc_logistic(
#   data         = dt,
#   outcome_col  = "dm_status",
#   exposure_col = c("p20116_i0", "bmi_cat"),
#   covariates   = c("tdi_cat", "p1558_i0", paste0("p22009_a", 1:10))
# )

## ----assoc-logistic-profile---------------------------------------------------
# res <- assoc_logistic(
#   data         = dt,
#   outcome_col  = "dm_status",
#   exposure_col = "grs_bmi",
#   ci_method    = "profile"   # slower but more accurate for sparse data
# )

## ----assoc-linear-------------------------------------------------------------
# # Continuous outcome (BMI); smoking and GRS as exposures
# res <- assoc_linear(
#   data         = dt,
#   outcome_col  = "p21001_i0",
#   exposure_col = c("p20116_i0", "grs_bmi"),
#   covariates   = c("tdi_cat", "p1558_i0", paste0("p22009_a", 1:10))
# )

## ----assoc-zph----------------------------------------------------------------
# zph <- assoc_coxph_zph(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = c("p20116_i0", "bmi_cat"),
#   covariates   = c("tdi_cat", "p1558_i0")
# )
# 
# # Identify any violations
# zph[ph_satisfied == FALSE]

## ----assoc-subgroup-----------------------------------------------------------
# # Subgroup by sex; p31 is automatically excluded from within-stratum models
# res <- assoc_subgroup(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = "p20116_i0",
#   by           = "p31",
#   method       = "coxph",
#   covariates   = c("p21022", "bmi_cat", "tdi_cat")
# )

## ----assoc-trend-prep---------------------------------------------------------
# # assoc_trend() requires an ordered factor; make bmi_cat ordered in-place
# dt[, bmi_cat := factor(bmi_cat, levels = levels(bmi_cat), ordered = TRUE)]

## ----assoc-trend--------------------------------------------------------------
# res <- assoc_trend(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = "bmi_cat",
#   method       = "coxph",
#   covariates   = c("p21022", "p31", "tdi_cat", "p20116_i0")
# )

## ----assoc-trend-scores-------------------------------------------------------
# res <- assoc_trend(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = "bmi_cat",
#   method       = "coxph",
#   covariates   = c("p21022", "p31", "tdi_cat", "p20116_i0"),
#   scores       = c(17, 22, 27, 35)   # approximate median BMI per category
# )

## ----assoc-competing-a--------------------------------------------------------
# # Construct a combined event column: 0 = censored, 1 = DM, 2 = HTN (competing)
# dt_cr <- dt[htn_timing != 1L]   # also exclude prevalent HTN
# dt_cr[, event_type := data.table::fcase(
#   dm_timing  == 2L, 1L,
#   htn_timing == 2L, 2L,
#   default          = 0L
# )]
# 
# res <- assoc_competing(
#   data         = dt_cr,
#   outcome_col  = "event_type",       # 0 = censored, 1 = DM, 2 = HTN
#   time_col     = "dm_followup_years",
#   exposure_col = "p20116_i0",
#   event_val    = 1L,
#   compete_val  = 2L,
#   covariates   = c("bmi_cat", "tdi_cat")
# )

## ----assoc-competing-b--------------------------------------------------------
# res <- assoc_competing(
#   data         = dt_cr,
#   outcome_col  = "dm_status",        # primary event
#   time_col     = "dm_followup_years",
#   exposure_col = c("p20116_i0", "bmi_cat"),
#   compete_col  = "htn_status",       # competing event
#   covariates   = c("tdi_cat", "p1558_i0")
# )

## ----downstream---------------------------------------------------------------
# res <- assoc_coxph(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = "p20116_i0",
#   covariates   = c("bmi_cat", "tdi_cat", "p1558_i0")
# )
# 
# # Pass directly to plot_forest()
# plot_forest(as.data.frame(res))
# 
# # Filter to a single model
# res[model == "Fully adjusted"]
# 
# # Export
# data.table::fwrite(res, "assoc_results.csv")

## ----assoc-lag----------------------------------------------------------------
# res <- assoc_lag(
#   data         = dt,
#   outcome_col  = "dm_status",
#   time_col     = "dm_followup_years",
#   exposure_col = "p20116_i0",
#   lag_years    = c(0, 1, 2),   # 0 = full cohort reference
#   covariates   = c("bmi_cat", "tdi_cat", "p1558_i0")
# )

