## ----setup, include = FALSE---------------------------------------------------
# Document-wide knitr chunk defaults: collapse = TRUE, output lines prefixed
# with "#>", and eval = FALSE so that chunks are not evaluated.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)

## ----setup-data---------------------------------------------------------------
# library(ukbflow)
# 
# # Build on the derive pipeline from vignette("derive")
# df <- ops_toy(n = 500)
# df <- derive_missing(df)
# df <- derive_covariate(df, as_factor = c("p31", "p20116_i0"))
# df <- derive_selfreport(df, name = "dm", regex = "type 2 diabetes")
# df <- derive_icd10(df, name = "dm", icd10 = "E11", source = c("hes", "death"))
# df <- derive_case(df, name = "dm")

## ----derive-timing------------------------------------------------------------
# # Uses {name}_status and {name}_date by default
# df <- derive_timing(df, name = "dm", baseline_col = "p53_i0")

## ----derive-timing-explicit---------------------------------------------------
# df <- derive_timing(df,
#   name         = "dm",
#   status_col   = "dm_status",
#   date_col     = "dm_date",
#   baseline_col = "p53_i0"
# )

## ----derive-age---------------------------------------------------------------
# # Auto-detects {name}_date and {name}_status; produces age_at_{name} column.
# df <- derive_age(df,
#   name         = "dm",
#   baseline_col = "p53_i0",
#   age_col      = "p21022"
# )

## ----derive-age-explicit------------------------------------------------------
# df <- derive_age(df,
#   name         = "dm",
#   baseline_col = "p53_i0",
#   age_col      = "p21022",
#   date_cols    = c(dm = "dm_date"),
#   status_cols  = c(dm = "dm_status")
# )

## ----derive-followup----------------------------------------------------------
# df <- derive_followup(df,
#   name         = "dm",
#   event_col    = "dm_date",
#   baseline_col = "p53_i0",
#   censor_date  = as.Date("2022-10-31"),   # set to your study's cut-off date
#   death_col    = "p40000_i0",
#   lost_col     = FALSE                    # not available in ops_toy
# )

## ----derive-followup-nodeath--------------------------------------------------
# df <- derive_followup(df,
#   name         = "dm",
#   event_col    = "dm_date",
#   baseline_col = "p53_i0",
#   censor_date  = as.Date("2022-10-31"),
#   death_col    = FALSE,
#   lost_col     = FALSE
# )

## ----cox-example--------------------------------------------------------------
# library(survival)
# 
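# # Incident analysis: exclude prevalent cases (dm_timing == 1L). Rows with
# # undetermined timing are presumably coded NA in dm_timing; a data.table
# # filter treats NA in `i` as FALSE, so those rows are dropped as well.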
# df_incident <- df[dm_timing != 1L]
# 
# fit <- coxph(
#   Surv(dm_followup_years, dm_status) ~
#     p20116_i0 + p21022 + p31 + p1558_i0,
#   data = df_incident
# )
# summary(fit)

