## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----data---------------------------------------------------------------------
library(hhdynamics)
data("inputdata")
str(inputdata)
head(inputdata)

## ----si-----------------------------------------------------------------------
data("SI")
print(SI)
barplot(SI, names.arg = 1:14, xlab = "Days", ylab = "Probability",
        main = "Default serial interval distribution (influenza)")

## ----fit, eval = FALSE--------------------------------------------------------
# # With covariates (uses default influenza SI)
# fit <- household_dynamics(inputdata, inf_factor = ~sex, sus_factor = ~age,
#                           n_iteration = 50000, burnin = 10000, thinning = 10)
# 
# # Without covariates
# fit <- household_dynamics(inputdata,
#                           n_iteration = 50000, burnin = 10000, thinning = 10)
# 
# # With a custom serial interval
# my_SI <- c(0, 0.01, 0.05, 0.15, 0.25, 0.25, 0.15, 0.08, 0.04, 0.015, 0.005, 0, 0, 0)
# fit <- household_dynamics(inputdata, SI = my_SI,
#                           n_iteration = 50000, burnin = 10000, thinning = 10)

## ----print, eval = FALSE------------------------------------------------------
# print(fit)
# # Household transmission model fit
# #   Data: 386 households, 1533 individuals
# #   MCMC: 4000 post-burnin samples (50000 iterations, burnin: 10000, thin: 10)
# #   Runtime: 85 seconds
# #   Parameters: community, household, sex1.0, age1.0, age2.0
# #
# # Use summary() for estimates, coef() for posterior means.

## ----summary, eval = FALSE----------------------------------------------------
# summary(fit)
# #                                                    Variable Point estimate Lower bound Upper bound ...
# #               Daily probability of infection from community          0.004       0.002       0.007
# #  Probability of person-to-person transmission in households          0.057       0.034       0.084
# #                                                      sex1.0        -0.081      -0.733       0.488
# #                                                      age1.0        -0.065      -0.537       0.412
# #                                                      age2.0        -0.312      -0.831       0.170

## ----coef, eval = FALSE-------------------------------------------------------
# coef(fit)
# #        re_sd    community    household   size_param       sex1.0       age1.0       age2.0
# #  1.000000000  0.004239978  0.058555948  0.000000000 -0.080689680 -0.065001794 -0.312414284

## ----access, eval = FALSE-----------------------------------------------------
# # Posterior samples matrix (post-burnin, thinned)
# dim(fit$samples)       # e.g. 4000 x 7
# 
# # Log-likelihood trace (full chain, for convergence checks)
# dim(fit$log_likelihood) # e.g. 50000 x 3
# 
# # Per-parameter acceptance rates
# fit$acceptance
# 
# # Trace plot for community parameter
# plot(fit$samples[, "community"], type = "l",
#      ylab = "Community rate", xlab = "Iteration")
# 
# # Posterior density
# hist(fit$samples[, "household"], breaks = 50, probability = TRUE,
#      main = "Household transmission probability (raw scale)",
#      xlab = "Rate parameter")

## ----plot_diag, eval = FALSE--------------------------------------------------
# # Trace plots and posterior densities for all parameters
# plot_diagnostics(fit)
# 
# # Specific parameters only
# plot_diagnostics(fit, params = c("community", "household"))

## ----plot_trans, eval = FALSE-------------------------------------------------
# # Daily probability of transmission with 95% CrI
# plot_transmission(fit)
# 
# # Save to PDF
# pdf("transmission.pdf", width = 7, height = 5)
# plot_transmission(fit)
# dev.off()

## ----plot_cov, eval = FALSE---------------------------------------------------
# # Forest plot of covariate effects (relative risks)
# plot_covariates(fit)
# 
# # With custom labels for variable headers and factor levels
# plot_covariates(fit, file = "covariates.pdf",
#   labels = list(
#     sex = list(name = "Sex", levels = c("Male", "Female")),
#     age = list(name = "Age Group", levels = c("0-5", "6-17", "18+"))))

## ----plot_sar, eval = FALSE---------------------------------------------------
# # Overall attack rate
# plot_attack_rate(fit)
# 
# # Stratified by a single variable
# plot_attack_rate(fit, by = ~age)
# 
# # Combine overall + multiple strata in one figure
# plot_attack_rate(fit, by = list(~sex, ~age), include_overall = TRUE,
#   labels = list(
#     sex = list(name = "Sex", levels = c("Male", "Female")),
#     age = list(name = "Age Group", levels = c("0-5", "6-17", "18+"))))

## ----plot_hh, eval = FALSE----------------------------------------------------
# # Visualize a single household: index (triangle), infected contacts
# # (filled circles), uninfected contacts (open circles)
# plot_household(fit, hh_id = 1)

## ----tab_param, eval = FALSE--------------------------------------------------
# # Posterior mean, median, 95% CrI, and acceptance rate
# table_parameters(fit)
# 
# # Include effective sample size
# table_parameters(fit, show_ess = TRUE)

## ----tab_cov, eval = FALSE----------------------------------------------------
# # Relative risks with 95% CrIs, grouped by infectivity/susceptibility
# table_covariates(fit)

## ----tab_sar, eval = FALSE----------------------------------------------------
# # Overall SAR with Wilson CI
# table_attack_rates(fit)
# 
# # Stratified by covariate
# table_attack_rates(fit, by = ~age)

## ----validation, eval = FALSE-------------------------------------------------
# # Missing column
# household_dynamics(inputdata[, -1])
# # Error: 'input' is missing required columns: hhID
# 
# # Wrong formula variable
# household_dynamics(inputdata, inf_factor = ~nonexistent)
# # Error: Variables in inf_factor not found in input: nonexistent
# 
# # Old string syntax (no longer supported)
# household_dynamics(inputdata, inf_factor = "~sex")
# # Error: 'inf_factor' must be a formula (e.g. ~sex) or NULL.

## ----missing_onset, eval = FALSE----------------------------------------------
# # Some infected contacts have unknown symptom onset
# inputdata_missing_onset <- inputdata
# infected_contacts <- which(inputdata$member > 0 & inputdata$inf == 1)
# inputdata_missing_onset$onset[infected_contacts[1:5]] <- NA
# 
# fit_onset <- household_dynamics(inputdata_missing_onset, ~sex, ~age,
#                                  n_iteration = 50000, burnin = 10000, thinning = 10)
# # Note: 5 of 92 infected contact(s) (5.4%) have missing onset times. These will be imputed during MCMC.

## ----missing, eval = FALSE----------------------------------------------------
# # Introduce some missing values
# inputdata_missing <- inputdata
# inputdata_missing$sex[c(5, 12, 30)] <- NA
# inputdata_missing$age[c(8, 20)] <- NA
# 
# # Fit as usual — missing values are imputed automatically
# fit_missing <- household_dynamics(inputdata_missing, ~sex, ~age,
#                                   n_iteration = 50000, burnin = 10000, thinning = 10)
# # Note: Covariate 'sex' has 3 missing value(s) (0.2%). These will be imputed during MCMC.
# # Note: Covariate 'age' has 2 missing value(s) (0.1%). These will be imputed during MCMC.
# summary(fit_missing)

## ----estimate_si, eval = FALSE------------------------------------------------
# fit_si <- household_dynamics(inputdata, inf_factor = ~sex, sus_factor = ~age,
#                               n_iteration = 50000, burnin = 10000, thinning = 10,
#                               estimate_SI = TRUE)
# summary(fit_si)
# # Output now includes si_shape and si_scale parameters
# 
# # Reconstruct the estimated SI distribution
# est_shape <- mean(fit_si$samples[, "si_shape"])
# est_scale <- mean(fit_si$samples[, "si_scale"])
# est_SI <- pweibull(2:15, est_shape, est_scale) - pweibull(1:14, est_shape, est_scale)
# barplot(est_SI, names.arg = 1:14, xlab = "Days", ylab = "Probability",
#         main = "Estimated serial interval")

## ----simulate, eval = FALSE---------------------------------------------------
# # Use fitted parameter values
# para <- coef(fit)
# 
# # Simulate 10 replicates of the original data structure
# sim <- simulate_data(inputdata, rep_num = 10, inf_factor = ~sex, sus_factor = ~age,
#                      para = para, with_rm = 0)

