## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>",
  fig.align = "center")

# Set to TRUE to regenerate long-running simulation results
run_long <- FALSE

library(maskedcauses)
old_opts <- options(digits = 4)

## ----hom-setup----------------------------------------------------------------
m <- 5
theta_hom <- c(k = 1.5, beta1 = 300, beta2 = 400, beta3 = 500,
               beta4 = 600, beta5 = 700)

model_hom <- wei_series_homogeneous_md_c1_c2_c3()

# Right-censoring time at the 80th percentile of the system lifetime
beta_sys <- wei_series_system_scale(theta_hom[1], theta_hom[-1])
tau <- qweibull(0.8, shape = theta_hom[1], scale = beta_sys)

cat("System scale:", round(beta_sys, 1), "\n")
cat("Right-censoring time (q = 0.8):", round(tau, 1), "\n")

## ----hom-generate-------------------------------------------------------------
set.seed(2024)
gen_hom <- rdata(model_hom)

df <- gen_hom(theta_hom, n = 300, p = 0.2,
              observe = observe_right_censor(tau = tau))

cat("Observation types:\n")
print(table(df$omega))

## ----hom-fit-all, warning=FALSE-----------------------------------------------
# Exponential (m params)
model_exp <- exp_series_md_c1_c2_c3()
fit_e <- fit(model_exp)(df, par = rep(0.002, m), method = "Nelder-Mead")

# Homogeneous Weibull (m+1 params)
fit_h <- fit(model_hom)(df, par = c(1, rep(500, m)),
             method = "Nelder-Mead", control = list(maxit = 5000))

# Heterogeneous Weibull (2m params) — warm-start from hom fit
model_het <- wei_series_md_c1_c2_c3()
het_start <- as.numeric(rbind(rep(fit_h$par[1], m), fit_h$par[-1]))
fit_w <- fit(model_het)(df, par = het_start,
             method = "Nelder-Mead", control = list(maxit = 10000))

## ----hom-lrt------------------------------------------------------------------
np_exp <- m
np_hom <- m + 1
np_het <- 2 * m
n_obs <- nrow(df)

# LRT: heterogeneous vs homogeneous
LRT_shape <- -2 * (fit_h$loglik - fit_w$loglik)
df_shape <- np_het - np_hom   # m - 1 = 4
p_shape <- pchisq(LRT_shape, df = df_shape, lower.tail = FALSE)

# LRT: homogeneous vs exponential
LRT_aging <- -2 * (fit_e$loglik - fit_h$loglik)
df_aging <- np_hom - np_exp   # 1
p_aging <- pchisq(LRT_aging, df = df_aging, lower.tail = FALSE)

# AIC and BIC
aic_val <- function(ll, k) -2 * ll + 2 * k
bic_val <- function(ll, k, n) -2 * ll + log(n) * k

comparison <- data.frame(
  Model = c("Exponential", "Hom. Weibull", "Het. Weibull"),
  Params = c(np_exp, np_hom, np_het),
  LogLik = c(fit_e$loglik, fit_h$loglik, fit_w$loglik),
  AIC = c(aic_val(fit_e$loglik, np_exp),
          aic_val(fit_h$loglik, np_hom),
          aic_val(fit_w$loglik, np_het)),
  BIC = c(bic_val(fit_e$loglik, np_exp, n_obs),
          bic_val(fit_h$loglik, np_hom, n_obs),
          bic_val(fit_w$loglik, np_het, n_obs))
)

knitr::kable(comparison, digits = 2,
             caption = "Model comparison (true model: homogeneous Weibull)",
             col.names = c("Model", "# Params", "Log-Lik", "AIC", "BIC"))

## ----hom-lrt-results----------------------------------------------------------
cat("Step 1: Het vs Hom (shape heterogeneity)\n")
cat(sprintf("  LRT = %.2f, df = %d, p = %.4f\n", LRT_shape, df_shape, p_shape))
if (p_shape < 0.05) {
  cat("  Decision: Reject homogeneous -> adopt heterogeneous model\n\n")
} else {
  cat("  Decision: Fail to reject -> retain homogeneous model\n\n")
}

cat("Step 2: Hom vs Exp (aging)\n")
cat(sprintf("  LRT = %.2f, df = %d, p = %.4f\n", LRT_aging, df_aging, p_aging))
if (p_aging < 0.05) {
  cat("  Decision: Reject exponential -> adopt homogeneous model\n")
} else {
  cat("  Decision: Fail to reject -> retain exponential model\n")
}

## ----het-setup----------------------------------------------------------------
# Shapes span DFR to strong IFR
theta_het_true <- c(0.8, 300,    # DFR (electronics)
                    1.0, 400,    # CFR (random)
                    1.5, 500,    # mild IFR (seals)
                    2.0, 600,    # moderate IFR (bearings)
                    2.5, 700)    # strong IFR (wear-out)

model_het <- wei_series_md_c1_c2_c3()
gen_het <- rdata(model_het)

set.seed(7231)
df_het <- gen_het(theta_het_true, n = 300, p = 0.2,
                  observe = observe_right_censor(tau = tau))

cat("Observation types:\n")
print(table(df_het$omega))

## ----het-fit-all, warning=FALSE-----------------------------------------------
fit_e2 <- fit(model_exp)(df_het, par = rep(0.002, m), method = "Nelder-Mead")
fit_h2 <- fit(model_hom)(df_het, par = c(1, rep(500, m)),
              method = "Nelder-Mead", control = list(maxit = 5000))
het_start2 <- as.numeric(rbind(rep(fit_h2$par[1], m), fit_h2$par[-1]))
fit_w2 <- fit(model_het)(df_het, par = het_start2,
              method = "Nelder-Mead", control = list(maxit = 10000))

## ----het-lrt------------------------------------------------------------------
# LRT: heterogeneous vs homogeneous
LRT_shape2 <- -2 * (fit_h2$loglik - fit_w2$loglik)
p_shape2 <- pchisq(LRT_shape2, df = m - 1, lower.tail = FALSE)

# LRT: homogeneous vs exponential
LRT_aging2 <- -2 * (fit_e2$loglik - fit_h2$loglik)
p_aging2 <- pchisq(LRT_aging2, df = 1, lower.tail = FALSE)

comparison2 <- data.frame(
  Model = c("Exponential", "Hom. Weibull", "Het. Weibull"),
  Params = c(np_exp, np_hom, np_het),
  LogLik = c(fit_e2$loglik, fit_h2$loglik, fit_w2$loglik),
  AIC = c(aic_val(fit_e2$loglik, np_exp),
          aic_val(fit_h2$loglik, np_hom),
          aic_val(fit_w2$loglik, np_het)),
  BIC = c(bic_val(fit_e2$loglik, np_exp, nrow(df_het)),
          bic_val(fit_h2$loglik, np_hom, nrow(df_het)),
          bic_val(fit_w2$loglik, np_het, nrow(df_het)))
)

knitr::kable(comparison2, digits = 2,
             caption = "Model comparison (true model: heterogeneous Weibull)",
             col.names = c("Model", "# Params", "Log-Lik", "AIC", "BIC"))

cat(sprintf("\nStep 1: Het vs Hom — LRT = %.2f, df = %d, p = %.4f\n",
            LRT_shape2, m - 1, p_shape2))
cat(sprintf("Step 2: Hom vs Exp — LRT = %.2f, df = %d, p = %.4f\n",
            LRT_aging2, 1, p_aging2))

## ----load-precomputed-mc, include=FALSE, eval=!run_long-----------------------
mc_data <- readRDS("precomputed_model_selection.rds")

## ----mc-setup, eval=run_long--------------------------------------------------
# set.seed(42)
# 
# m <- 5
# base_k <- 1.5
# base_scales <- c(300, 400, 500, 600, 700)
# cv_levels <- c(0, 0.05, 0.10, 0.20, 0.30, 0.50)
# n_reps <- 200
# n_obs <- 500
# p_mask <- 0.2
# alpha <- 0.05
# 
# # Fixed offsets: mean 0, sd 1
# offsets <- c(-2, -1, 0, 1, 2)
# offsets <- offsets / sd(offsets)
# 
# # Right-censoring time (based on homogeneous baseline at CV = 0)
# beta_sys <- wei_series_system_scale(base_k, base_scales)
# tau <- qweibull(0.8, shape = base_k, scale = beta_sys)
# 
# # Models and solvers
# model_exp <- exp_series_md_c1_c2_c3()
# model_hom <- wei_series_homogeneous_md_c1_c2_c3()
# model_het <- wei_series_md_c1_c2_c3()
# 
# solver_exp <- fit(model_exp)
# solver_hom <- fit(model_hom)
# solver_het <- fit(model_het)
# 
# gen_het <- rdata(model_het)
# 
# np_exp <- m
# np_hom <- m + 1
# np_het <- 2 * m

## ----mc-run, eval=run_long, cache=TRUE, warning=FALSE, message=FALSE----------
# results_list <- vector("list", length(cv_levels) * n_reps)
# idx <- 1
# 
# for (cv in cv_levels) {
#   shapes <- base_k * (1 + cv * offsets)
#   theta <- as.numeric(rbind(shapes, base_scales))
# 
#   for (r in seq_len(n_reps)) {
#     df_r <- gen_het(theta, n = n_obs, p = p_mask,
#                     observe = observe_right_censor(tau = tau))
# 
#     row <- list(cv = cv, rep = r,
#                 reject_het_vs_hom = NA, reject_hom_vs_exp = NA,
#                 ll_exp = NA, ll_hom = NA, ll_het = NA,
#                 aic_exp = NA, aic_hom = NA, aic_het = NA,
#                 bic_exp = NA, bic_hom = NA, bic_het = NA,
#                 converged_all = FALSE)
# 
#     tryCatch({
#       fe <- solver_exp(df_r, par = rep(0.002, m), method = "Nelder-Mead")
# 
#       # Two-phase for hom: Nelder-Mead warm-up -> L-BFGS-B refinement
#       fh_nm <- solver_hom(df_r, par = c(1, rep(500, m)),
#                            method = "Nelder-Mead",
#                            control = list(maxit = 5000))
#       fh <- solver_hom(df_r, par = fh_nm$par,
#                         method = "L-BFGS-B", lower = rep(1e-6, np_hom))
# 
#       # Two-phase for het: warm-start from hom, Nelder-Mead -> L-BFGS-B
#       het_start <- as.numeric(rbind(rep(fh$par[1], m), fh$par[-1]))
#       fw_nm <- solver_het(df_r, par = het_start,
#                            method = "Nelder-Mead",
#                            control = list(maxit = 10000))
#       fw <- solver_het(df_r, par = fw_nm$par,
#                         method = "L-BFGS-B", lower = rep(1e-6, np_het))
# 
#       if (fe$converged && fh$converged && fw$converged) {
#         row$ll_exp <- fe$loglik
#         row$ll_hom <- fh$loglik
#         row$ll_het <- fw$loglik
# 
#         row$aic_exp <- -2 * fe$loglik + 2 * np_exp
#         row$aic_hom <- -2 * fh$loglik + 2 * np_hom
#         row$aic_het <- -2 * fw$loglik + 2 * np_het
# 
#         row$bic_exp <- -2 * fe$loglik + log(n_obs) * np_exp
#         row$bic_hom <- -2 * fh$loglik + log(n_obs) * np_hom
#         row$bic_het <- -2 * fw$loglik + log(n_obs) * np_het
# 
#         LRT_het <- -2 * (fh$loglik - fw$loglik)
#         LRT_hom <- -2 * (fe$loglik - fh$loglik)
# 
#         row$reject_het_vs_hom <- pchisq(LRT_het, df = m - 1,
#                                           lower.tail = FALSE) < alpha
#         row$reject_hom_vs_exp <- pchisq(LRT_hom, df = 1,
#                                           lower.tail = FALSE) < alpha
#         row$converged_all <- TRUE
#       }
#     }, error = function(e) NULL)
# 
#     results_list[[idx]] <- row
#     idx <- idx + 1
#   }
# 
#   cat(sprintf("CV = %.0f%% complete\n", cv * 100))
# }
# 
# power_study <- do.call(rbind, lapply(results_list, as.data.frame))

## ----mc-save, eval=run_long, include=FALSE------------------------------------
# mc_data <- list(
#   power_study = power_study,
#   config = list(m = m, n = n_obs, p = p_mask, tau = tau,
#                 n_reps = n_reps, alpha = alpha,
#                 base_k = base_k, base_scales = base_scales,
#                 offsets = offsets)
# )
# saveRDS(mc_data, "precomputed_model_selection.rds")

## ----mc-rejection-rates-------------------------------------------------------
ps <- mc_data$power_study
ps_conv <- ps[ps$converged_all, ]

rej_summary <- do.call(rbind, lapply(split(ps_conv, ps_conv$cv), function(d) {
  data.frame(
    CV_pct = d$cv[1] * 100,
    n_converged = nrow(d),
    reject_het_vs_hom = mean(d$reject_het_vs_hom),
    reject_hom_vs_exp = mean(d$reject_hom_vs_exp)
  )
}))

knitr::kable(rej_summary, digits = 3, row.names = FALSE,
             caption = sprintf(
               "Rejection rates at alpha = %.2f (n = %d, m = %d)",
               mc_data$config$alpha, mc_data$config$n, mc_data$config$m),
             col.names = c("Shape CV (%)", "Converged",
                          "Reject Hom (Het vs Hom)",
                          "Reject Exp (Hom vs Exp)"))

## ----mc-power-curve, fig.width=7, fig.height=5--------------------------------
plot(rej_summary$CV_pct, rej_summary$reject_het_vs_hom,
     type = "b", pch = 19, col = "steelblue", lwd = 2,
     xlab = "Coefficient of Variation of Shapes (%)",
     ylab = "Rejection Rate",
     ylim = c(0, 1),
     main = "Power of the LRT Cascade")
lines(rej_summary$CV_pct, rej_summary$reject_hom_vs_exp,
      type = "b", pch = 17, col = "firebrick", lwd = 2, lty = 2)
abline(h = mc_data$config$alpha, lty = 3, col = "gray50")
legend("right",
       c("Het vs Hom (shape heterogeneity)",
         "Hom vs Exp (aging)",
         expression(alpha == 0.05)),
       col = c("steelblue", "firebrick", "gray50"),
       lty = c(1, 2, 3), pch = c(19, 17, NA), lwd = 2, cex = 0.85)
grid()

## ----mc-aic-accuracy----------------------------------------------------------
aic_correct <- do.call(rbind, lapply(split(ps_conv, ps_conv$cv), function(d) {
  cv <- d$cv[1]

  aic_choice <- ifelse(d$aic_het < d$aic_hom & d$aic_het < d$aic_exp, "het",
                ifelse(d$aic_hom < d$aic_exp, "hom", "exp"))
  bic_choice <- ifelse(d$bic_het < d$bic_hom & d$bic_het < d$bic_exp, "het",
                ifelse(d$bic_hom < d$bic_exp, "hom", "exp"))

  correct_model <- if (cv == 0) "hom" else "het"

  data.frame(
    CV_pct = cv * 100,
    AIC_correct = mean(aic_choice == correct_model),
    BIC_correct = mean(bic_choice == correct_model)
  )
}))

knitr::kable(aic_correct, digits = 3, row.names = FALSE,
             caption = "Proportion selecting the correct model",
             col.names = c("Shape CV (%)", "AIC Correct", "BIC Correct"))

## ----cleanup, include=FALSE---------------------------------------------------
options(old_opts)

