## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>",
  fig.align = "center")

# Set to TRUE to regenerate long-running simulation results
run_long <- FALSE

library(maskedcauses)
old_opts <- options(digits = 4)

## ----define-theta-------------------------------------------------------------
theta <- c(0.7, 200,   # component 1: DFR (electronics)
           1.0, 150,   # component 2: CFR (seals)
           2.0, 100)   # component 3: IFR (bearing)
m <- length(theta) / 2

## ----hazard-plot, fig.width=7, fig.height=4.5---------------------------------
model <- wei_series_md_c1_c2_c3(
  shapes = theta[seq(1, 6, 2)],
  scales = theta[seq(2, 6, 2)]
)

h1 <- component_hazard(model, 1)
h2 <- component_hazard(model, 2)
h3 <- component_hazard(model, 3)

t_grid <- seq(0.5, 120, length.out = 300)

plot(t_grid, h1(t_grid, theta), type = "l", col = "steelblue", lwd = 2,
     ylim = c(0, 0.05),
     xlab = "Time", ylab = "Hazard rate h(t)",
     main = "Component hazard functions")
lines(t_grid, h2(t_grid, theta), col = "forestgreen", lwd = 2, lty = 2)
lines(t_grid, h3(t_grid, theta), col = "firebrick", lwd = 2, lty = 3)
legend("topright",
       legend = c("Comp 1: DFR (k=0.7)", "Comp 2: CFR (k=1.0)",
                  "Comp 3: IFR (k=2.0)"),
       col = c("steelblue", "forestgreen", "firebrick"),
       lty = 1:3, lwd = 2, cex = 0.85)
grid()

## ----cause-prob-plot, fig.width=7, fig.height=4.5-----------------------------
ccp_fn <- conditional_cause_probability(model)

probs <- ccp_fn(t_grid, theta)

plot(t_grid, probs[, 1], type = "l", col = "steelblue", lwd = 2,
     ylim = c(0, 1),
     xlab = "Time", ylab = "P(K = j | T = t)",
     main = "Conditional cause-of-failure probability")
lines(t_grid, probs[, 2], col = "forestgreen", lwd = 2, lty = 2)
lines(t_grid, probs[, 3], col = "firebrick", lwd = 2, lty = 3)
legend("right",
       legend = c("Comp 1: DFR", "Comp 2: CFR", "Comp 3: IFR"),
       col = c("steelblue", "forestgreen", "firebrick"),
       lty = 1:3, lwd = 2, cex = 0.85)
grid()

## ----integrand-demo-----------------------------------------------------------
shapes <- theta[seq(1, 6, 2)]
scales <- theta[seq(2, 6, 2)]
cind <- c(TRUE, TRUE, FALSE)  # candidate set = {1, 2}

# Evaluate the integrand h_c(t) * S(t) at a few time points
t_eval <- c(10, 50, 100)
vals <- maskedcauses:::wei_series_integrand(
  t_eval, shapes, scales, cind
)
names(vals) <- paste0("t=", t_eval)
vals

## ----integrand-plot, fig.width=7, fig.height=4--------------------------------
t_fine <- seq(0.5, 150, length.out = 500)

# Full candidate set: all components
vals_full <- maskedcauses:::wei_series_integrand(
  t_fine, shapes, scales, cind = rep(TRUE, 3)
)

plot(t_fine, vals_full, type = "l", col = "black", lwd = 2,
     xlab = "Time", ylab = expression(h[c](t) %.% S(t)),
     main = "Weibull series integrand (full candidate set)")
grid()

## ----timing-comparison--------------------------------------------------------
set.seed(123)
gen <- rdata(model)
ll_fn <- loglik(model)

# Exact + right only
df_exact <- gen(theta, n = 100, tau = 120, p = 0.3,
                observe = observe_right_censor(tau = 120))

# Mixed: includes left and interval censoring
df_mixed <- gen(theta, n = 100, p = 0.3,
                observe = observe_mixture(
                  observe_right_censor(tau = 120),
                  observe_left_censor(tau = 80),
                  observe_periodic(delta = 20, tau = 120),
                  weights = c(0.5, 0.25, 0.25)
                ))

cat("Observation type counts (exact/right):\n")
print(table(df_exact$omega))

cat("\nObservation type counts (mixed):\n")
print(table(df_mixed$omega))

t_exact <- system.time(replicate(5, ll_fn(df_exact, theta)))["elapsed"]
t_mixed <- system.time(replicate(5, ll_fn(df_mixed, theta)))["elapsed"]

cat(sprintf("\nLoglik time (exact/right only): %.4f s per eval\n", t_exact / 5))
cat(sprintf("Loglik time (mixed censoring):  %.4f s per eval\n", t_mixed / 5))
cat(sprintf("Slowdown factor: %.1fx\n", t_mixed / t_exact))

## ----mle-generate-------------------------------------------------------------
set.seed(7231)
n_mle <- 300

# Right-censoring only (fast, closed-form likelihood)
gen <- rdata(model)
df <- gen(theta, n = n_mle, tau = 120, p = 0.3,
          observe = observe_right_censor(tau = 120))

cat("Observation types:\n")
print(table(df$omega))
cat("\nFirst few rows:\n")
print(head(df), row.names = FALSE)

## ----mle-fit, warning=FALSE---------------------------------------------------
solver <- fit(model)

# Initial guess: all shapes = 1, scales = 100
theta0 <- rep(c(1, 100), m)

estimate <- solver(df, par = theta0, method = "Nelder-Mead")
print(estimate)

## ----mle-recovery-------------------------------------------------------------
# Parameter recovery table
par_names <- paste0(
  rep(c("k", "beta"), m), "_",
  rep(1:m, each = 2)
)
recovery <- data.frame(
  Parameter = par_names,
  True = theta,
  Estimate = estimate$par,
  SE = sqrt(diag(estimate$vcov)),
  Rel_Error_Pct = 100 * (estimate$par - theta) / theta
)

knitr::kable(recovery, digits = 3,
             caption = paste0("Parameter recovery (n = ", n_mle, ")"),
             col.names = c("Parameter", "True", "MLE", "SE", "Rel. Error %"))

## ----model-comparison-generate------------------------------------------------
set.seed(999)

# Generate from the TRUE heterogeneous DGP
df_comp <- gen(theta, n = 800, tau = 150, p = 0.2,
               observe = observe_right_censor(tau = 150))

cat("Observation types:\n")
print(table(df_comp$omega))

## ----model-comparison-fit, warning=FALSE--------------------------------------
# Fit heterogeneous model (6 parameters)
model_het <- wei_series_md_c1_c2_c3()
solver_het <- fit(model_het)
fit_het <- solver_het(df_comp, par = rep(c(1, 120), m), method = "Nelder-Mead")

# Fit homogeneous model (4 parameters)
model_hom <- wei_series_homogeneous_md_c1_c2_c3()
solver_hom <- fit(model_hom)
theta0_hom <- c(1, 120, 120, 120)
fit_hom <- solver_hom(df_comp, par = theta0_hom, method = "Nelder-Mead")

cat("Heterogeneous model (2m = 6 params):\n")
cat("  Log-likelihood:", fit_het$loglik, "\n")
cat("  Parameters:", round(fit_het$par, 2), "\n\n")

cat("Homogeneous model (m+1 = 4 params):\n")
cat("  Log-likelihood:", fit_hom$loglik, "\n")
cat("  Parameters:", round(fit_hom$par, 2), "\n")

## ----model-comparison-lrt-----------------------------------------------------
# Likelihood ratio test
# H0: k1 = k2 = k3 (homogeneous, 4 params)
# H1: k1, k2, k3 free (heterogeneous, 6 params)
LRT <- 2 * (fit_het$loglik - fit_hom$loglik)
df_lrt <- 6 - 4  # difference in parameters
p_value <- pchisq(LRT, df = df_lrt, lower.tail = FALSE)

comparison <- data.frame(
  Model = c("Heterogeneous (2m)", "Homogeneous (m+1)"),
  Params = c(6, 4),
  LogLik = c(fit_het$loglik, fit_hom$loglik),
  AIC = c(-2 * fit_het$loglik + 2 * 6, -2 * fit_hom$loglik + 2 * 4)
)

knitr::kable(comparison, digits = 2,
             caption = "Model comparison: heterogeneous vs homogeneous",
             col.names = c("Model", "# Params", "Log-Lik", "AIC"))

cat(sprintf("\nLikelihood ratio statistic: %.2f (df = %d, p = %.4f)\n",
            LRT, df_lrt, p_value))

## ----load-precomputed, include=FALSE, eval=!run_long--------------------------
results <- readRDS("precomputed_weibull.rds")

## ----mc-setup, eval=run_long--------------------------------------------------
# set.seed(42)
# 
# theta_mc <- c(0.8, 150, 1.5, 120, 2.0, 100)
# m_mc <- length(theta_mc) / 2
# n_mc <- 1000
# B <- 100
# p_mc <- 0.2
# tau_mc <- 200
# alpha <- 0.05
# 
# model_mc <- wei_series_md_c1_c2_c3()
# gen_mc <- rdata(model_mc)
# solver_mc <- fit(model_mc)
# 
# # Storage
# estimates <- matrix(NA, nrow = B, ncol = 2 * m_mc)
# se_estimates <- matrix(NA, nrow = B, ncol = 2 * m_mc)
# ci_lower <- matrix(NA, nrow = B, ncol = 2 * m_mc)
# ci_upper <- matrix(NA, nrow = B, ncol = 2 * m_mc)
# converged <- logical(B)
# logliks <- numeric(B)

## ----mc-run, eval=run_long, cache=TRUE, warning=FALSE, message=FALSE----------
# theta0_mc <- rep(c(1, 130), m_mc)
# 
# for (b in seq_len(B)) {
#   df_b <- gen_mc(theta_mc, n = n_mc, tau = tau_mc, p = p_mc,
#                  observe = observe_right_censor(tau = tau_mc))
# 
#   tryCatch({
#     fit_b <- solver_mc(df_b, par = theta0_mc,
#                        method = "L-BFGS-B",
#                        lower = rep(1e-6, 2 * m_mc))
#     estimates[b, ] <- fit_b$par
#     se_estimates[b, ] <- sqrt(diag(fit_b$vcov))
#     logliks[b] <- fit_b$loglik
# 
#     z <- qnorm(1 - alpha / 2)
#     ci_lower[b, ] <- fit_b$par - z * se_estimates[b, ]
#     ci_upper[b, ] <- fit_b$par + z * se_estimates[b, ]
#     converged[b] <- fit_b$converged
#   }, error = function(e) {
#     converged[b] <<- FALSE
#   })
# 
#   if (b %% 20 == 0) cat(sprintf("Replication %d/%d\n", b, B))
# }
# 
# cat("Convergence rate:", mean(converged, na.rm = TRUE), "\n")

## ----mc-save, eval=run_long, include=FALSE------------------------------------
# results <- list(
#   estimates = estimates, se_estimates = se_estimates,
#   ci_lower = ci_lower, ci_upper = ci_upper, converged = converged,
#   logliks = logliks, B = B, alpha = alpha,
#   theta_mc = theta_mc, m_mc = m_mc, n_mc = n_mc,
#   p_mc = p_mc, tau_mc = tau_mc
# )
# saveRDS(results, "precomputed_weibull.rds")

## ----mc-results---------------------------------------------------------------
theta_mc <- results$theta_mc
m_mc <- results$m_mc
alpha <- results$alpha

valid <- results$converged & !is.na(results$estimates[, 1])
est_valid <- results$estimates[valid, , drop = FALSE]

bias <- colMeans(est_valid) - theta_mc
variance <- apply(est_valid, 2, var)
mse <- bias^2 + variance
rmse <- sqrt(mse)

par_names_mc <- paste0(
  rep(c("k", "beta"), m_mc), "_",
  rep(1:m_mc, each = 2)
)

results_df <- data.frame(
  Parameter = par_names_mc,
  True = theta_mc,
  Mean_Est = colMeans(est_valid),
  Bias = bias,
  Variance = variance,
  MSE = mse,
  RMSE = rmse,
  Rel_Bias_Pct = 100 * bias / theta_mc
)

knitr::kable(results_df, digits = 4,
             caption = paste0("Monte Carlo results (n = ", results$n_mc,
                              ", B = ", sum(valid), " converged)"),
             col.names = c("Parameter", "True", "Mean Est.",
                          "Bias", "Variance", "MSE", "RMSE",
                          "Rel. Bias %"))

## ----mc-coverage--------------------------------------------------------------
coverage <- numeric(2 * m_mc)
for (j in seq_len(2 * m_mc)) {
  valid_j <- valid & !is.na(results$ci_lower[, j]) &
    !is.na(results$ci_upper[, j])
  covered <- (results$ci_lower[valid_j, j] <= theta_mc[j]) &
    (theta_mc[j] <= results$ci_upper[valid_j, j])
  coverage[j] <- mean(covered)
}

mean_width <- colMeans(
  results$ci_upper[valid, ] - results$ci_lower[valid, ], na.rm = TRUE
)

coverage_df <- data.frame(
  Parameter = par_names_mc,
  True = theta_mc,
  Coverage = coverage,
  Nominal = 1 - alpha,
  Mean_Width = mean_width
)

knitr::kable(coverage_df, digits = 4,
             caption = paste0(100 * (1 - alpha),
                              "% CI coverage (nominal = ",
                              1 - alpha, ")"),
             col.names = c("Parameter", "True", "Coverage",
                          "Nominal", "Mean Width"))

## ----mc-sampling-dist, fig.width=8, fig.height=6------------------------------
oldpar <- par(mfrow = c(2, 3), mar = c(4, 4, 2, 1))
on.exit(par(oldpar))
for (j in seq_len(2 * m_mc)) {
  hist(est_valid[, j], breaks = 20, probability = TRUE,
       main = par_names_mc[j],
       xlab = expression(hat(theta)),
       col = "lightblue", border = "white")
  abline(v = theta_mc[j], col = "red", lwd = 2, lty = 2)
  abline(v = mean(est_valid[, j]), col = "blue", lwd = 2)
  legend("topright", legend = c("True", "Mean"),
         col = c("red", "blue"), lty = c(2, 1), lwd = 2, cex = 0.7)
}

## ----cleanup, include=FALSE---------------------------------------------------
options(old_opts)