## ----setup, include = FALSE---------------------------------------------------
# knitr chunk defaults for the vignette: collapse source/output blocks,
# echo code, and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>")

# Set to TRUE to regenerate long-running simulation results; when FALSE,
# the Monte Carlo chunks are skipped and cached results are loaded from
# "precomputed_weibull_homogeneous.rds" instead.
run_long <- FALSE

library(maskedcauses)
# Print with 4 significant digits; the previous options are saved here and
# restored via options(old_opts) in the cleanup chunk at the end.
old_opts <- options(digits = 4)

## ----worked-example-params----------------------------------------------------
# True parameters: a common Weibull shape k plus one scale beta_j per
# series component (m = 3 components).
theta <- c(k = 1.5, beta1 = 100, beta2 = 150, beta3 = 200)
k <- theta[1]        # shape (keeps its name "k")
scales <- theta[-1]  # component scales beta_1..beta_3
m <- length(scales)  # number of components

# beta_sys is the scale of the system lifetime; the mean below applies
# the Weibull mean formula b * gamma(1 + 1/k) to it.
beta_sys <- wei_series_system_scale(k, scales)
cat("System scale (beta_sys):", round(beta_sys, 2), "\n")
cat("System mean lifetime:", round(beta_sys * gamma(1 + 1/k), 2), "\n")

## ----hazard-plot, fig.width=7, fig.height=4.5---------------------------------
# Plot each component's hazard function h_j(t) on a common time grid.
model <- wei_series_homogeneous_md_c1_c2_c3()
# Grid starts slightly above zero (hazard is evaluated from t = 0.1 on).
t_grid <- seq(0.1, 200, length.out = 300)

cols <- c("#E41A1C", "#377EB8", "#4DAF4A")  # one color per component
plot(NULL, xlim = c(0, 200), ylim = c(0, 0.06),
     xlab = "Time t", ylab = "Hazard h_j(t)",
     main = "Component Hazard Functions")
for (j in seq_len(m)) {
  h_j <- component_hazard(model, j)
  lines(t_grid, h_j(t_grid, theta), col = cols[j], lwd = 2)
}
# seq_len(m) rather than 1:m: identical labels here, but robust to m == 0.
legend("topleft", paste0("Component ", seq_len(m), " (beta=", scales, ")"),
       col = cols, lwd = 2, cex = 0.9)

## ----cause-probs--------------------------------------------------------------
# Analytical cause weights: w_j = beta_j^{-k} / sum(beta_l^{-k}).
# Because all components share the same shape k (homogeneous system),
# these weights are constant in t (see the dotted reference lines in the
# conditional-cause plot below).
w <- scales^(-k) / sum(scales^(-k))
# seq_len(m) rather than 1:m (safe if m were 0); output is unchanged.
names(w) <- paste0("Component ", seq_len(m))
cat("Cause weights (w_j):\n")
print(round(w, 4))

## ----cond-cause-plot, fig.width=7, fig.height=4-------------------------------
# P(K = j | T = t) as a function of time, with the analytical weights w_j
# overlaid as dotted horizontal reference lines.
ccp_fn <- conditional_cause_probability(
  wei_series_homogeneous_md_c1_c2_c3(scales = scales)
)
probs <- ccp_fn(t_grid, theta)  # one column per component

plot(NULL, xlim = c(0, 200), ylim = c(0, 0.7),
     xlab = "Time t", ylab = "P(K = j | T = t)",
     main = "Conditional Cause Probability vs Time")
for (j in seq_len(m)) {
  lines(t_grid, probs[, j], col = cols[j], lwd = 2)
}
# seq_len(m) rather than 1:m: identical labels here, but robust to m == 0.
legend("right", paste0("Component ", seq_len(m)),
       col = cols, lwd = 2, cex = 0.9)
abline(h = w, col = cols, lty = 3)

## ----gen-periodic-data--------------------------------------------------------
# Simulate n = 500 systems with masking probability p = 0.3 under periodic
# inspection: checks every delta = 20 time units, right-censoring at tau = 250.
gen <- rdata(model)

set.seed(2024)
df <- gen(theta, n = 500, p = 0.3,
          observe = observe_periodic(delta = 20, tau = 250))

cat("Observation types:\n")
# df$omega labels the observation type of each record (e.g. "right" for
# right-censored; see its use in the Monte Carlo chunk below).
print(table(df$omega))
cat("\nFirst 6 rows:\n")
print(head(df), row.names = FALSE)

## ----mle-fit, warning=FALSE---------------------------------------------------
# Maximum-likelihood fit using the model's solver, started from a point
# near (but deliberately not at) the true parameter values.
solver <- fit(model)
theta0 <- c(1.2, 110, 140, 180)  # Initial guess near true values

estimate <- solver(df, par = theta0, method = "Nelder-Mead")
print(estimate)

## ----mle-recovery-------------------------------------------------------------
# Compare the MLE to the truth; standard errors are taken from the
# estimated covariance matrix estimate$vcov.
cat("True parameters: ", round(theta, 2), "\n")
cat("MLE estimates:   ", round(estimate$par, 2), "\n")
cat("Std errors:      ", round(sqrt(diag(estimate$vcov)), 2), "\n")
# Relative error reported as a percentage of the true value.
cat("Relative error:  ",
    round(100 * abs(estimate$par - theta) / theta, 1), "%\n")

## ----score-hessian-verify-----------------------------------------------------
# Sanity-check the analytical derivatives supplied by the model against
# numerical differentiation at the fitted parameters.
ll_fn <- loglik(model)
scr_fn <- score(model)
hess_fn <- hess_loglik(model)

# Score at MLE should be near zero (first-order optimality condition)
scr_mle <- scr_fn(df, estimate$par)
cat("Score at MLE:", round(scr_mle, 4), "\n")

# Verify score against numerical gradient of the log-likelihood
scr_num <- numDeriv::grad(function(th) ll_fn(df, th), estimate$par)
cat("Max |analytical - numerical| score:",
    formatC(max(abs(scr_mle - scr_num)), format = "e", digits = 2), "\n")

# Hessian eigenvalues (should be negative for concavity, i.e. the MLE is
# a local maximum of the log-likelihood)
H <- hess_fn(df, estimate$par)
cat("Hessian eigenvalues:", round(eigen(H)$values, 2), "\n")

## ----load-precomputed, include=FALSE, eval=!run_long--------------------------
# Load the cached Monte Carlo results written by the eval=run_long chunks
# below. Guard against a missing cache so a fresh checkout fails with an
# actionable message instead of readRDS's opaque connection error.
rds_path <- "precomputed_weibull_homogeneous.rds"
if (!file.exists(rds_path)) {
  stop("Missing '", rds_path,
       "'; set run_long <- TRUE and re-run to regenerate it.",
       call. = FALSE)
}
results <- readRDS(rds_path)

## ----mc-setup, cache=TRUE, eval=run_long--------------------------------------
# NOTE: the three chunks below are guarded by eval=run_long, so they appear
# commented out in this purled script (run_long was FALSE when it was
# generated). Set run_long <- TRUE and re-run to regenerate the cache.
# set.seed(2024)
# 
# B <- 100          # Monte Carlo replications
# n_mc <- 500       # Sample size per replication
# p_mc <- 0.3       # Masking probability
# alpha <- 0.05     # CI level
# 
# # Three shape regimes
# scenarios <- list(
#   DFR = c(k = 0.7, beta1 = 100, beta2 = 150, beta3 = 200),
#   CFR = c(k = 1.0, beta1 = 100, beta2 = 150, beta3 = 200),
#   IFR = c(k = 1.5, beta1 = 100, beta2 = 150, beta3 = 200)
# )
# 
# results <- list()

## ----mc-run, cache=TRUE, eval=run_long, warning=FALSE, message=FALSE----------
# For each regime: simulate B datasets, fit by L-BFGS-B, and record
# estimates, standard errors, Wald CIs, convergence, and censoring rates.
# model_mc <- wei_series_homogeneous_md_c1_c2_c3()
# gen_mc <- rdata(model_mc)
# solver_mc <- fit(model_mc)
# 
# for (sc_name in names(scenarios)) {
#   th <- scenarios[[sc_name]]
#   k_sc <- th[1]
#   scales_sc <- th[-1]
#   m_sc <- length(scales_sc)
#   npars <- m_sc + 1
# 
#   # Choose tau to give ~25% right-censoring
#   beta_sys_sc <- wei_series_system_scale(k_sc, scales_sc)
#   tau_sc <- qweibull(0.75, shape = k_sc, scale = beta_sys_sc)
#   delta_sc <- tau_sc / 10  # ~10 inspection intervals
# 
#   estimates <- matrix(NA, nrow = B, ncol = npars)
#   se_est <- matrix(NA, nrow = B, ncol = npars)
#   ci_lower <- matrix(NA, nrow = B, ncol = npars)
#   ci_upper <- matrix(NA, nrow = B, ncol = npars)
#   converged <- logical(B)
#   cens_fracs <- numeric(B)
# 
#   for (b in seq_len(B)) {
#     df_b <- gen_mc(th, n = n_mc, p = p_mc,
#                    observe = observe_periodic(delta = delta_sc, tau = tau_sc))
#     cens_fracs[b] <- mean(df_b$omega == "right")
# 
#     tryCatch({
#       est_b <- solver_mc(df_b, par = c(1, rep(120, m_sc)),
#                          method = "L-BFGS-B",
#                          lower = rep(1e-6, npars))
#       estimates[b, ] <- est_b$par
#       se_est[b, ] <- sqrt(diag(est_b$vcov))
#       z <- qnorm(1 - alpha / 2)
#       ci_lower[b, ] <- est_b$par - z * se_est[b, ]
#       ci_upper[b, ] <- est_b$par + z * se_est[b, ]
#       converged[b] <- est_b$converged
#     }, error = function(e) {
#       # <<- is required: the handler is a function, so a plain <- would
#       # create a local 'converged' instead of updating the outer vector.
#       converged[b] <<- FALSE
#     })
#   }
# 
#   results[[sc_name]] <- list(
#     theta = th, estimates = estimates, se_est = se_est,
#     ci_lower = ci_lower, ci_upper = ci_upper,
#     converged = converged, cens_fracs = cens_fracs,
#     tau = tau_sc, delta = delta_sc
#   )
# }

## ----mc-save, include=FALSE, eval=run_long------------------------------------
# Persist the results so eval=!run_long runs can load them instead.
# saveRDS(results, "precomputed_weibull_homogeneous.rds")

## ----mc-results-table---------------------------------------------------------
summary_rows <- list()

for (sc_name in names(results)) {
  res <- results[[sc_name]]
  th <- res$theta
  valid <- res$converged & !is.na(res$estimates[, 1])
  est_v <- res$estimates[valid, , drop = FALSE]

  bias <- colMeans(est_v) - th
  variance <- apply(est_v, 2, var)
  mse <- bias^2 + variance
  pnames <- c("k", paste0("beta", seq_along(th[-1])))

  for (j in seq_along(th)) {
    summary_rows[[length(summary_rows) + 1]] <- data.frame(
      Regime = sc_name,
      Parameter = pnames[j],
      True = th[j],
      Mean_Est = mean(est_v[, j]),
      Bias = bias[j],
      RMSE = sqrt(mse[j]),
      Rel_Bias_Pct = 100 * bias[j] / th[j],
      stringsAsFactors = FALSE
    )
  }
}

mc_table <- do.call(rbind, summary_rows)

knitr::kable(mc_table, digits = 3, row.names = FALSE,
             caption = "Monte Carlo Results by Shape Regime (B=100, n=500)",
             col.names = c("Regime", "Parameter", "True", "Mean Est.",
                          "Bias", "RMSE", "Rel. Bias %"))

## ----mc-coverage--------------------------------------------------------------
coverage_rows <- list()

for (sc_name in names(results)) {
  res <- results[[sc_name]]
  th <- res$theta
  valid <- res$converged & !is.na(res$ci_lower[, 1])
  pnames <- c("k", paste0("beta", seq_along(th[-1])))

  for (j in seq_along(th)) {
    valid_j <- valid & !is.na(res$ci_lower[, j]) & !is.na(res$ci_upper[, j])
    covered <- (res$ci_lower[valid_j, j] <= th[j]) &
               (th[j] <= res$ci_upper[valid_j, j])
    width <- mean(res$ci_upper[valid_j, j] - res$ci_lower[valid_j, j])

    coverage_rows[[length(coverage_rows) + 1]] <- data.frame(
      Regime = sc_name,
      Parameter = pnames[j],
      Coverage = mean(covered),
      Mean_Width = width,
      stringsAsFactors = FALSE
    )
  }
}

cov_table <- do.call(rbind, coverage_rows)

knitr::kable(cov_table, digits = 3, row.names = FALSE,
             caption = "95% Wald CI Coverage by Shape Regime",
             col.names = c("Regime", "Parameter", "Coverage", "Mean Width"))

## ----mc-censoring-rates-------------------------------------------------------
for (sc_name in names(results)) {
  res <- results[[sc_name]]
  conv_rate <- mean(res$converged)
  cens_rate <- mean(res$cens_fracs[res$converged])
  cat(sprintf("%s (k=%.1f): convergence=%.1f%%, mean censoring=%.1f%%\n",
              sc_name, res$theta[1], 100 * conv_rate, 100 * cens_rate))
}

## ----mc-sampling-dist, fig.width=8, fig.height=7------------------------------
oldpar <- par(mfrow = c(3, 4), mar = c(4, 3, 2, 1), oma = c(0, 0, 2, 0))
on.exit(par(oldpar))
pnames <- c("k", "beta1", "beta2", "beta3")

for (sc_name in names(results)) {
  res <- results[[sc_name]]
  th <- res$theta
  valid <- res$converged & !is.na(res$estimates[, 1])
  est_v <- res$estimates[valid, , drop = FALSE]

  for (j in seq_along(th)) {
    hist(est_v[, j], breaks = 20, probability = TRUE,
         main = paste0(sc_name, ": ", pnames[j]),
         xlab = pnames[j],
         col = "lightblue", border = "white", cex.main = 0.9)
    abline(v = th[j], col = "red", lwd = 2, lty = 2)
    abline(v = mean(est_v[, j]), col = "blue", lwd = 2)
  }
}

mtext("Sampling Distributions by Regime", outer = TRUE, cex = 1.2)

## ----exp-identity-------------------------------------------------------------
# Parameters: k=1 with scales equivalent to rates (1/beta_j)
exp_rates <- c(0.01, 0.008, 0.005)
wei_scales <- 1 / exp_rates  # beta = 1/lambda
wei_theta <- c(1, wei_scales)

# Generate data under exponential model
exp_model <- exp_series_md_c1_c2_c3()
exp_gen <- rdata(exp_model)

set.seed(42)
df_test <- exp_gen(exp_rates, n = 200, tau = 300, p = 0.3)

# Evaluate both log-likelihoods
ll_exp <- loglik(exp_model)
ll_wei <- loglik(model)

val_exp <- ll_exp(df_test, exp_rates)
val_wei <- ll_wei(df_test, wei_theta)

cat("Exponential loglik:", round(val_exp, 6), "\n")
cat("Weibull(k=1) loglik:", round(val_wei, 6), "\n")
cat("Absolute difference:", formatC(abs(val_exp - val_wei),
                                     format = "e", digits = 2), "\n")

## ----exp-identity-score-------------------------------------------------------
# Score comparison
s_exp <- score(exp_model)(df_test, exp_rates)
s_wei <- score(model)(df_test, wei_theta)

# The exponential score is d/d(lambda_j), the Weibull score includes d/dk
# and d/d(beta_j). Since lambda_j = 1/beta_j, by the chain rule:
#   d(ell)/d(lambda_j) = d(ell)/d(beta_j) * d(beta_j)/d(lambda_j)
#                      = d(ell)/d(beta_j) * (-beta_j^2)
# So: d(ell)/d(lambda_j) = -beta_j^2 * d(ell)/d(beta_j)
s_wei_transformed <- -wei_scales^2 * s_wei[-1]

cat("Exponential score:        ", round(s_exp, 4), "\n")
cat("Weibull(k=1) scale score: ", round(s_wei_transformed, 4), "\n")
cat("Max |difference|:",
    formatC(max(abs(s_exp - s_wei_transformed)), format = "e", digits = 2), "\n")

## ----cleanup, include=FALSE---------------------------------------------------
# Restore the options (digits) saved in the setup chunk.
options(old_opts)

