## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  echo = TRUE,
  comment = "#>"
)

# Flip to TRUE to regenerate the long-running Monte Carlo results from
# scratch; FALSE loads the precomputed .rds shipped with the vignette.
run_long <- FALSE

library(maskedcauses)

# Limit printed precision for the whole document; the saved value is
# restored in the cleanup chunk at the end.
old_opts <- options(digits = 4)

## ----observe-demo-------------------------------------------------------------
# Each observe_* constructor returns a function that maps a latent failure
# time to an observed (possibly censored) record.

# 1. Right-censoring: continuous monitoring until the study ends at tau
rc_obs <- observe_right_censor(tau = 10)
rc_obs(7)   # fails before tau: time observed exactly
rc_obs(15)  # still alive at tau: right-censored

# 2. Left-censoring: a single inspection at tau
lc_obs <- observe_left_censor(tau = 10)
lc_obs(7)   # already failed at inspection: left-censored
lc_obs(15)  # still alive at inspection: right-censored

# 3. Periodic inspection every delta time units, study ending at tau
pc_obs <- observe_periodic(delta = 2, tau = 10)
pc_obs(5.3)  # failure bracketed by inspections at 4 and 6: interval-censored
pc_obs(15)   # alive past tau: right-censored

# 4. Mixture: compose monitoring schemes with the given weights
mix_obs <- observe_mixture(
  observe_right_censor(tau = 10),
  observe_left_censor(tau = 5),
  observe_periodic(delta = 2, tau = 10),
  weights = c(0.5, 0.2, 0.3)
)

## ----observe-rdata------------------------------------------------------------
# Draw masked-cause data from the exponential series model under a
# periodic-inspection monitoring scheme.
model <- exp_series_md_c1_c2_c3()
gen <- rdata(model)
theta <- c(1, 1.1, 0.95)

set.seed(42)
df <- gen(
  theta,
  n = 200, p = 0.3,
  observe = observe_periodic(delta = 0.5, tau = 5)
)
cat("Observation type distribution:\n")
print(table(df$omega))

## ----hand-verify-data---------------------------------------------------------
# One observation of each censoring type, laid out column by column so each
# row of the resulting data frame can be verified by hand.
obs_time   <- c(3.0, 8.0, 5.0, 2.0)                    # lower endpoint / time
obs_upper  <- c(NA, NA, NA, 6.0)                       # upper endpoint (interval only)
obs_type   <- c("exact", "right", "left", "interval")

df_mixed <- data.frame(
  t       = obs_time,
  t_upper = obs_upper,
  omega   = obs_type,
  x1      = c(TRUE, FALSE, TRUE, TRUE),
  x2      = c(TRUE, FALSE, FALSE, TRUE),
  x3      = c(FALSE, FALSE, TRUE, FALSE),
  stringsAsFactors = FALSE
)

# Component failure rates; the series-system rate is their sum.
rates <- c(0.5, 0.3, 0.2)
lambda_sys <- sum(rates)

## ----hand-verify-loglik-------------------------------------------------------
exp_model <- exp_series_md_c1_c2_c3()
ll_fn <- loglik(exp_model)
scr_fn <- score(exp_model)

# Total log-likelihood of df_mixed as computed by the package
ll_val <- ll_fn(df_mixed, rates)
cat("Log-likelihood:", round(ll_val, 6), "\n")

# Expected value from the hand calculation: one term per row of df_mixed,
# in the same order (exact, right, left, interval).
# NOTE(review): the 0.8 / 0.7 factors presumably come from the candidate-set
# (masking) probabilities implied by the x1..x3 indicators -- confirm against
# the vignette's derivation.
ll_exact    <- log(0.8) - lambda_sys * 3
ll_right    <- -lambda_sys * 8
ll_left     <- log(0.7) + log(-expm1(-lambda_sys * 5)) - log(lambda_sys)
ll_interval <- log(0.8) - lambda_sys * 2 +
  log(-expm1(-lambda_sys * 4)) - log(lambda_sys)
ll_expected <- ll_exact + ll_right + ll_left + ll_interval
cat("Expected:      ", round(ll_expected, 6), "\n")
cat("Match:", all.equal(ll_val, ll_expected, tolerance = 1e-10), "\n")

## ----hand-verify-score--------------------------------------------------------
# Cross-check the analytical score against a numerical gradient of the
# log-likelihood at the same parameter values.
scr_analytical <- scr_fn(df_mixed, rates)
scr_numerical <- numDeriv::grad(
  func = function(th) ll_fn(df_mixed, th),
  x = rates
)

abs_diff <- abs(scr_analytical - scr_numerical)
score_df <- data.frame(
  Component = paste0("lambda_", 1:3),
  Analytical = round(scr_analytical, 6),
  Numerical = round(scr_numerical, 6),
  Abs_Diff = formatC(abs_diff, format = "e", digits = 2)
)
knitr::kable(score_df, caption = "Score verification: analytical vs numerical")

## ----load-precomputed, include=FALSE, eval=!run_long--------------------------
# When run_long is FALSE, load the precomputed simulation objects
# (sim_results, configs, theta, m, n, B, alpha, ...) saved by the sim-save
# chunk, instead of re-running the Monte Carlo chunks below.
list2env(readRDS("precomputed_censoring_comparison.rds"), envir = environment())

## ----sim-setup, eval=run_long, cache=TRUE-------------------------------------
# set.seed(7231)
# theta <- c(1.0, 1.1, 0.95)
# m <- length(theta)
# n <- 500
# B <- 200
# alpha <- 0.05
# 
# exp_model <- exp_series_md_c1_c2_c3()
# gen <- rdata(exp_model)
# solver <- fit(exp_model)
# theta0 <- rep(1, m)
# 
# # tau for ~25% right-censoring: solve S(tau) = 0.25
# lambda_sys <- sum(theta)
# tau_25 <- -log(0.25) / lambda_sys
# 
# # tau for left-censoring: F(tau) ~ 0.75 -> same tau
# # For left_censor(tau), Pr(left) = F(tau), Pr(right) = S(tau)
# 
# configs <- list(
#   A = list(
#     name = "100% exact",
#     observe = observe_right_censor(tau = Inf)
#   ),
#   B = list(
#     name = "75% exact + 25% right",
#     observe = observe_right_censor(tau = tau_25)
#   ),
#   C = list(
#     name = "~75% left + ~25% right",
#     observe = observe_left_censor(tau = tau_25)
#   ),
#   D = list(
#     name = "mix: exact + left + interval",
#     observe = observe_mixture(
#       observe_right_censor(tau = Inf),
#       observe_left_censor(tau = tau_25),
#       observe_periodic(delta = 0.3, tau = tau_25),
#       weights = c(0.75, 0.125, 0.125)
#     )
#   ),
#   E = list(
#     name = "mix: all four types",
#     observe = observe_mixture(
#       observe_right_censor(tau = tau_25),
#       observe_left_censor(tau = tau_25),
#       observe_periodic(delta = 0.3, tau = tau_25),
#       weights = c(0.70, 0.15, 0.15)
#     )
#   )
# )

## ----sim-run, eval=run_long, cache=TRUE, warning=FALSE, message=FALSE---------
# sim_results <- list()
# 
# for (cfg_name in names(configs)) {
#   cfg <- configs[[cfg_name]]
#   estimates <- matrix(NA, nrow = B, ncol = m)
#   se_estimates <- matrix(NA, nrow = B, ncol = m)
#   ci_lower <- matrix(NA, nrow = B, ncol = m)
#   ci_upper <- matrix(NA, nrow = B, ncol = m)
#   converged <- logical(B)
#   omega_counts <- list()
# 
#   for (b in 1:B) {
#     df_b <- gen(theta, n = n, p = 0.3, observe = cfg$observe)
# 
#     if (b == 1) {
#       omega_counts[[cfg_name]] <- table(df_b$omega)
#     }
# 
#     tryCatch({
#       result_b <- solver(df_b, par = theta0, method = "Nelder-Mead")
#       estimates[b, ] <- result_b$par
#       se_estimates[b, ] <- sqrt(diag(result_b$vcov))
#       z <- qnorm(1 - alpha / 2)
#       ci_lower[b, ] <- result_b$par - z * se_estimates[b, ]
#       ci_upper[b, ] <- result_b$par + z * se_estimates[b, ]
#       converged[b] <- result_b$converged
#     }, error = function(e) {
#       converged[b] <<- FALSE
#     })
#   }
# 
#   valid <- converged & !is.na(estimates[, 1])
#   est_valid <- estimates[valid, , drop = FALSE]
# 
#   bias <- colMeans(est_valid) - theta
#   variance <- apply(est_valid, 2, var)
#   mse <- bias^2 + variance
# 
#   coverage <- numeric(m)
#   for (j in 1:m) {
#     valid_j <- valid & !is.na(ci_lower[, j])
#     covered <- ci_lower[valid_j, j] <= theta[j] &
#       theta[j] <= ci_upper[valid_j, j]
#     coverage[j] <- mean(covered)
#   }
# 
#   sim_results[[cfg_name]] <- list(
#     name = cfg$name,
#     bias = bias,
#     variance = variance,
#     mse = mse,
#     coverage = coverage,
#     mean_mse = mean(mse),
#     mean_coverage = mean(coverage),
#     convergence_rate = mean(converged),
#     omega_sample = if (length(omega_counts) > 0) omega_counts[[cfg_name]] else NULL
#   )
# }

## ----sim-save, eval=run_long, include=FALSE-----------------------------------
# saveRDS(list(
#   sim_results = sim_results,
#   configs = configs,
#   theta = theta,
#   m = m,
#   n = n,
#   B = B,
#   alpha = alpha,
#   cross_model_results = if (exists("cross_model_results")) cross_model_results else NULL
# ), "precomputed_censoring_comparison.rds")

## ----sim-table----------------------------------------------------------------
# One summary row per monitoring configuration. vapply() is used instead of
# sapply() because it pins the return type (sapply's output shape depends on
# its input, which can silently change the data frame's column types).
summary_df <- data.frame(
  Config = names(sim_results),
  Description = vapply(sim_results, function(x) x$name, character(1)),
  Mean_Bias = vapply(sim_results, function(x) mean(abs(x$bias)), numeric(1)),
  Mean_MSE = vapply(sim_results, function(x) x$mean_mse, numeric(1)),
  Mean_RMSE = vapply(sim_results, function(x) sqrt(x$mean_mse), numeric(1)),
  Mean_Coverage = vapply(sim_results, function(x) x$mean_coverage, numeric(1)),
  Conv_Rate = vapply(sim_results, function(x) x$convergence_rate, numeric(1)),
  stringsAsFactors = FALSE, row.names = NULL
)

knitr::kable(summary_df, digits = 4,
             caption = "Estimation quality by monitoring configuration",
             col.names = c("Config", "Description", "Mean |Bias|",
                          "Mean MSE", "Mean RMSE", "Coverage", "Conv. Rate"))

## ----sim-plot, fig.width=8, fig.height=5--------------------------------------
mse_vals <- vapply(sim_results, function(x) x$mean_mse, numeric(1))
cov_vals <- vapply(sim_results, function(x) x$mean_coverage, numeric(1))
cfg_cols <- c("steelblue", "coral", "goldenrod", "mediumpurple", "seagreen")

# on.exit() only registers a handler inside a function frame, so it is
# unreliable at the top level of a script/vignette chunk; save par() here and
# restore it explicitly after plotting instead.
oldpar <- par(mfrow = c(1, 2), mar = c(7, 4, 3, 1))

# MSE comparison; barplot() returns the bar midpoints for label placement.
bp_mse <- barplot(mse_vals, names.arg = names(sim_results),
                  col = cfg_cols,
                  main = "Mean MSE by Configuration",
                  ylab = "Mean MSE", las = 1)
text(bp_mse, mse_vals + max(mse_vals) * 0.03,
     labels = round(mse_vals, 4), cex = 0.8)

# Coverage comparison. Capture this plot's own bar midpoints rather than
# reusing the positions from the MSE plot; xpd = FALSE clips the bars so
# they do not spill below the truncated ylim.
bp_cov <- barplot(cov_vals, names.arg = names(sim_results),
                  col = cfg_cols,
                  main = "Mean Coverage by Configuration",
                  ylab = "Coverage Probability", las = 1,
                  ylim = c(0.85, 1.0), xpd = FALSE)
abline(h = 1 - alpha, lty = 2, col = "red", lwd = 2)
text(x = bp_cov, y = 0.86, labels = round(cov_vals, 3), cex = 0.8)
legend("bottomright", legend = "Nominal 95%", lty = 2, col = "red",
       lwd = 2, cex = 0.8)

par(oldpar)

## ----sim-component-table------------------------------------------------------
# Long-format table: one row per (configuration, component) pair, with
# configurations in the outer position and components in the inner position.
comp_df <- do.call(rbind, lapply(names(sim_results), function(cfg_name) {
  res <- sim_results[[cfg_name]]
  do.call(rbind, lapply(seq_len(m), function(j) {
    data.frame(
      Config = cfg_name,
      Component = j,
      True = theta[j],
      Bias = res$bias[j],
      MSE = res$mse[j],
      Coverage = res$coverage[j],
      stringsAsFactors = FALSE
    )
  }))
}))

knitr::kable(comp_df, digits = 4, row.names = FALSE,
             caption = "Per-component estimation metrics by configuration",
             col.names = c("Config", "Comp.", "True", "Bias", "MSE", "Coverage"))

## ----cross-model-demo---------------------------------------------------------
# Simulate from the exponential series DGP under a three-way mixture of
# monitoring schemes, producing all censoring types in one data set.
set.seed(42)
theta_exp <- c(1.0, 1.1, 0.95)
exp_model <- exp_series_md_c1_c2_c3()
gen_exp <- rdata(exp_model)

mixed_scheme <- observe_mixture(
  observe_right_censor(tau = 5),
  observe_left_censor(tau = 3),
  observe_periodic(delta = 0.5, tau = 5),
  weights = c(0.5, 0.2, 0.3)
)
df_cross <- gen_exp(theta_exp, n = 300, p = 0.3, observe = mixed_scheme)

cat("Observation types:\n")
print(table(df_cross$omega))

## ----cross-model-loglik-------------------------------------------------------
# Evaluate both likelihoods at equivalent parameter values.
ll_exp_fn <- loglik(exp_model)
ll_exp_val <- ll_exp_fn(df_cross, theta_exp)

# A homogeneous Weibull with shape k = 1 reduces to the exponential model,
# with scale parameters equal to the reciprocal rates.
hom_model <- wei_series_homogeneous_md_c1_c2_c3()
ll_hom_fn <- loglik(hom_model)
ll_hom_val <- ll_hom_fn(df_cross, c(1, 1 / theta_exp))

cat("Exponential log-likelihood:          ", round(ll_exp_val, 4), "\n")
cat("Homogeneous Weibull (k=1) log-likelihood:", round(ll_hom_val, 4), "\n")
cat("Difference:                          ",
    formatC(ll_exp_val - ll_hom_val, format = "e", digits = 2), "\n")

## ----cross-model-fit, warning=FALSE-------------------------------------------
# Maximise each model's likelihood from a unit starting point.
solver_exp <- fit(exp_model)
solver_hom <- fit(hom_model)

# Exponential: three rates. Weibull: (k, s1, s2, s3).
mle_exp <- solver_exp(df_cross, par = rep(1, 3), method = "Nelder-Mead")
mle_hom <- solver_hom(df_cross, par = rep(1, 4), method = "Nelder-Mead")

cat("Exponential MLE (rates):", round(mle_exp$par, 4), "\n")
cat("Homogeneous Weibull MLE (k, scales):", round(mle_hom$par, 4), "\n")
cat("\nExponential loglik at MLE:", round(mle_exp$loglik, 4), "\n")
cat("Weibull loglik at MLE:    ", round(mle_hom$loglik, 4), "\n")

## ----cross-model-shape--------------------------------------------------------
# Sanity check: the fitted Weibull shape should be close to 1 when the data
# truly come from an exponential (constant-hazard) DGP.
cat("Estimated shape k:", round(mle_hom$par[1], 4), "\n")
cat("(Expected: 1.0 for exponential DGP)\n")

## ----timing-demo--------------------------------------------------------------
# Compare log-likelihood evaluation cost across the three model classes on a
# larger mixed-censoring data set.
set.seed(42)
df_large <- gen_exp(
  theta_exp, n = 1000, p = 0.3,
  observe = observe_mixture(
    observe_right_censor(tau = 5),
    observe_periodic(delta = 0.5, tau = 5),
    weights = c(0.5, 0.5)
  )
)

wei_model <- wei_series_md_c1_c2_c3()
ll_wei_fn <- loglik(wei_model)
# Interleave (shape, scale) pairs with k = 1 and scale = 1 / rate:
# column-major flattening of the 2 x 3 matrix gives c(1, s1, 1, s2, 1, s3).
wei_par <- as.vector(rbind(1, 1 / theta_exp))

t_exp <- system.time(replicate(10, ll_exp_fn(df_large, theta_exp)))
t_hom <- system.time(replicate(10, ll_hom_fn(df_large, c(1, 1 / theta_exp))))
t_wei <- system.time(replicate(10, ll_wei_fn(df_large, wei_par)))

timing_df <- data.frame(
  Model = c("Exponential", "Homogeneous Weibull", "Heterogeneous Weibull"),
  Time_10_evals = round(c(t_exp["elapsed"], t_hom["elapsed"],
                          t_wei["elapsed"]), 3),
  Method = c("Closed-form", "Closed-form", "Numerical integration"),
  stringsAsFactors = FALSE
)
knitr::kable(timing_df, caption = "Log-likelihood evaluation time (10 evaluations, n=1000)",
             col.names = c("Model", "Time (s)", "Left/Interval Method"))

## ----cleanup, include=FALSE---------------------------------------------------
# Restore the options() state saved in the setup chunk (digits).
options(old_opts)

