## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(causaldef)
library(stats)

if (!exists("deparse1", envir = baseenv())) {
  deparse1 <- function(expr, collapse = " ", width.cutoff = 500L, ...) {
    paste(deparse(expr, width.cutoff, ...), collapse = collapse)
  }
}

## ----lalonde-data-------------------------------------------------------------
data("nsw_benchmark")
head(nsw_benchmark)

# 1. The Experimental Benchmark (Gold Standard)
nsw_exp <- subset(nsw_benchmark, sample_id %in% c("nsw_treated", "nsw_control"))

# True Experimental Estimate (Unadjusted difference in means is valid due to randomization)
true_att <- mean(nsw_exp$re78[nsw_exp$treat == 1]) - mean(nsw_exp$re78[nsw_exp$treat == 0])
cat("True Experimental ATT:", round(true_att, 2), "\n")

# 2. The Observational Challenge (NSW Treated + CPS Control)
nsw_obs <- subset(nsw_benchmark, sample_id %in% c("nsw_treated", "cps_control"))

# Naive Observational Estimate (Unadjusted)
naive_est <- mean(nsw_obs$re78[nsw_obs$treat == 1]) - mean(nsw_obs$re78[nsw_obs$treat == 0])
cat("Naive Observational Estimate:", round(naive_est, 2), "\n")
cat("Bias:", round(naive_est - true_att, 2), "\n")

## ----lalonde-def--------------------------------------------------------------
covariates <- c("age", "education", "black", "hispanic", "married", "nodegree", "re74", "re75")

# Specification
spec_lalonde <- causal_spec(
  data = nsw_obs,
  treatment = "treat",
  outcome = "re78",
  covariates = covariates
)

# Estimate deficiency using Propensity Score Weighting (IPTW)
res_lalonde <- estimate_deficiency(
  spec_lalonde,
  methods = c("unadjusted", "iptw"),
  n_boot = 50 # Kept low for vignette speed
)

print(res_lalonde)
plot(res_lalonde)

## ----rhc-data-----------------------------------------------------------------
data("rhc")

# Convert treatment to binary (0/1) for causaldef
# Assuming 'swang1' is the treatment column, usually "RHC" vs "No RHC"
# Check levels first (simulated check here, dataset structure assumed from documentation)
if (is.factor(rhc$swang1)) {
  rhc$treat_bin <- as.numeric(rhc$swang1) - 1 # Assuming factor levels order
} else {
  rhc$treat_bin <- rhc$swang1
}

# Outcome: 30-day survival (inverse of dth30) or just standard outcome
# Let's say outcome is dth30 (binary).
if (is.factor(rhc$dth30)) {
  rhc$outcome_bin <- as.numeric(rhc$dth30) - 1
} else {
  rhc$outcome_bin <- rhc$dth30
}

# Select a subset of covariates for demonstration (to keep it fast)
# Real analysis would use all 50+.
rhc_covars <- c("age", "sex", "race", "aps1", "cat1") 
# Note: 'aps1' is APACHE III score


spec_rhc <- causal_spec(
  data = rhc,
  treatment = "treat_bin",
  outcome = "outcome_bin",
  covariates = rhc_covars
)

## ----rhc-def------------------------------------------------------------------
res_rhc <- estimate_deficiency(spec_rhc, methods = "iptw", n_boot = 0)
print(res_rhc)

## ----rhc-policy---------------------------------------------------------------
# Utility: Let's say preventing death has utility 1, death has utility 0.
# The outcome is death (1) or survival (0). 
# We want to minimize outcome (death). 
# This is equivalent to utility range [0, 1].

bounds_rhc <- policy_regret_bound(res_rhc, utility_range = c(0, 1), method = "iptw")
print(bounds_rhc)

## ----rhc-frontier, fig.height=5, fig.width=6----------------------------------
frontier <- confounding_frontier(spec_rhc, grid_size = 30)
plot(frontier)