## ----include = FALSE----------------------------------------------------------
# Global knitr chunk defaults for this document: collapsed source/output
# blocks, "#>" as the output prefix, and 7 x 5 figures.
invisible(do.call(
  knitr::opts_chunk$set,
  list(
    collapse = TRUE,
    comment = "#>",
    fig.width = 7,
    fig.height = 5
  )
))

## ----setup--------------------------------------------------------------------
library(causaldef)

# Bring the RHC dataset into the workspace
data("rhc")

# Report the size of the dataset (rows are patients, columns are variables)
rhc_dims <- dim(rhc)
cat("Dataset dimensions:", rhc_dims[1], "patients,", rhc_dims[2], "variables\n")

## ----explore------------------------------------------------------------------
# Frequency tables for the two key variables: treatment, then outcome
cat("Treatment distribution (swang1 = RHC):\n")
table(rhc[["swang1"]])

cat("\nOutcome distribution (death):\n")
table(rhc[["death"]])

## ----audit--------------------------------------------------------------------
# Prepare data: work on a copy and add a numeric outcome for auditing.
# death_num is 1 where death is "Yes", 0 otherwise; missing values stay NA.
rhc_clean <- rhc
rhc_clean$death_num <- as.numeric(rhc_clean$death == "Yes")

# Candidate covariates: every column except IDs, dates, and the
# outcome/treatment themselves (no filtering by column type happens here).
exclude_cols <- c("X", "ptid", "sadmdte", "dschdte", "dthdte", "lstctdte",
                  "swang1", "death", "death_num")
audit_cols <- setdiff(names(rhc_clean), exclude_cols)

# Run the audit on the first 25 candidate covariates (for demonstration).
# head() caps the selection at the number of available columns, whereas
# `audit_cols[1:25]` would pad the vector with NA if fewer than 25 exist.
report <- audit_data(
  data = rhc_clean,
  treatment = "swang1",
  outcome = "death_num",
  covariates = head(audit_cols, 25),
  alpha = 0.01,  # Stricter significance level
  verbose = FALSE
)

print(report)

## ----confounders--------------------------------------------------------------
# Restrict the issue table to the rows flagged as confounders
is_confounder_row <- report$issues$issue_type == "Confounder"
confounders <- report$issues[is_confounder_row, ]

# Show the key columns only when at least one confounder was flagged
if (nrow(confounders) > 0) {
  confounder_display_cols <- c("variable", "r_treatment", "r_outcome", "p_value")
  cat("Detected Confounders (must adjust for these):\n\n")
  print(confounders[, confounder_display_cols])
}

## ----subgroup-----------------------------------------------------------------
# Audit cardiac patients only.
# `%in%` never returns NA, so patients with a missing `card` value are
# dropped; with `==` they would come back as rows made entirely of NA.
# NOTE(review): both a 1 and a "Yes" coding are accepted for `card`, because
# `death` is compared against "Yes" earlier in this script -- confirm how
# `card` is actually coded in the rhc data.
is_cardiac <- rhc_clean$card %in% 1 | rhc_clean$card %in% "Yes"
cardiac_patients <- rhc_clean[is_cardiac, ]

# Only audit when the subgroup has more than 50 patients
if (nrow(cardiac_patients) > 50) {
  report_cardiac <- audit_data(
    data = cardiac_patients,
    treatment = "swang1",
    outcome = "death_num",
    # head() avoids NA entries when fewer than 15 candidate covariates exist
    covariates = head(audit_cols, 15),
    alpha = 0.01,
    verbose = FALSE
  )

  cat("=== Cardiac Patients Subgroup ===\n")
  cat("Sample size:", nrow(cardiac_patients), "\n")
  cat("Issues found:", report_cardiac$summary_stats$n_issues, "\n")
  cat("Confounders:", report_cardiac$summary_stats$n_confounders, "\n")
} else {
  # Make the skip visible instead of silently producing no output
  message(
    "Cardiac subgroup has only ", nrow(cardiac_patients),
    " patients (need more than 50); skipping the subgroup audit."
  )
}

## ----spec---------------------------------------------------------------------
# Names of the variables that the audit flagged as confounders
flagged_as_confounder <- report$issues$issue_type == "Confounder"
confounder_vars <- report$issues$variable[flagged_as_confounder]

# Build and display a causal specification only if something was flagged
if (length(confounder_vars) > 0) {
  # The detected confounders become the adjustment covariates
  spec <- causal_spec(
    data = rhc_clean,
    treatment = "swang1",
    outcome = "death_num",
    covariates = confounder_vars
  )

  print(spec)
}

## ----summary------------------------------------------------------------------
# Print the headline counts stored in the audit report's summary_stats
audit_stats <- report$summary_stats

cat("\n=== Audit Summary ===\n")
cat("Variables audited:", audit_stats$n_vars_audited, "\n")
cat("Total issues:", audit_stats$n_issues, "\n")
cat("  - Confounders:", audit_stats$n_confounders, "\n")
cat("  - Potential instruments:", audit_stats$n_instruments, "\n")

