## ----setup, include = FALSE---------------------------------------------------
# Chunk defaults for the rendered document: collapse each chunk's source and
# output into a single block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----load-data----------------------------------------------------------------
library(ROOT)

# Load the `diabetes_data` example data set from the ROOT package and show
# its structure (column names, types, and the first few values).
utils::data("diabetes_data", package = "ROOT")
utils::str(diabetes_data)

## ----explore-data-------------------------------------------------------------
# Number of units in each sample-membership group (trial vs target population).
with(diabetes_data, table(S = S))

# Treatment-arm counts, restricted to the trial units (S == 1).
with(diabetes_data, table(Tr = Tr[S == 1]))

## ----overlap------------------------------------------------------------------
# Mean of each covariate within each sample-membership group (S).
covariate_cols <- c("Age45", "DietYes", "Race_Black", "Sex_Male")

# vapply() instead of sapply(): the result is guaranteed to be a double matrix
# with one row per value of S and one column per covariate. If S does not
# split the data into exactly the two groups named in the caption below, the
# chunk stops with an error instead of silently producing a different shape.
overlap <- vapply(
  covariate_cols,
  function(v) {
    # Group-wise mean of covariate `v`, ignoring missing covariate values.
    tapply(diabetes_data[[v]], diabetes_data$S, mean, na.rm = TRUE)
  },
  FUN.VALUE = numeric(2)
)

# Transpose so that covariates are the rows and the S groups are the columns.
knitr::kable(
  t(overlap),
  digits  = 3,
  caption = "Covariate means by sample membership (S = 1: trial, S = 0: target)"
)

## ----fit, message = FALSE, warning = FALSE------------------------------------
# Run characterizing_underrep() on the full diabetes data set and keep the
# result in `gen_fit`, which the print/summary/leaf-summary/plot chunks below
# consume.
# NOTE(review): the function is not defined in this file; the inline notes on
# its arguments are read off the argument names only -- confirm each against
# the ROOT help page for characterizing_underrep().
gen_fit <- characterizing_underrep(
  data                  = diabetes_data,
  generalizability_path = TRUE,  # presumably selects the generalizability workflow -- confirm
  num_trees             = 20,    # presumably the number of trees to grow -- confirm
  top_k_trees           = TRUE,  # presumably keep only the best `k` trees -- confirm
  k                     = 10,    # used together with top_k_trees -- confirm
  seed                  = 123    # fixed seed, presumably for reproducible fits -- confirm
)

## ----print--------------------------------------------------------------------
# Print the object returned by characterizing_underrep() using whatever
# print method is registered for its class.
print(gen_fit)

## ----summary------------------------------------------------------------------
# Summarise the fit; the result is auto-printed because the call is made at
# top level.
summary(gen_fit)

## ----leaf-summary-------------------------------------------------------------
# Auto-print the `leaf_summary` component stored on the fit.
# NOTE(review): presumably one row per leaf of the characterized tree --
# confirm against the ROOT documentation.
gen_fit$leaf_summary

## ----plot, fig.width = 7, fig.height = 5, fig.alt = "Characterized tree for diabetes generalizability analysis"----
# Draw the fit with the plot method for its class; the chunk options above
# size the figure at 7 x 5 and describe it as the characterized tree.
plot(gen_fit)

