## ----setup, include = FALSE---------------------------------------------------
# Document-wide knitr chunk options: collapse source and output into a single
# block and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----libraries, message = FALSE, warning = FALSE------------------------------
library(dplyr)
library(rbmi)
library(rbmiUtils)

## ----load-data----------------------------------------------------------------
# Load the example datasets bundled with rbmiUtils:
#   ADMI  - full imputed dataset
#   ADEFF - original data with missing values
data("ADMI", package = "rbmiUtils")
data("ADEFF", package = "rbmiUtils")

# Report the size of the imputed dataset and the number of distinct
# imputation identifiers it contains
cat("Full imputed dataset (ADMI): ", nrow(ADMI), " rows\n", sep = "")
cat("Number of imputations: ", n_distinct(ADMI$IMPID), " \n", sep = "")

## ----prepare-original---------------------------------------------------------
# Derive the reference dataset from ADEFF: copy TRT01P into a TRT column and
# coerce the subject identifier to character.
original <- mutate(
  ADEFF,
  USUBJID = as.character(USUBJID),
  TRT = TRT01P
)

# Tally the outcome (CHG) values that are missing in the reference dataset
n_missing <- original$CHG |>
  is.na() |>
  sum()
cat("Missing values in original data: ", n_missing, " \n", sep = "")

## ----define-vars--------------------------------------------------------------
# Tell rbmi which columns hold the outcome, treatment group, visit and
# subject identifier.
vars <- set_vars(
  outcome = "CHG",
  group = "TRT",
  visit = "AVISIT",
  subjid = "USUBJID"
)

## ----reduce-------------------------------------------------------------------
# Reduce the full imputed dataset against the original data
reduced <- ADMI |>
  reduce_imputed_data(original, vars)

# Compare row counts before and after reduction
cat("Full imputed rows: ", nrow(ADMI), " \n", sep = "")
cat("Reduced rows: ", nrow(reduced), " \n", sep = "")
cat(
  "Compression ratio: ",
  round(100 * nrow(reduced) / nrow(ADMI), 1),
  " %\n",
  sep = ""
)

## ----examine-reduced----------------------------------------------------------
# Peek at the leading rows of the reduced dataset
head(reduced)

# List the reduced dataset's columns (per the vignette, the structure is
# expected to match the original imputed data)
cat("\nColumns in reduced data:\n")
cat(toString(names(reduced)))

## ----expand-------------------------------------------------------------------
# Rebuild the full imputed dataset from the reduced form plus the original data
expanded <- reduced |>
  expand_imputed_data(original, vars)

# Row counts of the reconstruction and of the source dataset, for comparison
cat("Expanded rows: ", nrow(expanded), " \n", sep = "")
cat("Original ADMI rows: ", nrow(ADMI), " \n", sep = "")

## ----verify-------------------------------------------------------------------
# Put both datasets into the same row order so that CHG values can be
# compared position by position
admi_sorted <- ADMI |>
  arrange(IMPID, USUBJID, AVISIT)

expanded_sorted <- expanded |>
  arrange(IMPID, USUBJID, AVISIT)

# Compare CHG values.
# all.equal() returns TRUE on a match, but a character vector describing the
# differences (never FALSE) on a mismatch. Keep the raw result for reporting
# and derive a strict TRUE/FALSE flag with isTRUE().
chg_comparison <- all.equal(
  admi_sorted$CHG,
  expanded_sorted$CHG,
  tolerance = 1e-10
)
all_equal <- isTRUE(chg_comparison)

cat("Data integrity check:", all_equal, "\n")

# On failure, show what all.equal() reported so the mismatch can be diagnosed
if (!all_equal) {
  cat("Differences reported by all.equal():\n")
  cat(paste0("  ", chg_comparison), sep = "\n")
}

## ----save-workflow, eval = FALSE----------------------------------------------
# # After imputation
# impute_obj <- impute(draws_obj, references = c("Placebo" = "Placebo", "Drug A" = "Placebo"))
# full_imputed <- get_imputed_data(impute_obj)
# 
# # Reduce for storage
# reduced <- reduce_imputed_data(full_imputed, original_data, vars)
# 
# # Save both (reduced is much smaller)
# saveRDS(reduced, "imputed_reduced.rds")
# saveRDS(original_data, "original_data.rds")

## ----load-workflow, eval = FALSE----------------------------------------------
# # Load saved data
# reduced <- readRDS("imputed_reduced.rds")
# original_data <- readRDS("original_data.rds")
# 
# # Expand when needed for analysis
# full_imputed <- expand_imputed_data(reduced, original_data, vars)
# 
# # Run analysis
# ana_obj <- analyse_mi_data(
#  data = full_imputed,
#  vars = vars,
#  method = method,
#  fun = ancova
# )

## ----no-missing, eval = FALSE-------------------------------------------------
# # If original has no missing values
# reduced <- reduce_imputed_data(full_imputed, complete_data, vars)
# nrow(reduced)
# #> [1] 0
# 
# # expand_imputed_data handles this correctly
# expanded <- expand_imputed_data(reduced, complete_data, vars)
# # Returns original data with IMPID = "1"

