## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the rendered vignette: `collapse = TRUE` merges each
# chunk's source and output into a single block, and `comment = "#>"` is the
# prefix put in front of every line of printed output.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup--------------------------------------------------------------------
library(syntheticdata)

## ----real-data----------------------------------------------------------------
# Simulate a small "real" cohort to synthesize from. The seed is fixed so the
# vignette output is reproducible; the columns are drawn in a fixed order
# (age, sbp, sex, smoking, outcome) because every draw advances the same
# global RNG stream.
set.seed(42)
real <- local({
  n_obs <- 500

  # Continuous measurements.
  age <- rnorm(n_obs, mean = 65, sd = 12)
  sbp <- rnorm(n_obs, mean = 135, sd = 22)

  # Categorical covariates; smoking status uses unequal class probabilities.
  sex <- sample(c("Male", "Female"), n_obs, replace = TRUE)
  smoking_levels <- c("Never", "Former", "Current")
  smoking <- sample(smoking_levels, n_obs,
                    replace = TRUE, prob = c(0.4, 0.35, 0.25))

  # Binary 0/1 outcome with success probability 0.28.
  outcome <- rbinom(n_obs, 1, 0.28)

  data.frame(
    age = age,
    sbp = sbp,
    sex = sex,
    smoking = smoking,
    outcome = outcome
  )
})
head(real)

## ----synthesize---------------------------------------------------------------
# Build a synthetic version of `real` with the "parametric" method, using a
# fixed seed so the result is reproducible. `n = 500` matches the number of
# rows in `real` (presumably the number of synthetic rows to generate --
# confirm in ?synthesize).
syn <- synthesize(real, method = "parametric", n = 500, seed = 1)
# Auto-print the returned object.
syn

## ----validate-----------------------------------------------------------------
# Run the package's validation on the synthetic object. The trade-off chunk
# further down indexes such a result by its `metric` and `value` fields
# (e.g. "ks_statistic_mean", "nn_distance_ratio"), so it appears to be a
# table with one row per metric.
val <- validate_synthetic(syn)
# Auto-print the validation result.
val

## ----compare------------------------------------------------------------------
# Compare synthesis methods starting from the real data (note: this takes
# `real`, not `syn`), with a fixed seed for reproducibility.
# NOTE(review): presumably this synthesizes `real` once per available method
# and tabulates validation metrics -- confirm in ?compare_methods.
comp <- compare_methods(real, seed = 1)
# Auto-print the comparison.
comp

## ----privacy------------------------------------------------------------------
# Assess the privacy risk of the synthetic object, declaring the `age`
# column as the sensitive attribute.
pr <- privacy_risk(syn, sensitive_cols = "age")
# Auto-print the privacy assessment.
pr

## ----fidelity-----------------------------------------------------------------
# Evaluate model fidelity for the synthetic object, using the binary 0/1
# `outcome` column of the simulated data as the response.
# NOTE(review): presumably compares predictive models trained on real vs.
# synthetic data -- confirm in ?model_fidelity.
mf <- model_fidelity(syn, outcome = "outcome")
# Auto-print the fidelity result.
mf

## ----tradeoff-----------------------------------------------------------------
# Utility/privacy trade-off: re-synthesize `real` with the "noise" method at
# several noise levels and record two validation metrics for each setting.
noise_levels <- c(0.05, 0.1, 0.2, 0.5)

# Build one single-row data frame per noise level. lapply() returns the list
# directly, so nothing is grown by repeated c() calls inside a loop.
results <- lapply(noise_levels, function(nl) {
  # Same seed for every level, so differences come from `noise_level` only.
  s <- synthesize(real, method = "noise", noise_level = nl, seed = 1)
  v <- validate_synthetic(s)

  # Pull the two metrics of interest out of the validation result by name.
  data.frame(
    noise_level = nl,
    ks = v$value[v$metric == "ks_statistic_mean"],
    privacy = v$value[v$metric == "nn_distance_ratio"]
  )
})

# Stack the per-level rows into one summary table and auto-print it.
do.call(rbind, results)

