## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  message   = FALSE,
  warning   = FALSE,
  fig.align = "center",
  fig.height = 5,
  fig.width  = 7,
  fig.path   = "fig/",
  dev        = "png",
  comment    = "#>"
)

# packages to be cited in packages.bib
.to.cite <- c("nestedLogit", "nnet", "car", "ggplot2", "AER", "dplyr",
               "mlogit", "vcd", "MASS")

## ----load-packages------------------------------------------------------------
library(nestedLogit)
library(nnet)       # for: multinom()
library(car)        # for: Anova()
library(ggplot2)

## ----chile-data---------------------------------------------------------------
data("Chile", package = "carData")
str(Chile)

## ----chile-drop---------------------------------------------------------------
colSums(is.na(Chile))

# Drop the 168 rows with missing vote so all models use the same cases
chile <- Chile[!is.na(Chile$vote), ]

# Recode vote with longer labels, ordered by median statusquo (No < Abstain < Undec < Yes)
chile$vote <- factor(chile$vote,
                     levels = c("N", "A", "U", "Y"),
                     labels = c("No", "Abstain", "Undec", "Yes"))
nrow(chile)
table(chile$vote)

## ----chile-eda----------------------------------------------------------------
# statusquo distribution by vote
boxplot(statusquo ~ vote, data = chile,
        xlab = "Vote", ylab = "Attitude toward status quo",
        main  = "Chile 1988: status quo attitude by voting intention")

## ----chile-dichots-eng--------------------------------------------------------
dichots_eng <- logits(
  engage    = dichotomy(engaged    = c("Yes", "No"),
                        disengaged = c("Abstain", "Undec")),
  direction = dichotomy("Yes", "No"),
  disengage = dichotomy("Abstain", "Undec")
)

# print method
dichots_eng

# print as an ASCII tree
as.tree(dichots_eng, response = "vote")

## ----chile-dichots-dir--------------------------------------------------------
dichots_dir <- logits(
  direction  = dichotomy(pro  = c("Yes", "Undec"),
                         anti = c("No", "Abstain")),
  engage_pro = dichotomy("Yes", "Undec"),
  engage_ant = dichotomy("No", "Abstain")
)
dichots_dir

## ----chile-fit----------------------------------------------------------------
mod.formula <- vote ~ statusquo + age + sex + education + income

# Multinomial logit baseline (reference = "No")
chile2 <- within(chile, vote <- relevel(vote, ref = "No"))
chile_multi <- multinom(mod.formula, data = chile2, trace = FALSE)

# Nested logit: engagement-first tree
chile_eng <- nestedLogit(mod.formula, dichotomies = dichots_eng, data = chile)

# Nested logit: direction-first tree
chile_dir <- nestedLogit(mod.formula, dichotomies = dichots_dir, data = chile)

## ----chile-summaries, eval=FALSE----------------------------------------------
# summary(chile_eng)
# summary(chile_dir)

## ----chile-anova--------------------------------------------------------------
Anova(chile_eng)
Anova(chile_dir)

## ----chile-aic----------------------------------------------------------------
data.frame(
  model   = c("Multinomial", "Engagement tree", "Direction tree"),
  logLik  = c(logLik(chile_multi), logLik(chile_eng), logLik(chile_dir)),
  AIC     = c(AIC(chile_multi),    AIC(chile_eng),    AIC(chile_dir))
)

## ----chile-newdata------------------------------------------------------------
new_chile <- data.frame(
  statusquo = seq(-1.5, 1.5, by = 0.25),
  age       = median(chile$age,    na.rm = TRUE),
  sex       = factor("F",  levels = levels(chile$sex)),
  education = factor("S",  levels = levels(chile$education)),
  income    = median(chile$income, na.rm = TRUE)
)

## ----chile-predict------------------------------------------------------------
pred_eng <- as.data.frame(predict(chile_eng, newdata = new_chile))
pred_dir <- as.data.frame(predict(chile_dir, newdata = new_chile))

## ----chile-plot, fig.height=5, fig.width=9------------------------------------
# as.data.frame() returns long format: one row per (observation × response level)
# with predictor columns, plus 'response', 'p', 'se.p', 'logit', 'se.logit'
pred_long <- rbind(
  transform(pred_eng, model = "Engagement-first tree"),
  transform(pred_dir, model = "Direction-first tree")
)

ggplot(pred_long, aes(x = statusquo, y = p, colour = response, fill = response)) +
  geom_ribbon(aes(ymin = pmax(0, p - 1.96 * se.p),
                  ymax = pmin(1, p + 1.96 * se.p)),
              alpha = 0.15, colour = NA) +
  geom_line(linewidth = 1.2) +
  geom_point(color = "blacK") +
  facet_wrap(~ model) +
  labs(x      = "Attitude toward status quo",
       y      = "Predicted probability",
       colour = "Vote", fill = "Vote",
       title  = "Chile 1988: predicted vote probabilities by tree structure") +
  theme_bw()

## ----chile-plot-logit, fig.height=5, fig.width=6------------------------------
# Logit-scale plot using the new scale= argument
plot(chile_eng, "statusquo",
     others = list(age       = median(chile$age, na.rm = TRUE),
                   sex       = "F",
                   education = "S",
                   income    = median(chile$income, na.rm = TRUE)),
     scale  = "logit",
     xlab   = "Attitude toward status quo",
     main   = "Engagement-first tree")

## ----travel-pkg, include=FALSE------------------------------------------------
have_AER <- requireNamespace("AER",   quietly = TRUE)
have_dplyr <- requireNamespace("dplyr", quietly = TRUE)

## ----travel-eval, eval=have_AER && have_dplyr---------------------------------
library(AER)
library(dplyr)
data("TravelMode", package = "AER")
str(TravelMode)

## ----travel-reshape, eval=have_AER && have_dplyr------------------------------
tm <- TravelMode |>
  filter(choice == "yes") |>
  select(mode, income, size) |>
  mutate(mode = relevel(factor(mode), ref = "car"))

table(tm$mode)

## ----travel-dichots, eval=have_AER && have_dplyr------------------------------
travel_dichots <- logits(
  pvt_pub  = dichotomy("car", public = c("air", "train", "bus")),
  air_grnd = dichotomy("air", ground = c("train", "bus")),
  tr_bus   = dichotomy("train", "bus")
)
travel_dichots

## ----travel-fit, eval=have_AER && have_dplyr----------------------------------
tm_multi  <- multinom(mode ~ income + size, data = tm, trace = FALSE)
tm_nested <- nestedLogit(mode ~ income + size,
                         dichotomies = travel_dichots,
                         data = tm)
summary(tm_nested)
Anova(tm_nested)

## ----travel-predict, eval=have_AER && have_dplyr------------------------------
new_tm <- data.frame(income = seq(10, 60, by = 10), size = 1)

# as.data.frame() returns long format; reshape to wide for comparison
pred_tm_nested_long <- as.data.frame(predict(tm_nested, newdata = new_tm))
nested_wide <- reshape(
  pred_tm_nested_long[, c("income", "size", "response", "p")],
  idvar     = c("income", "size"),
  timevar   = "response",
  direction = "wide"
)
names(nested_wide) <- sub("^p\\.", "", names(nested_wide))

pred_tm_multi <- as.data.frame(predict(tm_multi, newdata = new_tm,
                                       type = "probs"))

cat("Nested logit:\n")
cbind(income = new_tm$income, round(nested_wide[, levels(tm$mode)], 3))

cat("Multinomial logit:\n")
cbind(income = new_tm$income, round(pred_tm_multi[, levels(tm$mode)], 3))

## ----travel-aic, eval=have_AER && have_dplyr----------------------------------
data.frame(
  model  = c("Multinomial", "Nested"),
  logLik = c(logLik(tm_multi), logLik(tm_nested)),
  AIC    = c(AIC(tm_multi),    AIC(tm_nested))
)

## ----fishing-pkg, include=FALSE-----------------------------------------------
have_mlogit <- requireNamespace("mlogit", quietly = TRUE)

## ----fishing-load, eval=have_mlogit-------------------------------------------
data("Fishing", package = "mlogit")
str(Fishing)

## ----fishing-dichots, eval=have_mlogit----------------------------------------
fishing_dichots <- logits(
  shore_vessel = dichotomy(shore  = c("beach", "pier"),
                           vessel = c("boat",  "charter")),
  shore_type   = dichotomy("beach", "pier"),
  vessel_type  = dichotomy("boat",  "charter")
)
fishing_dichots
as.tree(fishing_dichots, response = "mode")

## ----fishing-fit, eval=have_mlogit--------------------------------------------
fish_nested <- nestedLogit(mode ~ income,
                           dichotomies = fishing_dichots,
                           data = Fishing)
summary(fish_nested)
Anova(fish_nested)

## ----fishing-plot, eval=have_mlogit-------------------------------------------
plot(fish_nested, x.var = "income",
     xlab  = "Monthly income (USD)",
     main  = "Fishing mode choice vs. income",
     label=TRUE, label.col="black",
     cex.lab = 1.2)

## ----arthritis-pkg, include=FALSE---------------------------------------------
have_vcd <- requireNamespace("vcd", quietly = TRUE)

## ----arthritis-load, eval=have_vcd--------------------------------------------
data("Arthritis", package = "vcd")
str(Arthritis)
table(Arthritis$Improved)

## ----arthritis-dichots, eval=have_vcd-----------------------------------------
arth_dichots <- continuationLogits(c("None", "Some", "Marked"))
arth_dichots

## ----arthritis-fit, eval=have_vcd---------------------------------------------
arth_nested <- nestedLogit(Improved ~ Treatment + Sex + Age,
                           dichotomies = arth_dichots,
                           data = Arthritis)
summary(arth_nested)
Anova(arth_nested)

## ----arthritis-plot, eval=have_vcd--------------------------------------------
plot(arth_nested, x.var = "Age",
     others = list(Treatment = "Treated", Sex = "Female"),
     xlab   = "Age",
     main   = "Arthritis: Treatment = Treated, Sex = Female")

## ----survey-load--------------------------------------------------------------
data("survey", package = "MASS")
# Drop NAs on Smoke
sv <- survey[!is.na(survey$Smoke), ]
table(sv$Smoke)

## ----smoke-dichots------------------------------------------------------------
smoke_dichots <- continuationLogits(c("Never", "Occas", "Regul", "Heavy"))
smoke_dichots

## ----smoke-fit----------------------------------------------------------------
smoke_nested <- nestedLogit(Smoke ~ Sex + Age + Exer,
                            dichotomies = smoke_dichots,
                            data = sv)
summary(smoke_nested)
Anova(smoke_nested)

## ----smoke-plot---------------------------------------------------------------
plot(smoke_nested, x.var = "Age",
     others = list(Sex = "Female", Exer = "Some"),
     xlab   = "Age",
     main   = "Smoking: Female students, some exercise")

## ----pneumo-pkg, include=FALSE------------------------------------------------
have_VGAM <- requireNamespace("VGAM", quietly = TRUE)

## ----pneumo-show, eval=have_VGAM----------------------------------------------
data("pneumo", package = "VGAM")
pneumo

## ----pneumo-fit, eval=have_VGAM-----------------------------------------------
# pneumo is aggregated; nestedLogit() needs individual-level data or
# frequency weights — requires expansion or a weighted interface.
# Placeholder: would use continuationLogits(c("normal", "mild", "severe"))

## ----session-info-------------------------------------------------------------
sessionInfo()

## ----write-bib, echo=FALSE----------------------------------------------------
pkgs <- unique(c(.to.cite, .packages()))
knitr::write_bib(pkgs, file = here::here("vignettes", "packages.bib"))

