## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse  = TRUE,
  comment   = "#>",
  fig.width = 8,
  fig.height = 5,
  warning   = FALSE,
  message   = FALSE
)

## ----libraries----------------------------------------------------------------
library(FFdownload)
library(dplyr)
library(tidyr)
library(purrr)
library(ggplot2)

## ----download, cache=TRUE-----------------------------------------------------
outd   <- file.path(tempdir(), paste0("ffap_", format(Sys.time(), "%H%M%S")))
outf   <- tempfile(fileext = ".RData")

inputlist <- c(
  "25_Portfolios_5x5",
  "100_Portfolios_10x10",
  "F-F_Research_Data_Factors",
  "F-F_Research_Data_5_Factors_2x3",
  "F-F_Momentum_Factor"
)

FFdata <- FFdownload(
  output_file   = outf,
  tempd         = outd,
  exclude_daily = TRUE,
  download      = TRUE,
  download_only = FALSE,
  inputlist     = inputlist,
  format        = "tbl",
  na_values     = c(-99, -999, -99.99),
  return_data   = TRUE
)

## ----factors------------------------------------------------------------------
# FF5 factors: Mkt.RF, SMB, HML, RMW, CMA, RF  (start: Jul 1963)
ff5_raw <- FFdata$`x_F-F_Research_Data_5_Factors_2x3`$monthly$Temp2

# FF3 factors back to Jul 1926 (for time-series tests)
ff3_raw <- FFdata$`x_F-F_Research_Data_Factors`$monthly$Temp2

# Momentum factor
mom_raw <- FFdata$`x_F-F_Momentum_Factor`$monthly$Temp2

# Combined factor table (full history where all factors are available)
factors <- ff5_raw |>
  select(date, Mkt.RF, SMB, HML, RMW, CMA, RF) |>
  left_join(mom_raw |> select(date, Mom), by = "date") |>
  filter(!is.na(Mom))    # Mom starts Jan 1927; FF5 starts Jul 1963 → ~Jul 1963

# Factor table with FF3 history (no RMW/CMA/Mom)
factors_ff3hist <- ff3_raw |>
  select(date, Mkt.RF, SMB, HML, RF)

cat("Factor sample:", format(min(factors$date)), "to", format(max(factors$date)),
    "—", nrow(factors), "months\n")
cat("FF3 history  :", format(min(factors_ff3hist$date)), "to",
    format(max(factors_ff3hist$date)), "—", nrow(factors_ff3hist), "months\n")

## ----portfolios---------------------------------------------------------------
# Helper: find the value-weighted returns sub-table by name pattern
find_vw <- function(monthly_list) {
  nm <- names(monthly_list)
  vw_nm <- grep("value_weight", nm, ignore.case = TRUE, value = TRUE)
  if (length(vw_nm) == 0) stop("Cannot find value-weighted sub-table. Available: ",
                                paste(nm, collapse = ", "))
  monthly_list[[vw_nm[1]]]
}

# 25 portfolios
pf25_vw <- find_vw(FFdata$x_25_Portfolios_5x5$monthly)
pf25_cols <- setdiff(names(pf25_vw), "date")     # 25 portfolio column names
stopifnot(length(pf25_cols) == 25)

# 100 portfolios
pf100_vw <- find_vw(FFdata$x_100_Portfolios_10x10$monthly)
pf100_cols <- setdiff(names(pf100_vw), "date")   # 100 portfolio column names
stopifnot(length(pf100_cols) == 100)

cat("25-pf column names (first 6):", paste(head(pf25_cols, 6), collapse=", "), "\n")

## ----panels-------------------------------------------------------------------
# Long panel for 25 portfolios — join with FF3 history
pf25_long <- pf25_vw |>
  pivot_longer(-date, names_to = "portfolio", values_to = "ret") |>
  left_join(factors_ff3hist, by = "date") |>
  mutate(
    ret_excess = ret - RF,
    # Position-based size (S1=small → S5=large) and BM (B1=growth → B5=value) labels
    pf_pos  = as.integer(factor(portfolio, levels = pf25_cols)),
    size_q  = ceiling(pf_pos / 5),
    bm_q    = ((pf_pos - 1) %% 5) + 1,
    size_lbl = factor(size_q, labels = c("Small","S2","S3","S4","Large")),
    bm_lbl   = factor(bm_q,   labels = c("Growth","B2","B3","B4","Value"))
  ) |>
  filter(!is.na(ret_excess), !is.na(Mkt.RF))

# Long panel for 100 portfolios — join with FF5+Mom (1963+)
pf100_long <- pf100_vw |>
  pivot_longer(-date, names_to = "portfolio", values_to = "ret") |>
  left_join(factors, by = "date") |>
  mutate(
    ret_excess = ret - RF,
    pf_pos  = as.integer(factor(portfolio, levels = pf100_cols)),
    size_q  = ceiling(pf_pos / 10),
    bm_q    = ((pf_pos - 1) %% 10) + 1
  ) |>
  filter(!is.na(ret_excess), !is.na(Mom))   # FF5+Mom sample (Jul 1963+)

cat("25-pf panel: ", nrow(pf25_long),  "rows,", n_distinct(pf25_long$portfolio),  "portfolios\n")
cat("100-pf panel:", nrow(pf100_long), "rows,", n_distinct(pf100_long$portfolio), "portfolios\n")

## ----ts_capm------------------------------------------------------------------
ts_capm <- pf25_long |>
  nest_by(portfolio, size_q, bm_q, size_lbl, bm_lbl) |>
  mutate(
    fit       = list(lm(ret_excess ~ Mkt.RF, data = data)),
    alpha     = coef(fit)[["(Intercept)"]],
    alpha_se  = summary(fit)$coefficients["(Intercept)", "Std. Error"],
    alpha_t   = summary(fit)$coefficients["(Intercept)", "t value"],
    beta_mkt  = coef(fit)[["Mkt.RF"]],
    r2        = summary(fit)$r.squared,
    n_obs     = nrow(data)
  ) |>
  ungroup() |>
  select(-data, -fit)

cat("CAPM — mean |alpha|:", round(mean(abs(ts_capm$alpha)), 3), "%/month\n")
cat("       significant alphas (|t| > 2):",
    sum(abs(ts_capm$alpha_t) > 2), "of", nrow(ts_capm), "\n")

## ----plot_capm_alpha, fig.height=4--------------------------------------------
ggplot(ts_capm, aes(x = bm_lbl, y = size_lbl, fill = alpha)) +
  geom_tile(colour = "white", linewidth = 0.5) +
  geom_text(aes(label = sprintf("%.2f\n(%.1f)", alpha, alpha_t)),
            size = 2.8, lineheight = 0.9) +
  scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027",
                       midpoint = 0, name = "α (%/mo)") +
  scale_x_discrete(position = "top") +
  labs(title    = "CAPM Alphas — 25 Size×BM Portfolios",
       subtitle = "Cell: alpha (t-stat); sample: Jul 1926 – present",
       x = "Book-to-Market Quintile", y = "Size Quintile") +
  theme_minimal(base_size = 11) +
  theme(axis.text.x = element_text(angle = 0))

## ----ts_ff3-------------------------------------------------------------------
ts_ff3 <- pf25_long |>
  nest_by(portfolio, size_q, bm_q, size_lbl, bm_lbl) |>
  mutate(
    fit      = list(lm(ret_excess ~ Mkt.RF + SMB + HML, data = data)),
    alpha    = coef(fit)[["(Intercept)"]],
    alpha_se = summary(fit)$coefficients["(Intercept)", "Std. Error"],
    alpha_t  = summary(fit)$coefficients["(Intercept)", "t value"],
    b_mkt    = coef(fit)[["Mkt.RF"]],
    b_smb    = coef(fit)[["SMB"]],
    b_hml    = coef(fit)[["HML"]],
    r2       = summary(fit)$r.squared
  ) |>
  ungroup() |>
  select(-data, -fit)

cat("FF3 — mean |alpha|:", round(mean(abs(ts_ff3$alpha)), 3), "%/month\n")
cat("      significant alphas (|t| > 2):",
    sum(abs(ts_ff3$alpha_t) > 2), "of", nrow(ts_ff3), "\n")

## ----plot_ff3_alpha, fig.height=4---------------------------------------------
ggplot(ts_ff3, aes(x = bm_lbl, y = size_lbl, fill = alpha)) +
  geom_tile(colour = "white", linewidth = 0.5) +
  geom_text(aes(label = sprintf("%.2f\n(%.1f)", alpha, alpha_t)),
            size = 2.8, lineheight = 0.9) +
  scale_fill_gradient2(low = "#4575b4", mid = "white", high = "#d73027",
                       midpoint = 0, name = "α (%/mo)") +
  scale_x_discrete(position = "top") +
  labs(title    = "FF3 Alphas — 25 Size×BM Portfolios",
       subtitle = "Cell: alpha (t-stat); sample: Jul 1926 – present",
       x = "Book-to-Market Quintile", y = "Size Quintile") +
  theme_minimal(base_size = 11)

## ----plot_alpha_comparison----------------------------------------------------
bind_rows(
  ts_capm |> mutate(model = "CAPM"),
  ts_ff3  |> mutate(model = "FF3")
) |>
  mutate(
    sig = ifelse(abs(alpha_t) > 2, "p < 0.05", "n.s."),
    label = paste0("S", size_q, "B", bm_q)
  ) |>
  ggplot(aes(x = reorder(label, bm_q + (size_q - 1) * 5),
             y = alpha, colour = model, shape = sig)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  geom_errorbar(aes(ymin = alpha - 2 * alpha_se, ymax = alpha + 2 * alpha_se),
                width = 0.3, alpha = 0.5) +
  geom_point(size = 2.5) +
  scale_colour_manual(values = c(CAPM = "#e41a1c", FF3 = "#377eb8")) +
  scale_shape_manual(values = c("p < 0.05" = 16, "n.s." = 1)) +
  labs(title    = "Pricing Errors: CAPM vs FF3",
       subtitle = "Portfolios ordered by BM within size quintile; bars = ±2 SE",
       x = "Portfolio (S = size, B = book-to-market quintile)",
       y = "Alpha (%/month)", colour = "Model", shape = "") +
  theme_bw(base_size = 10) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, size = 7))

## ----grs_function-------------------------------------------------------------
grs_test <- function(returns_xmat, factors_xmat) {
  # returns_xmat : T × N matrix of excess portfolio returns (no NAs)
  # factors_xmat : T × K matrix of factor excess returns
  T <- nrow(returns_xmat); N <- ncol(returns_xmat); K <- ncol(factors_xmat)
  stopifnot(T > N + K + 1)

  # OLS: Y = X*B + E  where X = [1 | factors]
  X     <- cbind(1, factors_xmat)
  B     <- solve(crossprod(X), crossprod(X, returns_xmat))  # (K+1) × N
  E     <- returns_xmat - X %*% B                           # T × N residuals
  alpha <- B[1, ]                                           # N intercepts

  # MLE residual covariance
  Sigma <- crossprod(E) / T

  # Factor Sharpe ratio squared
  mu_f  <- colMeans(factors_xmat)
  Omega <- crossprod(scale(factors_xmat, center = TRUE, scale = FALSE)) / T
  sr2   <- as.numeric(t(mu_f) %*% solve(Omega) %*% mu_f)

  # GRS statistic
  grs_stat <- ((T - N - K) / N) *
    as.numeric(t(alpha) %*% solve(Sigma) %*% alpha) / (1 + sr2)
  p_val <- pf(grs_stat, df1 = N, df2 = T - N - K, lower.tail = FALSE)

  list(GRS = round(grs_stat, 3), p_value = round(p_val, 4),
       df1 = N, df2 = T - N - K,
       mean_abs_alpha = round(mean(abs(alpha)), 4))
}

## ----grs_run------------------------------------------------------------------
# Build wide matrices aligned to the same dates
pf25_wide <- pf25_long |>
  select(date, portfolio, ret_excess) |>
  pivot_wider(names_from = portfolio, values_from = ret_excess)

# Merge factors on same dates and drop any rows with NAs
pf25_merged <- pf25_wide |>
  left_join(factors_ff3hist |> select(date, Mkt.RF, SMB, HML), by = "date") |>
  drop_na()

ret_mat_25  <- as.matrix(pf25_merged[, pf25_cols])
capm_f_mat  <- as.matrix(pf25_merged[, "Mkt.RF",  drop = FALSE])
ff3_f_mat   <- as.matrix(pf25_merged[, c("Mkt.RF","SMB","HML")])

grs_capm <- grs_test(ret_mat_25, capm_f_mat)
grs_ff3  <- grs_test(ret_mat_25, ff3_f_mat)

tibble(
  Model                 = c("CAPM", "FF3"),
  `GRS statistic`       = c(grs_capm$GRS,           grs_ff3$GRS),
  `p-value`             = c(grs_capm$p_value,        grs_ff3$p_value),
  `F(N, T-N-K)`         = c(sprintf("F(%d,%d)", grs_capm$df1, grs_capm$df2),
                             sprintf("F(%d,%d)", grs_ff3$df1,  grs_ff3$df2)),
  `Mean |α| (%/mo)`     = c(grs_capm$mean_abs_alpha, grs_ff3$mean_abs_alpha)
) |>
  knitr::kable(digits = 4, align = "lrrrr",
               caption = "GRS Test — 25 Size×BM Portfolios")

## ----momentum-----------------------------------------------------------------
# WML time series
wml <- mom_raw |>
  select(date, Mom) |>
  left_join(factors_ff3hist, by = "date") |>
  filter(!is.na(Mom), !is.na(Mkt.RF))

# Average WML return with t-statistic
wml_mean <- mean(wml$Mom)
wml_t    <- t.test(wml$Mom)$statistic

# CAPM alpha for WML
fit_wml_capm <- lm(Mom ~ Mkt.RF, data = wml)
# FF3 alpha for WML
fit_wml_ff3  <- lm(Mom ~ Mkt.RF + SMB + HML, data = wml)

mom_summary <- tibble(
  Measure                  = c("Mean WML return", "CAPM alpha", "FF3 alpha"),
  `Estimate (%/mo)`        = c(wml_mean,
                                coef(fit_wml_capm)["(Intercept)"],
                                coef(fit_wml_ff3) ["(Intercept)"]),
  `t-statistic`            = c(wml_t,
                                summary(fit_wml_capm)$coef["(Intercept)", "t value"],
                                summary(fit_wml_ff3) $coef["(Intercept)", "t value"])
)

knitr::kable(mom_summary, digits = 3, align = "lrr",
             caption = "Momentum (WML) — Mean Return and Pricing Errors")

## ----momentum_plot, fig.height=4----------------------------------------------
# Rolling 5-year average WML return
wml |>
  mutate(date_num = as.numeric(as.Date(date)),
         roll_mean = zoo::rollapply(Mom, width = 60, FUN = mean,
                                    fill = NA, align = "right")) |>
  ggplot(aes(x = as.Date(date))) +
  geom_col(aes(y = Mom), fill = "grey80", width = 20) +
  geom_line(aes(y = roll_mean), colour = "#e41a1c", linewidth = 1) +
  geom_hline(yintercept = 0, linetype = "dashed") +
  labs(title    = "WML Momentum Factor",
       subtitle = "Bars = monthly return; red line = 60-month rolling average",
       x = NULL, y = "Return (%/month)") +
  theme_bw()

## ----fmb_helpers--------------------------------------------------------------
# Newey-West standard error for the mean of a time series
nw_se <- function(x, lags = NULL) {
  x   <- x[!is.na(x)]
  T   <- length(x)
  if (is.null(lags)) lags <- floor(4 * (T / 100)^(2/9))
  xd  <- x - mean(x)
  v   <- sum(xd^2) / T
  if (lags > 0) {
    for (l in seq_len(lags)) {
      v <- v + 2 * (1 - l / (lags + 1)) * sum(xd[(l+1):T] * xd[1:(T-l)]) / T
    }
  }
  sqrt(v / T)
}

# Full FMB procedure
# returns_mat : T × N matrix of excess returns
# factors_mat : T × K matrix of factors
fmb <- function(returns_mat, factors_mat, factor_names = NULL) {
  T <- nrow(returns_mat); N <- ncol(returns_mat); K <- ncol(factors_mat)
  if (is.null(factor_names)) factor_names <- colnames(factors_mat)

  # ── First pass: full-sample OLS betas ───────────────────────────────────────
  X_ts   <- cbind(1, factors_mat)
  B_ts   <- solve(crossprod(X_ts), crossprod(X_ts, returns_mat))
  betas  <- t(B_ts[-1, , drop = FALSE])   # N × K

  # ── Second pass: monthly cross-sectional regressions ────────────────────────
  X_cs    <- cbind(1, betas)              # N × (K+1)
  gammas  <- matrix(NA_real_, nrow = T, ncol = K + 1)

  for (t in seq_len(T)) {
    y_t   <- returns_mat[t, ]
    ok    <- !is.na(y_t)
    if (sum(ok) > K + 1) {
      gammas[t, ] <- lm.fit(X_cs[ok, , drop = FALSE], y_t[ok])$coefficients
    }
  }

  # ── Average lambdas + Newey-West t-stats ────────────────────────────────────
  lambda_names <- c("Intercept", factor_names)
  lam_mean <- colMeans(gammas, na.rm = TRUE)
  lam_se   <- apply(gammas, 2, nw_se)
  lam_t    <- lam_mean / lam_se

  tibble(
    term       = lambda_names,
    lambda     = lam_mean,
    nw_se      = lam_se,
    t_stat     = lam_t,
    signif     = case_when(
      abs(lam_t) > 3.29 ~ "***",
      abs(lam_t) > 2.58 ~ "**",
      abs(lam_t) > 1.96 ~ "*",
      TRUE               ~ ""
    )
  )
}

## ----fmb_data-----------------------------------------------------------------
# Wide matrices aligned to the same dates (FF5+Mom sample, Jul 1963+)
pf100_wide <- pf100_long |>
  select(date, portfolio, ret_excess) |>
  pivot_wider(names_from = portfolio, values_from = ret_excess)

pf100_merged <- pf100_wide |>
  left_join(factors, by = "date") |>
  drop_na()

ret100   <- as.matrix(pf100_merged[, pf100_cols])
f_capm   <- as.matrix(pf100_merged[, "Mkt.RF",                    drop = FALSE])
f_ff3    <- as.matrix(pf100_merged[, c("Mkt.RF","SMB","HML")])
f_ff4    <- as.matrix(pf100_merged[, c("Mkt.RF","SMB","HML","Mom")])
f_ff5    <- as.matrix(pf100_merged[, c("Mkt.RF","SMB","HML","RMW","CMA")])

cat("FMB sample:", format(min(pf100_merged$date)), "to",
    format(max(pf100_merged$date)), "—", nrow(pf100_merged), "months\n")
cat("100 portfolios x", ncol(ret100), "columns (check: should be 100)\n")

## ----fmb_run------------------------------------------------------------------
res_capm <- fmb(ret100, f_capm, "Mkt.RF")
res_ff3  <- fmb(ret100, f_ff3,  c("Mkt.RF","SMB","HML"))
res_ff4  <- fmb(ret100, f_ff4,  c("Mkt.RF","SMB","HML","Mom"))
res_ff5  <- fmb(ret100, f_ff5,  c("Mkt.RF","SMB","HML","RMW","CMA"))

## ----fmb_table----------------------------------------------------------------
# All factor names across all models
all_terms <- c("Intercept","Mkt.RF","SMB","HML","Mom","RMW","CMA")

fmt_col <- function(res, model_name) {
  res |>
    mutate(cell = sprintf("%.3f%s (%.2f)", lambda, signif, t_stat)) |>
    select(term, cell) |>
    right_join(tibble(term = all_terms), by = "term") |>
    mutate(cell = replace_na(cell, "\u2014")) |>
    rename(!!model_name := cell)
}

fmb_table <- fmt_col(res_capm, "CAPM") |>
  left_join(fmt_col(res_ff3, "FF3"),  by = "term") |>
  left_join(fmt_col(res_ff4, "FF4"),  by = "term") |>
  left_join(fmt_col(res_ff5, "FF5"),  by = "term") |>
  rename(Factor = term)

knitr::kable(fmb_table, align = "lcccc",
             caption = paste(
               "Fama-MacBeth Results — 100 Size x BM Portfolios.",
               "Entries: lambda estimate (significance stars) (NW t-stat).",
               "*, **, *** = |t| > 1.96, 2.58, 3.29."))

## ----fmb_plot, fig.height=5---------------------------------------------------
# Lambda point estimates with 95% NW confidence intervals
bind_rows(
  res_capm |> mutate(model = "CAPM"),
  res_ff3  |> mutate(model = "FF3"),
  res_ff4  |> mutate(model = "FF4 (Carhart)"),
  res_ff5  |> mutate(model = "FF5")
) |>
  filter(term != "Intercept") |>
  mutate(
    lo = lambda - 1.96 * nw_se,
    hi = lambda + 1.96 * nw_se,
    term  = factor(term,  levels = c("Mkt.RF","SMB","HML","Mom","RMW","CMA")),
    model = factor(model, levels = c("CAPM","FF3","FF4 (Carhart)","FF5"))
  ) |>
  ggplot(aes(x = term, y = lambda, colour = model,
             ymin = lo, ymax = hi)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey50") +
  geom_errorbar(position = position_dodge(width = 0.5), width = 0.25, alpha = 0.7) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  scale_colour_brewer(palette = "Set1") +
  labs(title    = "FMB Risk Prices λ — 100 Size×BM Portfolios",
       subtitle = "Point = NW mean; bars = 95% NW confidence interval",
       x = "Factor", y = "Risk price λ (%/month)", colour = "Model") +
  theme_bw()