## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6
)

library(BCFM)
library(dplyr)    
library(tidyr)    
library(ggplot2)   

## ----simulated-data-exploration-----------------------------------------------
# Load the simulated dataset
data("sim.data", package = "BCFM")

# Examine the structure
str(sim.data)

# Extract dimensions
n_obs <- dim(sim.data)[1]      # 200 observations
n_vars <- dim(sim.data)[2]     # 20 variables

cat("Dataset dimensions:\n")
cat("  Observations:", n_obs, "\n")
cat("  Variables:", n_vars, "\n")

## ----run-model-selection------------------------------------------------------
# Specify variables to use for clustering
cluster.vars <- paste0("V", 1:n_vars)

# Create output directory for results (using tempdir() for CRAN compliance)
output_dir <- file.path(tempdir(), "BCFM_analysis")

# Run model selection
# Note: grouplist and factorlist can be vectors (e.g., c(2, 3, 4)) or sequences (e.g., 2:4)
# The function will fit a model for each combination of groups and factors
BCFM.model.selection(
  data = sim.data,
  cluster.vars = cluster.vars,
  grouplist = 4,          # Number of groups
  factorlist = 3,         # Number of factors
  n.iter = 100,           # Number of MCMC iterations (use 50000+ for real analysis)
  burnin = 10,            # Burnin for IC calculation (use 10000+ for real analysis)
  every = 10,             # Retains the output of every "every" MCMC iterations
  cluster.size = 0.01,    # Minimum proportion required for each cluster (default 0.05)
  output_dir = output_dir, # Specify where to save results
  seed = 123              # Optional seed for reproducibility
)

## ----check-output-------------------------------------------------------------
# Check what files were created
list.files(output_dir)

# Load IC results (if multiple models were fitted)
load(file.path(output_dir, "IC.Rdata"))
print(IC.matrix)

# Load the fitted model results
load(file.path(output_dir, "results-covarianceF-g4-f3.Rdata"))

# Examine the structure
str(SDresult$Result, max.level = 1)

## ----factor-loadings-matrix---------------------------------------------------
# Check dimensions and calculate posterior mean of B (after burnin)
n_iter <- dim(SDresult$Result$B)[1]
burnin <- min(100, floor(n_iter * 0.2))  # Use 20% as burnin or 100, whichever is smaller

cat("Total iterations:", n_iter, "\n")
cat("Using burnin:", burnin, "\n")

# Calculate posterior mean after burnin
B.matrix <- apply(SDresult$Result$B[burnin:n_iter, , ], MARGIN = c(2, 3), FUN = mean)
B.matrix <- as.data.frame(B.matrix)

# Set variable names
rownames(B.matrix) <- paste0("Var", 1:nrow(B.matrix))

# Set informative column names for factors
colnames(B.matrix) <- paste0("Factor ", 1:ncol(B.matrix))

# Print the factor loadings matrix
knitr::kable(round(B.matrix, 3), 
             caption = "Posterior Mean Factor Loadings",
             align = 'c')

## ----latent-profiles-plot, results='hide'-------------------------------------
ggplot_latent.profiles(Gibbs = SDresult$Result)

## ----mu-density-plot, results='hide'------------------------------------------
ggplot_mu.density(Gibbs = SDresult$Result, add.legend = TRUE)

## ----probs-density-plot, results='hide'---------------------------------------
ggplot_probs.density(Gibbs = SDresult$Result)

## ----probs-trace-plot, results='hide'-----------------------------------------
ggplot_probs.trace(Gibbs = SDresult$Result)

## ----heatmap-plot, results='hide'---------------------------------------------
ggplot_Zit.heatmap(Gibbs = SDresult$Result)

## ----model-components---------------------------------------------------------
# Cluster assignments (most likely cluster for each observation)
cluster_assignments <- SDresult$Result$Z

# Loading matrix
loadings <- SDresult$Result$B

# Cluster means
cluster_means <- SDresult$Result$mu

# Cluster probabilities
cluster_probs <- SDresult$Result$probs

# Factor scores
factor_scores <- SDresult$Result$X

# Show dimensions
cat("Dimensions of key components:\n")
cat("  B (loadings):", dim(loadings), "\n")
cat("  mu (means):", dim(cluster_means), "\n")
cat("  X (scores):", dim(factor_scores), "\n")

## ----session-info-------------------------------------------------------------
sessionInfo()

