## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5,
  message = FALSE,
  warning = FALSE
)

## ----setup--------------------------------------------------------------------
library(tidylearn)
library(dplyr)
library(ggplot2)
library(gt)

## ----plot-regression----------------------------------------------------------
model_reg <- tl_model(mtcars, mpg ~ wt + hp, method = "linear")

# Actual vs predicted — one call
plot(model_reg, type = "actual_predicted")

## ----plot-classification------------------------------------------------------
split <- tl_split(iris, prop = 0.7, stratify = "Species", seed = 42)
model_clf <- tl_model(split$train, Species ~ ., method = "forest")

plot(model_clf, type = "confusion")

## ----plot-pca-----------------------------------------------------------------
pca <- tidy_pca(USArrests, scale = TRUE)

tidy_pca_screeplot(pca)
tidy_pca_biplot(pca, label_obs = TRUE)

## ----plot-lasso---------------------------------------------------------------
model_lasso <- tl_model(mtcars, mpg ~ ., method = "lasso")

tl_plot_regularization_path(model_lasso)
tl_plot_regularization_cv(model_lasso)

## ----table-auto, eval = FALSE-------------------------------------------------
# tl_table(model)                       # auto-selects the best table type
# tl_table(model, type = "coefficients") # specific type

## ----table-metrics------------------------------------------------------------
tl_table_metrics(model_reg)

## ----table-coef---------------------------------------------------------------
tl_table_coefficients(model_reg)

## ----table-coef-lasso---------------------------------------------------------
tl_table_coefficients(model_lasso)

## ----table-confusion----------------------------------------------------------
tl_table_confusion(model_clf, new_data = split$test)

## ----table-importance---------------------------------------------------------
tl_table_importance(model_clf)

## ----table-variance-----------------------------------------------------------
pca_model <- tl_model(USArrests, method = "pca")
tl_table_variance(pca_model)

## ----table-loadings-----------------------------------------------------------
tl_table_loadings(pca_model)

## ----table-clusters-----------------------------------------------------------
km <- tl_model(iris[, 1:4], method = "kmeans", k = 3)
tl_table_clusters(km)

## ----table-comparison---------------------------------------------------------
m1 <- tl_model(split$train, Species ~ ., method = "logistic")
m2 <- tl_model(split$train, Species ~ ., method = "forest")
m3 <- tl_model(split$train, Species ~ ., method = "tree")

tl_table_comparison(
  m1, m2, m3,
  new_data = split$test,
  names = c("Logistic", "Random Forest", "Decision Tree")
)

## ----plotly, eval = FALSE-----------------------------------------------------
# library(plotly)
# 
# ggplotly(plot(model_reg, type = "actual_predicted"))
# ggplotly(tidy_pca_biplot(pca, label_obs = TRUE))
# ggplotly(tl_plot_regularization_path(model_lasso))

## ----workflow-----------------------------------------------------------------
# Fit
model <- tl_model(split$train, Species ~ ., method = "forest")

# Evaluate
tl_table_metrics(model, new_data = split$test)

# Visualise
plot(model, type = "confusion")

# Drill into feature importance
tl_table_importance(model, top_n = 4)