## ----setup, include = FALSE---------------------------------------------------
# Document-wide knitr chunk defaults. `eval = FALSE` means no chunk is run,
# which is why every chunk below appears as commented-out code in this
# extracted script.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----install------------------------------------------------------------------
# install.packages("widr")
# 
# # Development version
# remotes::install_github("cherylisabella/widr")

## ----codes--------------------------------------------------------------------
# wid_search("national income")                           # keyword search across concepts
# wid_decode("sptinc992j")                                # parse into components
# wid_encode("s", "ptinc", age = "992", pop = "j")        # build from components
# wid_is_valid(series_type = "s", concept = "ptinc")      # non-throwing validation

## ----download-----------------------------------------------------------------
# library(widr)
# 
# # Top 1% pre-tax income share, United States, 2000-2022
# top1 <- download_wid(
#   indicators = "sptinc992j",
#   areas      = "US",
#   perc       = "p99p100",
#   years      = 2000:2022
# )
# 
# top1
# #> <wid_df>  23 rows | 1 countries | 1 variables
# #>   country   variable percentile year  value age pop
# #> 1      US sptinc992j    p99p100 2000  0.168 992   j
# #> ...

## ----multi--------------------------------------------------------------------
# shares <- download_wid(
#   indicators = "sptinc992j",
#   areas      = c("US", "FR", "DE", "CN"),
#   perc       = c("p90p100", "p99p100"),
#   years      = 1980:2022
# )

## ----extrap-------------------------------------------------------------------
# download_wid("sptinc992j", areas = "MZ", include_extrapolations = FALSE)

## ----meta---------------------------------------------------------------------
# result <- download_wid("sptinc992j", areas = "US", metadata = TRUE)
# attr(result, "wid_meta")
# #>     variable country      source method quality       imputation
# #> 1 sptinc992j      US Tax records    DFL    high adjusted surveys

## ----tidy-pipe----------------------------------------------------------------
# library(dplyr)
# library(ggplot2)
# 
# top1 |>
#   wid_tidy(country_names = FALSE) |>
#   filter(year >= 1990) |>
#   ggplot(aes(year, value)) +
#   geom_line(colour = "#58a6ff", linewidth = 0.9) +
#   scale_y_continuous(labels = scales::percent_format()) +
#   labs(title = "Top 1% pre-tax income share - United States",
#        x = NULL, y = NULL) +
#   theme_minimal()

## ----query--------------------------------------------------------------------
# q <- wid_query(indicators = "sptinc992j", areas = c("US", "FR"), cache = FALSE)
# q <- wid_filter(q, years = 2010:2022)
# wid_fetch(q)

## ----cache--------------------------------------------------------------------
# wid_cache_list()    # list cached queries
# wid_cache_clear()   # remove all

## ----convert------------------------------------------------------------------
# # Bottom 50% average income, four countries - convert to 2022 USD PPP
# download_wid("aptinc992j", areas = c("US", "FR", "CN", "IN"), perc = "p0p50") |>
#   wid_convert(target = "ppp", base_year = "2022")

## ----gini---------------------------------------------------------------------
# dist <- download_wid("sptinc992j", areas = c("US", "FR"), perc = "all",
#                      years = 1990:2022)
# wid_gini(dist)
# #>   country year  gini
# #> 1      FR 1990 0.411
# #> 2      US 1990 0.453

## ----top-share----------------------------------------------------------------
# wid_top_share(dist, top = 0.01)   # top 1%
# wid_top_share(dist, top = 0.10)   # top 10%

## ----perc-ratio---------------------------------------------------------------
# thresh <- download_wid("tptinc992j", areas = "US", perc = "all")
# wid_percentile_ratio(thresh)                                          # P90/P10
# wid_percentile_ratio(thresh, numerator = "p90", denominator = "p50") # P90/P50

## ----plot---------------------------------------------------------------------
# # Time series - one line per country; facet = TRUE for separate panels
# wid_plot_timeseries(shares,
#   country_labels = c(US = "United States", FR = "France",
#                      DE = "Germany",       CN = "China"))
# 
# # Cross-country bar chart for a single year
# wid_plot_compare(shares, year = 2020)
# 
# # Lorenz curve
# wid_plot_lorenz(dist, country = "US")

## ----full-example-------------------------------------------------------------
# library(widr); library(dplyr); library(ggplot2)
# 
# download_wid(
#   indicators = "aptinc992j",
#   areas      = c("US", "FR", "CN", "IN"),
#   perc       = "p0p50",
#   years      = 1990:2022
# ) |>
#   wid_convert(target = "ppp", base_year = "2022") |>
#   wid_tidy(country_names = TRUE) |>
#   ggplot(aes(year, value, colour = country_name)) +
#   geom_line(linewidth = 0.8) +
#   scale_y_continuous(labels = scales::dollar_format()) +
#   labs(title    = "Bottom 50% average pre-tax income",
#        subtitle = "2022 USD PPP · equal-split adults 20+",
#        x = NULL, y = NULL, colour = NULL)