## ----include = FALSE----------------------------------------------------------
# Document-wide knitr chunk defaults: collapse = TRUE and "#>" as the
# prefix for printed output.
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## ----setup, include = FALSE---------------------------------------------------
# Turn off the `warning` and `message` chunk options for the chunks that follow.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE
)
library(nycOpenData)
library(ggplot2)
library(dplyr)

## ----small-sample-------------------------------------------------------------
# Pull a tiny sample (limit = 3) of dataset "423i-ukqr" to inspect its shape
small_sample <- nyc_pull_dataset(
  "423i-ukqr",
  limit = 3
)
small_sample

# List the column names available in the sample
small_sample |>
  names()

## ----filter-employee-count----------------------------------------------------

# Same dataset, limit = 3, now filtered to rows whose number_of_employees
# value is "<5"
lessthan5_locallaw18payreport <- nyc_pull_dataset(
  "423i-ukqr",
  limit = 3,
  filters = list(number_of_employees = "<5")
)
lessthan5_locallaw18payreport

# Sanity check: the distinct number_of_employees values in the result
distinct(lessthan5_locallaw18payreport, number_of_employees)

## ----filter-employee-count-agency-gender--------------------------------------
# Build the working dataset: limit = 15, filtered on employee-count bracket,
# agency name and gender at the same time
lessthan5TI_payreport <- nyc_pull_dataset(
  "423i-ukqr",
  limit = 15,
  filters = list(
    number_of_employees = "<5",
    agency_name = "TECHNOLOGY & INNOVATION",
    gender = "Female"
  )
)

# Preview the first six rows of the new dataset
slice_head(lessthan5TI_payreport, n = 6)

# Sanity checks on the filtering:
# total number of rows returned
summarise(lessthan5TI_payreport, rows = n())

# distinct agency names present in the result
distinct(lessthan5TI_payreport, agency_name)

# distinct gender values present in the result
distinct(lessthan5TI_payreport, gender)

## ----ethnicity-graph, fig.alt="Bar chart showing the ethnicity of female workers in departments with fewer than 5 employees in Technology & Innovation.", fig.cap="Bar chart showing the ethnicity of female workers in municipal departments with fewer than 5 employees in Technology & Innovation (15 most recent records).", fig.height=5, fig.width=7----

# Horizontal bar chart of the number of records per ethnicity in
# lessthan5TI_payreport, with bars ordered by frequency.
#
# The title is split across two lines: written as one line it is likely wider
# than the 7-inch figure (fig.width = 7) and would be cut off at the edge.

lessthan5TI_payreport |>
  count(ethnicity) |>            # one row per ethnicity, with its count in `n`
  ggplot(aes(
    x = n,                       # bar length = number of rows per ethnicity
    y = reorder(ethnicity, n)    # order the ethnicity axis by count
  )) +
  geom_col(fill = "steelblue") + # geom_col plots the precomputed counts as-is
  theme_minimal() +
  labs(
    title = paste(
      "Ethnicity of Female Employees in Bracket of TI Agencies",
      "with Fewer Than 5 Employees",
      sep = "\n"                 # line break keeps the title inside the figure
    ),
    subtitle = "Most Recent 15 Records",
    x = "Number of Records",
    y = "Ethnicity"
  )


