## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  cache = FALSE
)

## ----eval=FALSE---------------------------------------------------------------
# install.packages("mrddGlobal")

## -----------------------------------------------------------------------------
library(`mrddGlobal`)

## ----fig.width=6, fig.height=5, fig.align="center"----------------------------
library(sf)
library(ggplot2)

# Loading in shape file data
nc <- st_read(system.file("shape/nc.shp", package = "sf"))
nc <- st_union(nc)

# Generating points in and out of NC
bbox <- st_bbox(nc)
n <- 2000
pts <- data.frame(
  x = runif(n, bbox["xmin"], bbox["xmax"]),
  y = runif(n, bbox["ymin"], bbox["ymax"])
)
knitr::kable(head(pts))

## ----fig.width=6, fig.height=5, fig.align="center"----------------------------
pts_sf <- st_as_sf(pts, coords = c("x", "y"), crs = st_crs(nc))
inside <- st_within(pts_sf, nc, sparse = FALSE)[,1]
pts_sf$status <- ifelse(inside, "Inside NC", "Outside NC")

ggplot() +
  geom_sf(data = nc, fill = NA, color = "black", linewidth = 0.6) +
  geom_sf(data = pts_sf, aes(color = status), alpha = 0.6, size = 1) +
  coord_sf() +
  scale_color_manual(values = c("Inside NC" = "blue", "Outside NC" = "red")) +
  theme_minimal() +
  labs(
    title = "Random Points Inside and Outside North Carolina",
    color = ""
  )

## ----fig.width=6, fig.height=5, fig.align="center"----------------------------
closest_points_all <- get_closest_points_shp(X = pts, shp_file = nc)
knitr::kable(head(closest_points_all))

## ----fig.width=6, fig.height=5, fig.align="center"----------------------------
closest_points_geo <- st_as_sf(as.data.frame(closest_points_all), coords = c("X", "Y"), crs = 4326)
ggplot() +
  geom_sf(data = nc, fill = NA, color = "black", linewidth = 0.6) +
  geom_sf(data = closest_points_geo, alpha = 0.6, size = 1, color = "purple") +
  coord_sf() +
  theme_minimal() +
  labs(
    title = "Closest Boundary Points for North Carolina Observations",
    color = ""
  )

## ----echo=FALSE, out.width="80%", fig.align="center"--------------------------
knitr::include_graphics("images/wrinkled_sphere.png")

## -----------------------------------------------------------------------------
# First, I will create a simulated dataframe
set.seed(1)
n <- 2000
df <- matrix(runif(n * 3, -1.2, 1.2), ncol = 3)

# Then I will define the function that determines the boundary
bndry_func <- function(X) {
  rowSums(X^2) + 0.5 * sin(rowSums(X^4)) - 1
}

# Finally, I will get just the point cloud from the make_cloud function
cloud <- make_cloud(X = df, bndry_func = bndry_func, num_pts_discr = 2e4, step_size = 1e-1, tol = 1e-2,
                    verbose = FALSE)
knitr::kable(head(cloud))

## -----------------------------------------------------------------------------
treated <- ifelse(bndry_func(df) <= 0, 1, 0)
df_bndry <- get_closest_points_bbox(cloud = cloud, X = df, t = treated)
closest_points_all <- df_bndry[["closest_points"]]
knitr::kable(head(closest_points_all))

## -----------------------------------------------------------------------------
# Closest boundary points are found analytically since the boundary is simple
closest_boundary_point <- function(df) {
  # Separating individual coordinates and initializing output matrix
  x1 <- df[, 1]
  x2 <- df[, 2]
  result <- matrix(0, nrow = nrow(df), ncol = 2)
  
  # Case 1: both coordinates are negative
    # Closest boundary point is the origin
  mask1 <- x1 < 0 & x2 < 0
  result[mask1, ] <- cbind(0, 0)
  
  # Case 2: first coordinate is positive and larger than the second coordinate 
    # Closest boundary point is either directly below or above on the x axis
  mask2 <- x1 >= 0 & x1 > x2 & !mask1
  result[mask2, ] <- cbind(x1[mask2], 0)
  
  # Case 3: second coordinate is positive and larger than the first coordinate 
    # Closest boundary point is either directly left or right on the y axis
  mask3 <- x2 >= 0 & x1 < x2 & !mask1
  result[mask3, ] <- cbind(0, x2[mask3])
  
  return(result)
}

sim_data <- gen_data_cef(1, n = 1000, seed = 1)
X <- as.matrix(sim_data[,1:2])
Y <- as.matrix(sim_data[,3])
t <- ifelse(X[,1] >= 0 & X[,2] >= 0, 1, 0)
closest_points_all <- closest_boundary_point(X)

## -----------------------------------------------------------------------------
results <- cef_disc_test(X, Y, t, closest_points_all, verbose = FALSE, num.trees = 5000, enable.ll.split = TRUE, bwselect = "diff")
results

## -----------------------------------------------------------------------------
results <- cef_disc_test(X, Y, t, closest_points_all, num_splits = 10, verbose = FALSE, num.trees = 5000, enable.ll.split = TRUE, bwselect = "diff")
results

## -----------------------------------------------------------------------------
summary(results)

## -----------------------------------------------------------------------------
results <- cef_disc_test(X, Y, t, closest_points_all, num_splits = 15, verbose = FALSE, num.trees = 5000, enable.ll.split = TRUE, bwselect = "diff")
results

## ----fig.width=6, fig.height=6, fig.align="center", echo = FALSE--------------
library(patchwork)
library(dplyr)
library(ggplot2)

set.seed(0)
theta <- seq(0, 2 * pi, length.out = 400)
boundary <- data.frame(
  x = 0.5 + 0.5*cos(theta),
  y = 0.5 + 0.5*sin(theta)
)

split_box <- function(box, depth, max_depth) {
  if (depth == max_depth) return(list(box))
  
  axis <- sample(c("x", "y"), 1)
  
  if (axis == "x") {
    cut <- (box$xmin + box$xmax) / 2
    
    left  <- list(
      xmin = box$xmin, xmax = cut,
      ymin = box$ymin, ymax = box$ymax
    )
    right <- list(
      xmin = cut, xmax = box$xmax,
      ymin = box$ymin, ymax = box$ymax
    )
    
  } else {
    cut <- (box$ymin + box$ymax) / 2
    
    left  <- list(
      xmin = box$xmin, xmax = box$xmax,
      ymin = box$ymin, ymax = cut
    )
    right <- list(
      xmin = box$xmin, xmax = box$xmax,
      ymin = cut, ymax = box$ymax
    )
  }
  
  c(
    split_box(left,  depth + 1, max_depth),
    split_box(right, depth + 1, max_depth)
  )
}

bbox <- list(xmin = 0, xmax = 1, ymin = 0, ymax = 1)
leaves <- split_box(bbox, depth = 0, max_depth = 3)

leaves_df <- bind_rows(lapply(leaves, as.data.frame))

p_partition <- ggplot() +
  geom_polygon(
    data = boundary,
    aes(x, y),
    fill = "grey80",
    color = "grey40",
    alpha = 0.8
  ) +
  geom_rect(
    data = leaves_df,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = NA,
    color = "black",
    linewidth = 0.6
  ) +
  geom_point(aes(x = 0.146, y = 0.854),
             color = "red", size = 3) +
  coord_equal() +
  labs(title = "Recursive Partition with Irregular Support") +
  theme_minimal() + 
  labs(x = "x1", y = "x2")

p_partition

## ----fig.width=6, fig.height=6, fig.align="center", echo = FALSE--------------
top_left_leaf <- leaves_df %>%
  filter(ymax == max(ymax)) %>%  
  filter(xmin == min(xmin))     

set.seed(123)
n_mc <- 500
mc <- data.frame(
  x = runif(n_mc, top_left_leaf$xmin, top_left_leaf$xmax),
  y = runif(n_mc, top_left_leaf$ymin, top_left_leaf$ymax)
)

mc[["inside"]] <- ifelse((mc$x - 0.5)^2 + (mc$y - 0.5)^2 <= 0.5^2, TRUE, FALSE)

ggplot() +
  geom_polygon(
    data = boundary,
    aes(x, y),
    fill = "grey85",
    color = "grey50",
    alpha = 0.6
  ) +
  geom_rect(
    data = top_left_leaf,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax),
    fill = NA,
    color = "black",
    linewidth = 1.2
  ) +
  geom_point(
    data = mc,
    aes(x, y, color = inside),
    size = 1.5
  ) +
  scale_color_manual(values = c("grey70","steelblue")) +
  coord_equal() +
  labs(title = "Monte Carlo Points in Top-Left Leaf",
       color = "Inside Support") +
  theme_minimal() + 
  labs(x = "x1", y = "x2")

## -----------------------------------------------------------------------------
nc <- st_read(system.file("shape/nc.shp", package = "sf"))
nc <- st_union(nc)
bbox <- st_bbox(nc)
bbox

## ----fig.width=6, fig.height=6, fig.align="center"----------------------------
set.seed(123)
n <- 1000
x1 <- runif(n)
x2 <- runif(n)
x <- cbind(x1, x2)

bndry_func <- function(X) {
  return((X[,1] - 0.5)^2 + (X[,2] - 0.5)^2 - 0.5^2)
}

cloud <- make_cloud(X = x, bndry_func = bndry_func, start_pt = c(1, 0), num_pts_discr = 1e4, tol = 1e-2,
                    verbose = FALSE)
plot(cloud, xlab = "x1", ylab = "x2", main = "Circle Boundary Point Cloud")

## -----------------------------------------------------------------------------
inside <- ifelse((x[,1] - 0.5)^2 + (x[,2] - 0.5)^2 <= 0.5^2, 1, 0)
df_bndry <- get_closest_points_bbox(cloud, x, inside)

lower_bounds0 <- df_bndry[["lower_bounds0"]]
lower_bounds1 <- df_bndry[["lower_bounds1"]]
upper_bounds0 <- df_bndry[["upper_bounds0"]]
upper_bounds1 <- df_bndry[["upper_bounds1"]]

cat("lower_bounds0:\n")
print(lower_bounds0)

cat("\nlower_bounds1:\n")
print(lower_bounds1)

cat("\nupper_bounds0:\n")
print(upper_bounds0)

cat("\nupper_bounds1:\n")
print(upper_bounds1)

## -----------------------------------------------------------------------------
in_region1 <- function(X) {
  (X[1] - 0.5)^2 + (X[2] - 0.5)^2 <= 0.5^2
}

in_region0 <- function(X) {
  (X[1] - 0.5)^2 + (X[2] - 0.5)^2 > 0.5^2
}

## ----fig.width=6, fig.height=6, fig.align="center"----------------------------
X <- gen_data_density(1, seed = 12345)
bndry_func <- function(X) {
  # To ensure that each running variable remains between -1 and 1, I will add a penalty 
  if (any(abs(X) > 1)) {
    return(999)
  } else {
    return(min(X))
  }
}

in_region1 <- function(x) {
  x[1] >= 0 && x[2] >= 0 && x[1] <= 1 && x[2] <= 1
}

in_region0 <- function(x) {
  !(x[1] >= 0 && x[2] >= 0) && abs(x[1]) <= 1 && abs(x[2]) <= 1
}

cloud <- make_cloud(X, bndry_func = bndry_func, num_pts_discr = 2e4, step_size = 1e-1, tol = 1e-3,
                    verbose = FALSE)
plot(cloud, xlab = "x1", ylab = "x2", main = "Min Boundary Point Cloud")

## -----------------------------------------------------------------------------
inside <- ifelse(X[,1] >= 0 & X[,2] >= 0, 1, 0)
df_bndry <- get_closest_points_bbox(cloud, X, inside)

results_gen <- density_disc_test(X, inside, df_bndry[["closest_points"]], in_region0,
                                 in_region1, df_bndry[["lower_bounds0"]], 
                                 df_bndry[["lower_bounds1"]], df_bndry[["upper_bounds0"]],
                                 df_bndry[["upper_bounds1"]], verbose = FALSE, 
                                 rand_mid_split = TRUE, num_splits = 1, mc_samples = 500)

results_gen

## -----------------------------------------------------------------------------
closest_points <- closest_boundary_point(X)
t_R <- system.time(
  results_gen <- density_disc_test(X, inside, closest_points, in_region0,
                                 in_region1, c(-1, -1), 
                                 c(0, 0), c(1, 1),
                                 c(1, 1), verbose = FALSE, num_splits = 1, mc_samples = 500)
)
results_gen

## -----------------------------------------------------------------------------
library(Rcpp)

cppFunction(
  'SEXP region1_to_xptr() {
      typedef bool (*region_ptr)(const NumericVector&);
      return XPtr<region_ptr>(new region_ptr(in_region1_cpp), true);
  }', 
  includes = '
  // C++ version of in_region1
  bool in_region1_cpp(const NumericVector& x) {
      return (x[0] >= 0.0 && x[1] >= 0.0 &&
              x[0] <= 1.0 && x[1] <= 1.0);
  }'
)

cppFunction(
  'SEXP region0_to_xptr() {
      typedef bool (*region_ptr)(const NumericVector&);
      return XPtr<region_ptr>(new region_ptr(in_region0_cpp), true);
  }', 
  includes = '
  // C++ version of in_region0
  bool in_region0_cpp(const NumericVector& x) {
      bool not_in_first_quadrant = !(x[0] >= 0.0 && x[1] >= 0.0);
      bool within_box = (std::abs(x[0]) <= 1.0 && std::abs(x[1]) <= 1.0);
      return not_in_first_quadrant && within_box;
  }'
)

ptr1 <- region1_to_xptr()
ptr0 <- region0_to_xptr()

t_cpp <- system.time(
  results_gen <- density_disc_test(X, inside, closest_points, ptr0,
                                 ptr1, c(-1, -1), 
                                 c(0, 0), c(1, 1),
                                 c(1, 1), verbose = FALSE, num_splits = 1, mc_samples = 500)
)
cat("R Runtime:\n")
print(t_R)
cat("\n")
cat("C++ Runtime:\n")
print(t_cpp)

