## ----setup, include=FALSE-----------------------------------------------------
# Chunk defaults for the whole vignette: merge each chunk's source and output
# into one block and prefix printed output lines with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# bigmemory is only checked for, not assumed: when it is missing, print a
# notice and stop knitting here rather than failing at library(bigmemory).
if (!requireNamespace("bigmemory", quietly = TRUE)) {
  cat("This vignette requires the 'bigmemory' package.\n")
  knitr::knit_exit()
}

library(bigKNN)
library(bigmemory)

## ----helpers, include=FALSE---------------------------------------------------
# Turn a k-nearest-neighbour result into one long, labelled data frame.
#
# result:    object read through `$k`, `$index` and `$distance`; the latter
#            two are indexed as [query position, ] (one row per query).
# query_ids: labels for the queries, in the same order as the result rows.
# ref_ids:   labels for the reference rows, looked up by the values stored
#            in `result$index`.
#
# Returns a data frame with columns `query`, `rank`, `neighbor` and `distance`
# (distances rounded to 4 significant digits), one group of rows per query.
# Returns NULL when `query_ids` is empty.
knn_table <- function(result, query_ids, ref_ids) {
  pieces <- vector("list", length(query_ids))

  for (q in seq_along(query_ids)) {
    neighbor_rows <- result$index[q, ]
    neighbor_dist <- result$distance[q, ]

    pieces[[q]] <- data.frame(
      query = query_ids[q],
      rank = seq_len(result$k),
      neighbor = ref_ids[neighbor_rows],
      distance = signif(neighbor_dist, 4),
      row.names = NULL
    )
  }

  # Stack the per-query tables; rbind() renumbers the rows 1..n.
  do.call(rbind, pieces)
}

# Extract the matches for query `i` from a flattened radius-search result.
#
# result:  object read through `$offset`, `$index` and `$distance`; the
#          matches of query `i` occupy positions offset[i] .. offset[i + 1] - 1
#          of the flat `$index` and `$distance` vectors.
# i:       position of the query whose matches are wanted.
# ref_ids: labels for the reference rows, looked up by the stored indices.
#
# Returns a data frame with columns `neighbor` and `distance` (rounded to
# 4 significant digits); it has zero rows when the query has no matches.
radius_slice <- function(result, i, ref_ids) {
  first <- result$offset[i]
  last <- result$offset[i + 1L] - 1L

  if (last < first) {
    # Empty slice: first:last would count downwards, so build the empty
    # table explicitly.
    matches <- data.frame(neighbor = character(0), distance = numeric(0))
  } else {
    hits <- first:last
    matches <- data.frame(
      neighbor = ref_ids[result$index[hits]],
      distance = signif(result$distance[hits], 4),
      row.names = NULL
    )
  }

  matches
}

## ----create-reference---------------------------------------------------------
# Six labelled reference points with two coordinates (x1, x2).
reference_points <- data.frame(
  id = paste0("p", 1:6),
  x1 = c(1, 2, 1, 2, 3, 4),
  x2 = c(1, 1, 2, 2, 2, 3)
)

# Two labelled query points described by the same two coordinates.
query_points <- data.frame(
  id = c("q1", "q2"),
  x1 = c(1.2, 2.8),
  x2 = c(1.1, 2.2)
)

# Only the coordinate columns are converted; the id columns stay in the data
# frames and are used later to label results. The reference coordinates are
# wrapped with as.big.matrix(), the queries are kept as an ordinary matrix.
reference <- as.big.matrix(as.matrix(reference_points[c("x1", "x2")]))
query_matrix <- as.matrix(query_points[c("x1", "x2")])

# Print both tables so the toy data is visible in the rendered output.
reference_points
query_points

## ----self-knn-----------------------------------------------------------------
# k = 2 neighbours with no `query` argument; presumably the reference rows are
# searched against themselves (the chunk is labelled "self-knn") -- confirm
# against the bigKNN documentation.
self_knn <- knn_bigmatrix(reference, k = 2)
self_knn

## ----self-knn-components------------------------------------------------------
# Inspect the raw result components; distances are rounded for display only.
self_knn$index
round(self_knn$distance, 3)

## ----self-knn-table-----------------------------------------------------------
# The reference ids label both the queries and the neighbours here.
knn_table(self_knn, query_ids = reference_points$id, ref_ids = reference_points$id)

## ----query-knn----------------------------------------------------------------
# Three nearest reference points for each row of query_matrix.
# exclude_self = FALSE is passed explicitly; presumably because the queries
# are separate points rather than rows of `reference` -- confirm.
query_knn <- knn_bigmatrix(
  reference,
  query = query_matrix,
  k = 3,
  exclude_self = FALSE
)

# Show the raw result, then the labelled long-format table.
query_knn
knn_table(query_knn, query_ids = query_points$id, ref_ids = reference_points$id)

## ----radius-search------------------------------------------------------------
# Radius search for each query row with radius = 1.15.
# NOTE(review): presumably returns every reference point within that distance
# of each query -- confirm against the bigKNN documentation.
radius_result <- radius_bigmatrix(
  reference,
  query = query_matrix,
  radius = 1.15,
  exclude_self = FALSE
)

# Show the result, then the two components printed below; `offset` is the
# vector radius_slice() reads to locate each query's matches.
radius_result
radius_result$n_match
radius_result$offset

## ----radius-slices------------------------------------------------------------
# Labelled matches for the first and the second query, one table each.
radius_slice(radius_result, 1, reference_points$id)
radius_slice(radius_result, 2, reference_points$id)

## ----radius-counts------------------------------------------------------------
# Same reference, queries and radius as the radius-search chunk above.
# NOTE(review): presumably returns only per-query match counts -- confirm
# against the bigKNN documentation.
count_within_radius_bigmatrix(
  reference,
  query = query_matrix,
  radius = 1.15,
  exclude_self = FALSE
)

## ----metric-comparison--------------------------------------------------------
# Run the same single-neighbour (k = 1) query once per metric name and stack
# the per-metric tables: one row per (metric, query) pair holding the label of
# the first neighbour and its distance rounded to 4 significant digits.
metric_summary <- do.call(rbind, lapply(
  c("euclidean", "sqeuclidean", "cosine"),
  function(metric_name) {
    nearest_hit <- knn_bigmatrix(
      reference,
      query = query_matrix,
      k = 1,
      metric = metric_name,
      exclude_self = FALSE
    )

    # Column 1 holds the only neighbour requested for each query.
    top_index <- nearest_hit$index[, 1]
    top_distance <- nearest_hit$distance[, 1]

    data.frame(
      metric = metric_name,
      query = query_points$id,
      nearest = reference_points$id[top_index],
      distance = signif(top_distance, 4),
      row.names = NULL
    )
  }
))

metric_summary

