## ----setup, include=FALSE-----------------------------------------------------
hasFNN <- requireNamespace("FNN", quietly = TRUE)
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")

## ----install, eval=FALSE------------------------------------------------------
# devtools::install_github("yinqiaoyan/spARI", dependencies = TRUE)

## ----install_FNN, eval=FALSE--------------------------------------------------
# install.packages("FNN")  # find k-nearest neighbors

## -----------------------------------------------------------------------------
library(spARI)

## -----------------------------------------------------------------------------
data("spARI_example_data")
true_labels <- spARI_example_data$true_labels
c1_labels <- spARI_example_data$c1_labels
c2_labels <- spARI_example_data$c2_labels
coords <- spARI_example_data$coords

## -----------------------------------------------------------------------------
res_value1 <- spARI(r_labels=true_labels, c_labels=c1_labels, coords=coords)
res_value2 <- spARI(r_labels=true_labels, c_labels=c2_labels, coords=coords)
print(res_value1)
print(res_value2)

## -----------------------------------------------------------------------------
cols <- c("#D27786","#A5D7F1","#c599f3","#ffb610") 
par(mfrow=c(1,3), mar=c(3,3,1,1))
plot(coords[, 1], coords[, 2], col=cols[true_labels], pch=16, main="Reference")
plot(coords[, 1], coords[, 2], col=cols[c1_labels], pch=16, main="Clustering i")
plot(coords[, 1], coords[, 2], col=cols[c2_labels], pch=16, main="Clustering ii")

## -----------------------------------------------------------------------------
set.seed(1)
coords <- data.frame(x = c(runif(30,0,1), runif(30,1.2,2.2)),
                     y = runif(60,0,1))
# reference
ref <- c(rep(1,30), rep(2,30))
# clustering A
cluA <- sample(ref)
# clustering B
cluB <- ref

par(mfrow=c(1,2), mar=c(3,3,1,1))
plot(coords$x, coords$y, col=cluA, pch=16, main="Clustering A (lower spRI/spARI)")
plot(coords$x, coords$y, col=cluB, pch=16, main="Clustering B (higher spRI/spARI)")

# compute spRI and spARI
res_A <- spARI(ref, cluA, coords = as.matrix(coords))
res_B <- spARI(ref, cluB, coords = as.matrix(coords))
print(res_A)
print(res_B)

## -----------------------------------------------------------------------------
library(spARI)
data("spARI_example_data")
true_labels <- spARI_example_data$true_labels
c1_labels <- spARI_example_data$c1_labels
c2_labels <- spARI_example_data$c2_labels
coords <- spARI_example_data$coords

## Compute the distance matrix
coords_norm <- coords
coords_norm[,1] <- (coords[,1] - min(coords[,1])) / (max(coords[,1]) - min(coords[,1]))
coords_norm[,2] <- (coords[,2] - min(coords[,2])) / (max(coords[,2]) - min(coords[,2]))
dist_mat <- as.matrix(stats::dist(coords_norm))

## -----------------------------------------------------------------------------
res_value1 <- spARI(r_labels=true_labels, c_labels=c1_labels, dist_mat=dist_mat)
res_value2 <- spARI(r_labels=true_labels, c_labels=c2_labels, dist_mat=dist_mat)
print(res_value1)
print(res_value2)

## ----message=FALSE, warning=FALSE, eval=hasFNN--------------------------------
library(FNN)
library(Matrix)

## Define sparse distance matrix generation function
build_symmetric_knn_distance_matrix <- function(coord_mat, k = 5) {
  N <- nrow(coord_mat)
  
  # Find k nearest neighbors for each object (exclude itself)
  knn_res <- FNN::get.knn(coord_mat, k = k)
  i_idx <- rep(1:N, each = k)
  j_idx <- as.vector(t(knn_res$nn.index))
  d_val <- as.vector(t(knn_res$nn.dist))
  
  # Generate sparse matrix (symmetric)
  D_temp <- sparseMatrix(
    i = i_idx,
    j = j_idx,
    x = d_val,
    dims = c(N, N)
  )
  
  D1 <- as(D_temp, "dgTMatrix")
  D2 <- as(t(D_temp), "dgTMatrix")
  
  i_all <- c(D1@i, D2@i)
  j_all <- c(D1@j, D2@j)
  x_all <- c(D1@x, D2@x)
  
  # Combine and aggregate max
  df <- data.frame(i = i_all + 1, j = j_all + 1, x = x_all)  # +1 for R indexing
  df_agg <- aggregate(x ~ i + j, data = df, FUN = max)
  
  # Create symmetric sparse matrix
  D_sym <- sparseMatrix(i = df_agg$j, j = df_agg$i, x = df_agg$x)
  
  return(D_sym)
}

## Generate sparse distance matrix
set.seed(123)
N <- 1e5  # 100,000 objects
coords <- matrix(runif(N*2), N, 2)  # coordinates ranging between 0 and 1
dist_mat <- build_symmetric_knn_distance_matrix(coords, k = 5)

dim(dist_mat)
# 100000 100000
length(dist_mat@x)
# 595850

## ----eval=hasFNN--------------------------------------------------------------
K <- 15
set.seed(123)

# reference partition
true_labels <- sample(1:K, N, replace = TRUE)
# clustering partition
c_labels <- true_labels
ids <- sample(which(true_labels == 1), 5000)
c_labels[ids] <- 2
ids <- sample(which(true_labels == 3), 5000)
c_labels[ids] <- 4

## ----eval=hasFNN--------------------------------------------------------------
library(spARI)

stime <- Sys.time()
res <- spARI(true_labels, c_labels, dist_mat=dist_mat)
etime <- Sys.time()
print(res)
#      spRI     spARI 
# 0.9834065 0.8752946 
print(etime-stime) 
# Execution time is about 4.8 seconds
# at a MacBook Air powered by Apple M4 CPU with 16GB of RAM

## -----------------------------------------------------------------------------
set.seed(12)  
## Generate adjacency matrix
n <- 10
p <- 0.4
adj_mat <- matrix(0L, n, n)   
up_tri <- upper.tri(adj_mat)
adj_mat[up_tri] <- rbinom(sum(up_tri), size = 1, prob = p)
adj_mat <- adj_mat + t(adj_mat)               
diag(adj_mat) <- 0                   
print(adj_mat)

## Generate synthetic reference and clustering partitions
ref <- sample(1:3, n, replace = TRUE)
clu <- ref
clu[c(6,7)] <- 1

## -----------------------------------------------------------------------------
library(spARI)
res <- spARI(r_labels = ref, c_labels = clu, dist_mat = adj_mat)
print(res)

## -----------------------------------------------------------------------------
library(spARI)
data("spARI_example_data")
true_labels <- spARI_example_data$true_labels
c1_labels <- spARI_example_data$c1_labels
c2_labels <- spARI_example_data$c2_labels
coords <- spARI_example_data$coords

## -----------------------------------------------------------------------------
set.seed(42)
perm_test(r_labels=true_labels, c_labels=c1_labels, coords=coords, 
          use_parallel=FALSE)
perm_test(r_labels=true_labels, c_labels=c2_labels, coords=coords, 
          use_parallel=FALSE)

## ----message=FALSE, warning=FALSE---------------------------------------------
library(SpatialExperiment)
library(S4Vectors)

set.seed(123)
count_matrix <- matrix(
  sample(0:10, 100, replace = TRUE),
  nrow = 10,  # 10 genes
  ncol = 10   # 10 spots
)
rownames(count_matrix) <- paste0("gene", 1:10)
colnames(count_matrix) <- paste0("spot", 1:10)

# Construct gene annotations (rowData)
gene_annotation <- DataFrame(
  gene_id = rownames(count_matrix),
  gene_name = paste0("Gene_", 1:10)
)

# Construct spot metadata (colData)
spot_metadata <- DataFrame(
  spot_id = colnames(count_matrix),
  sample_id = rep("sample1", 10),
  cell_type = c(2, 3, 2, 2, 1, 3, 1, 3, 1, 3),
  cluster = c(2, 3, 2, 3, 1, 3, 3, 3, 1, 3)
)

# Construct spatial coordinates (spatialCoords)
coords_matrix <- cbind(
  x = runif(10, min = 0, max = 100),
  y = runif(10, min = 0, max = 100)
)
rownames(coords_matrix) <- colnames(count_matrix)

# Construct the SpatialExperiment object
spe <- SpatialExperiment(
  assays = list(counts = count_matrix),
  rowData = gene_annotation,
  colData = spot_metadata,
  spatialCoords = coords_matrix
)

## -----------------------------------------------------------------------------
library(spARI)
spARI(spe=spe)

set.seed(42)
perm_test(spe=spe, use_parallel=FALSE)

## ----session-info, echo=FALSE-------------------------------------------------
sessionInfo()

