## ----include = FALSE----------------------------------------------------------
# default knitr options applied to every chunk of this document
knitr::opts_chunk$set(
  warning = FALSE,
  collapse = FALSE
)

## ----load_packages, message = FALSE, warning = FALSE--------------------------
# load required packages
library(clustSIGNAL)
library(scater)
library(ggplot2)
library(dplyr)
library(patchwork)
library(aricode)

## ----embryo_data_prep---------------------------------------------------------
# data(mEmbryo2) loads two objects:
#   me_expr - gene expression logcounts
#   me_data - cell metadata, including the x-y coordinates of cells
data(mEmbryo2)
# A SpatialExperiment object is built from gene expression, cell metadata, and
# cell locations. spatialCoordsNames takes the names of the me_data columns
# that contain the xy-coordinates of cells.
spe <- SpatialExperiment::SpatialExperiment(
  spatialCoordsNames = c("X", "Y"),
  colData = me_data,
  assays = list(logcounts = me_expr)
)
spe

## ----embryo_data_columns------------------------------------------------------
# column names in the metadata
colnames(colData(spe))

## ----ClustSIGNAL_singleRun----------------------------------------------------
set.seed(100)
# name of the colData column that contains the sample names
samples <- "sample_id"
# ClustSIGNAL needs a SpatialExperiment object, the colData column name of the
# sample labels, and the type of output to generate (clusters, neighbours,
# and/or final spe object).
res_emb <- clustSIGNAL(spe, samples, outputs = "a")

## ----embryo_result_list-------------------------------------------------------
# names of the outputs generated
names(res_emb)

## ----embryo_clusters_head-----------------------------------------------------
# the cluster data frame has cell IDs and cluster labels
head(res_emb$clusters)

## ----embryo_final_spe---------------------------------------------------------
# To keep downstream analyses simple, the original spe object is overwritten
# with the one returned by ClustSIGNAL. No information is lost by doing so, as
# ClustSIGNAL only adds information to the input spe object.
spe <- res_emb$spe_final
spe
colnames(colData(spe))

## ----colors-------------------------------------------------------------------
# Palette of 45 hex colours shared by the plots in this script: it is passed
# whole to scale_color_manual() for the spatial plots and indexed by numeric
# cluster label to build the fill colours of the entropy box plots.
colors <- c("#635547", "#8EC792", "#9e6762", "#FACB12", "#3F84AA", "#0F4A9C", 
            "#ff891c", "#EF5A9D", "#C594BF", "#DFCDE4", "#139992", "#65A83E", 
            "#8DB5CE", "#005579", "#C9EBFB", "#B51D8D", "#532C8A", "#8870ad", 
            "#cc7818", "#FBBE92", "#EF4E22", "#f9decf", "#c9a997", "#C72228", 
            "#f79083", "#F397C0", "#DABE99", "#c19f70", "#354E23", "#C3C388",
            "#647a4f", "#CDE088", "#f7f79e", "#F6BFCB", "#7F6874", "#989898", 
            "#1A1A1A", "#FFFFFF", "#e6e6e6", "#77441B", "#F90026", "#A10037", 
            "#DA5921", "#E1C239", "#9DD84A")

## ----embryo_spatialPlots1-----------------------------------------------------
# scater plots from the reduced dimension slot, so the spatial coordinates of
# the spe object are stored there under the name "spatial"
reducedDim(spe, "spatial") <- spatialCoords(spe)

## ----embryo_spatialPlots2-----------------------------------------------------
# spatial plot of the cells, coloured by ClustSIGNAL cluster
spt_clust <- scater::plotReducedDim(
  spe,
  dimred = "spatial",
  colour_by = "ClustSIGNAL",
  point_size = 4,
  point_alpha = 1,
  scattermore = TRUE
) +
  ggtitle("A. Spatial plot of clusters") +
  scale_color_manual(values = colors) +
  guides(
    colour = guide_legend(
      override.aes = list(size = 5),
      title = "Clusters"
    )
  ) +
  theme(text = element_text(size = 12))

## ----embryo_spatialPlots3-----------------------------------------------------
# The entropy distribution plotted at cluster level can indicate which clusters
# have cells from homogeneous/heterogeneous space.
df_met <- spe |> colData() |> as.data.frame()
# median entropy of each cluster, with clusters sorted by that median
ct_ent <- df_met |>
  mutate(ClustSIGNAL = as.character(ClustSIGNAL)) |>
  group_by(ClustSIGNAL) |>
  summarise(mdEntropy = median(entropy)) |>
  arrange(mdEntropy)
# reorder the factor levels so the boxes appear in order of median entropy
df_met$ClustSIGNAL <- factor(df_met$ClustSIGNAL, levels = ct_ent$ClustSIGNAL)
# index the palette by numeric cluster label, in the reordered level order;
# ct_ent$ClustSIGNAL is already character, so as.numeric() is sufficient
col_ent <- colors[as.numeric(ct_ent$ClustSIGNAL)]
box_clust <- ggplot(
  df_met,
  aes(x = ClustSIGNAL, y = entropy, fill = ClustSIGNAL)
) +
  geom_boxplot() +
  scale_fill_manual(values = col_ent) +
  ggtitle("B. Entropy distribution of clusters") +
  # only x and y are labelled: the fill legend is hidden below, and "name" is
  # not an aesthetic, so a `name =` label would never be displayed
  labs(x = "ClustSIGNAL clusters", y = "Entropy") +
  theme_classic() +
  theme(legend.position = "none",
        text = element_text(size = 12),
        axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
        plot.title = element_text(face = "bold"))

## ----embryo_spatialPlots4-----------------------------------------------------
# spatial plot (left) and entropy box plot (right) side by side
spt_clust + box_clust + patchwork::plot_layout(guides = "collect", 
                                               widths = c(2, 3))

## ----embryo_clusterMetrics----------------------------------------------------
# To assess the accuracy of clustering, the cluster labels are often compared
# to prior annotations. Here, we compare ClustSIGNAL cluster labels to the
# annotations available with this public data.
spe |>
  colData() |>
  as.data.frame() |>
  summarise(
    ARI = aricode::ARI(celltype_mapped_refined, ClustSIGNAL), # calculate ARI
    NMI = aricode::NMI(celltype_mapped_refined, ClustSIGNAL)) # calculate NMI

## ----embryo_entropyMetrics----------------------------------------------------
# we can assess the overall entropy distribution of the dataset
spe |>
  colData() |>
  as.data.frame() |>
  summarise(min_Entropy = min(entropy),
            # number of cells with entropy exactly 0; computed from the
            # `entropy` column of the summarised data frame (not from the
            # global spe object) so it always matches the rows summarised
            min_Entropy_count = sum(entropy == 0),
            max_Entropy = max(entropy),
            mean_Entropy = mean(entropy))

## ----entropyPlots1------------------------------------------------------------
# we can also visualize the distribution and spread of the entropy values
hst_ent <- spe |>
  colData() |>
  as.data.frame() |>
  ggplot(aes(entropy)) +
  geom_histogram(binwidth = 0.05) +
  ggtitle("A. Entropy spread") +
  labs(x = "Entropy", y = "Number of neighbourhoods") +
  theme_classic() +
  theme(text = element_text(size = 12),
        plot.title = element_text(face = "bold"))

## ----entropyPlots2------------------------------------------------------------
# spatial plot of the cells, coloured by their entropy value
# NOTE(review): scale_colour_gradient2() is a diverging scale; with
# non-negative entropy values the `low` colour is probably never reached -
# confirm whether scale_colour_gradient() was intended.
# No size scale is added: point size is fixed through point_size and no size
# aesthetic is mapped in this call, so a size scale would have nothing to act on.
spt_ent <- scater::plotReducedDim(spe, colour_by = "entropy",
                                    # specify spatial low dimension
                                    dimred = "spatial", point_alpha = 1,
                                    point_size = 4, scattermore = TRUE) +
  ggtitle("B. Entropy spatial distribution") +
  scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
  theme(text = element_text(size = 12))

## ----entropyPlots3------------------------------------------------------------
# histogram (left) and spatial entropy plot (right) side by side
hst_ent + spt_ent

## ----hypothal_data_prep-------------------------------------------------------
# data(mHypothal) loads two objects:
#   mh_expr - gene expression logcounts
#   mh_data - cell metadata and cell x-y coordinates
data(mHypothal)
# build the spe object from gene expression, cell metadata, and cell locations;
# spatialCoordsNames takes the names of the mh_data columns that contain the
# xy-coordinates of cells
spe2 <- SpatialExperiment(
  spatialCoordsNames = c("X", "Y"),
  colData = mh_data,
  assays = list(logcounts = mh_expr)
)
spe2

## ----hypothal_data_columns----------------------------------------------------
# metadata summary
str(colData(spe2))

## ----ClustSIGNAL_multiRun-----------------------------------------------------
set.seed(110)
# ClustSIGNAL can also be run on a dataset with multiple samples. The inputs
# are the same as before: the SpatialExperiment object and the colData column
# name of the sample labels. The method can be run in parallel through the
# threads option; here threads = 4 is used to run on 4 cores.
# The batch and batch_by options are not used because no batch effects were
# observed in this data subset.
# name of the colData column that contains the sample names
samples <- "samples"
res_hyp <- clustSIGNAL(spe2, samples, threads = 4, outputs = "a")

## ----hypothal_final_spe-------------------------------------------------------
# To keep downstream analyses simple, the original spe object is overwritten
# with the one returned by ClustSIGNAL.
spe2 <- res_hyp$spe_final
spe2

## ----hypothal_samples---------------------------------------------------------
# sample names, taken from the levels of the sample column
samplesList <- levels(spe2[[samples]])
samplesList

## ----hypothal_clusterMetrics--------------------------------------------------
# Per-sample summary: ARI and NMI compare the ClustSIGNAL cluster labels to
# the annotations available with the public data (to assess accuracy), and
# the entropy columns describe the overall entropy distribution of each
# sample in the dataset.
as.data.frame(colData(spe2)) |>
  group_by(samples) |>
  summarise(
    ARI = aricode::ARI(Cell_class, ClustSIGNAL),
    NMI = aricode::NMI(Cell_class, ClustSIGNAL),
    min_Entropy = min(entropy),
    min_Entropy_count = sum(entropy == 0),
    max_Entropy = max(entropy),
    mean_Entropy = mean(entropy)
  )

## ----hypothal_spatialPlots1---------------------------------------------------
# scater plots from the reduced dimension section, so the spatial coordinates
# are stored there under the name "spatial"
reducedDim(spe2, "spatial") <- spatialCoords(spe2)

## ----hypothal_spatialPlots2---------------------------------------------------
# spatial plot of ClustSIGNAL clusters, one panel per sample
spt_clust2 <- scater::plotReducedDim(
  spe2,
  dimred = "spatial", # specify spatial low dimension
  colour_by = "ClustSIGNAL",
  point_size = 4,
  point_alpha = 1,
  scattermore = TRUE
) +
  scale_color_manual(values = colors) +
  facet_wrap(vars(spe2[[samples]]), scales = "free", nrow = 1) +
  guides(
    colour = guide_legend(
      override.aes = list(size = 3),
      title = "Clusters"
    )
  ) +
  theme(text = element_text(size = 12))

## ----hypothal_spatialPlots3---------------------------------------------------
# For visualising cluster-level entropy distribution, we reorder the clusters 
# by their median entropy value in each sample
df_met2 <- spe2 |> colData() |> as.data.frame()

# Build one entropy box plot per sample. The result is a list named by sample,
# created in one pass with lapply() rather than grown inside a loop.
box_clust2 <- lapply(setNames(samplesList, samplesList), function(s) {
  # metadata rows belonging to the current sample
  df_met_sub <- df_met2[df_met2[[samples]] == s, ]
  # median entropy of each cluster in this sample, sorted by that median
  ct_ent2 <- df_met_sub |>
    mutate(ClustSIGNAL = as.character(ClustSIGNAL)) |>
    group_by(ClustSIGNAL) |>
    summarise(mdEntropy = median(entropy)) |>
    arrange(mdEntropy)
  # reorder the factor levels so the boxes appear in order of median entropy
  df_met_sub$ClustSIGNAL <- factor(df_met_sub$ClustSIGNAL,
                                   levels = ct_ent2$ClustSIGNAL)
  # index the palette by numeric cluster label, in the reordered level order
  col_ent2 <- colors[as.numeric(ct_ent2$ClustSIGNAL)]
  # box plot of cluster entropy
  ggplot(df_met_sub, aes(x = ClustSIGNAL, y = entropy, fill = ClustSIGNAL)) +
    geom_boxplot() +
    scale_fill_manual(values = col_ent2) +
    facet_wrap(vars(samples), nrow = 1) +
    labs(x = "ClustSIGNAL clusters", y = "Entropy") +
    ylim(0, NA) +
    theme_classic() +
    theme(strip.text = element_blank(),
          legend.position = "none",
          text = element_text(size = 12),
          axis.text.x = element_text(angle = 90, vjust = 0.5))
})

## ----hypothal_spatialPlots4---------------------------------------------------
# All per-sample box plots are placed in one row under the spatial plot. The
# whole list is used (instead of a fixed 1:3 subset) so the bottom row always
# has one panel per sample in samplesList.
spt_clust2 / (patchwork::wrap_plots(box_clust2, nrow = 1) +
                plot_layout(axes = "collect")) +
  plot_layout(guides = "collect", heights = c(5, 3)) +
  plot_annotation(
    title = "Spatial (top) and entropy (bottom) distributions of clusters",
    theme = theme(plot.title = element_text(hjust = 0.5, face = "bold")))

## ----hypothal_entropyPlots1---------------------------------------------------
# histogram of entropy values, one panel per sample
hst_ent2 <- spe2 |>
  colData() |>
  as.data.frame() |>
  ggplot(aes(entropy)) +
  geom_histogram(binwidth = 0.05) +
  facet_wrap(vars(samples), nrow = 1) +
  labs(x = "Entropy", y = "Number of neighbourhoods") +
  theme_classic() +
  theme(text = element_text(size = 12))

## ----hypothal_entropyPlots2---------------------------------------------------
# spatial plot of the cells coloured by entropy, one panel per sample
# NOTE(review): scale_colour_gradient2() is a diverging scale; with
# non-negative entropy values the `low` colour is probably never reached -
# confirm whether scale_colour_gradient() was intended.
# No size scale is added: point size is fixed through point_size and no size
# aesthetic is mapped in this call, so a size scale would have nothing to act on.
spt_ent2 <- scater::plotReducedDim(spe2, colour_by = "entropy",
                                  # specify spatial low dimension
                                  dimred = "spatial", point_alpha = 1,
                                  point_size = 4, scattermore = TRUE) +
  scale_colour_gradient2("Entropy", low = "grey", high = "blue") +
  facet_wrap(vars(spe2[[samples]]), scales = "free", nrow = 1) +
  theme(strip.text = element_blank(),
        text = element_text(size = 12))

## ----hypothal_entropyPlots3---------------------------------------------------
# histograms stacked on top of the spatial entropy plots
hst_ent2 / spt_ent2 + plot_layout(heights = c(4, 5)) +
    plot_annotation(
      title = "Entropy spread (top) and spatial distribution (bottom)",
      theme = theme(plot.title = element_text(hjust = 0.5, face = "bold")))

## ----ClustSIGNALseq_data------------------------------------------------------
# bring the logcounts (me_expr) and metadata (me_data) into the environment
data(mEmbryo2)

# as before, the data are read into a SpatialExperiment object
spe <- SpatialExperiment(
  spatialCoordsNames = c("X", "Y"),
  colData = me_data,
  assays = list(logcounts = me_expr)
)

## ----ClustSIGNALseq_prep------------------------------------------------------
set.seed(100)
# low dimension data must be generated first, for the initial clustering
spe <- scater::runPCA(spe)

## ----ClustSIGNALseq_step1-----------------------------------------------------
# initial clustering and subclustering on the "PCA" reduced dimension
spe <- clustSIGNAL::p1_clustering(spe, dimRed = "PCA")

## ----ClustSIGNALseq_step1_out1------------------------------------------------
# clustering output
head(spe$initCluster)

## ----ClustSIGNALseq_step1_out2------------------------------------------------
# subclustering output
head(spe$initSubcluster)

## ----ClustSIGNALseq_step2-----------------------------------------------------
# Neighbourhood detection; the result is a list of neighbourhood information
# whose nnCells and regXclust elements are used in the following steps.
outReg <- clustSIGNAL::neighbourDetect(spe, samples = "sample_id")

## ----ClustSIGNALseq_step2_out1------------------------------------------------
# first three rows and columns of nnCells
outReg$nnCells[seq_len(3), seq_len(3)]

## ----ClustSIGNALseq_step2_out2------------------------------------------------
# first element of regXclust
outReg$regXclust[[1]]

## ----ClustSIGNALseq_step3-----------------------------------------------------
# entropy measurement from the regXclust output of the previous step
spe <- clustSIGNAL::entropyMeasure(spe, outReg$regXclust)

## ----ClustSIGNALseq_step3_out-------------------------------------------------
# entropy values
head(spe$entropy)

## ----ClustSIGNALseq_step4-----------------------------------------------------
# adaptive smoothing using the nnCells output of the neighbourhood step
spe <- clustSIGNAL::adaptiveSmoothing(spe, outReg$nnCells)

## ----ClustSIGNALseq_step4_out-------------------------------------------------
# first five rows and three columns of the "smoothed" assay
assay(spe, "smoothed")[seq_len(5), seq_len(3)]

## ----ClustSIGNALseq_step5-----------------------------------------------------
# final clustering step
spe <- clustSIGNAL::p2_clustering(spe)

## ----ClustSIGNALseq_step5_out-------------------------------------------------
# ClustSIGNAL cluster labels
head(spe$ClustSIGNAL)

## -----------------------------------------------------------------------------
# record the R session and package versions used
sessionInfo()