## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
# Import packages
library("dplyr")
library("scater")
library("ggplot2")
library("scRNAseq")
library("Coralysis")
library("ComplexHeatmap")
library("SingleCellExperiment")

## ----import data, message=FALSE, warning=FALSE--------------------------------
# Import object
sce <- BachMammaryData()
colnames(sce) <- paste0("cell", 1:ncol(sce)) # create cell names

## ----Seurat normalisation,  message=FALSE, warning=FALSE----------------------
## Normalize the data
set.seed(123)
sce <- scater::logNormCounts(sce)
counts(sce) <- NULL

## ----hvg----------------------------------------------------------------------
# Feature selection with 'scran' package
nhvg <- 500
hvg <- scran::getTopHVGs(sce, n = nhvg)
hvg.idx <- which(row.names(sce) %in% hvg)
sce <- sce[hvg.idx, ]
row.names(sce) <- rowData(sce)$Symbol

## ----multi-level integration, warning=FALSE-----------------------------------
# Perform multi-level integration
set.seed(123)
sce <- RunParallelDivisiveICP(object = sce, L = 25, 
                              icp.batch.size = 1000,
                              build.train.set = FALSE,
                              threads = 2)

## ----integrated dimred--------------------------------------------------------
# Dimensional reduction - unintegrated
set.seed(123)
sce <- RunPCA(
    object = sce, assay.name = "joint.probability",
    return.model = TRUE
)

# UMAP
set.seed(123)
sce <- RunUMAP(
    object = sce, umap.method = "uwot",
    dims = 1:30, n_neighbors = 15,
    min_dist = 0.5, return.model = TRUE
)

## ----basal vs luminal cells, fig.width=7, fig.height=2.75---------------------
# Detect basal versus luminal cells
markers <- c("Krt5", "Krt18")
plots <- lapply(markers, function(x) {
    PlotExpression(sce,
        color.by = x, point.size = 0.2,
        point.stroke = 0.2, scale.values = TRUE
    )
})
cowplot::plot_grid(plotlist = plots, ncol = 2)

## ----cell state identification------------------------------------------------
# Summarise cell cluster probability
sce <- SummariseCellClusterProbability(object = sce, icp.round = 4) # save result in 'colData'
# colData(sce) # check the colData

## ----plot probability, fig.width=5, fig.height=4------------------------------
# Plot cell cluster probabilities - mean
# possible options: "mean_probs", "median_probs", "scaled_median_probs"
PlotExpression(
    object = sce, color.by = "scaled_mean_probs",
    color.scale = "viridis", point.size = 0.2,
    point.stroke = 0.1, legend.title = "Mean prob.\n(min-max)"
)

## ----icp clusters, fig.width=5, fig.height=6----------------------------------
# ICP clusters: identify the ICP probability table with the highest standard deviation
probs <- GetCellClusterProbability(object = sce, icp.round = 4, concatenate = FALSE)
probs.sd <- lapply(X = probs, FUN = function(x) {
    sd(x)
})
icp.run <- which.max(probs.sd) # 7
clt <- paste0("icp_run_round_", icp.run, "_4_clusters")
sce[[clt]] <- factor(sce[[clt]], levels = as.character(1:16))
PlotDimRed(sce,
    color.by = clt, point.size = 0.1,
    point.stroke = 0.1, legend.nrow = 5
)

## ----gene coefficients, message=FALSE, warning=FALSE, fig.width = 5.5, fig.height=14----
# Get gene coefficients
gene.coeff <- GetFeatureCoefficients(sce, icp.run = icp.run, icp.round = 4)
row.names(gene.coeff$icp_56) <- gene.coeff$icp_56$feature
gene.coeff$icp_56 <- gene.coeff$icp_56[, -1]

# Plot top positive coefficients in heatmap
positive.coeff <- (rowSums(gene.coeff$icp_56 > 0) > 0)
heat <- ComplexHeatmap::Heatmap(
    matrix = as.matrix(gene.coeff$icp_56)[positive.coeff, ],
    name = "Gene coef.",
    cluster_columns = FALSE, show_row_dend = FALSE, show_row_names = TRUE,
    row_names_gp = grid::gpar(fontsize = 7)
)
print(heat)

## ----gene coefficients per luminal cluster, fig.width=20, fig.height=10-------
# Check top gene coefficients per basal cluster: 3, 11, 12
gene.coeff.basal <- gene.coeff$icp_56[positive.coeff, ] %>%
    mutate("gene" = row.names(.)) 
top5.luminal <- top5.luminal.plts <- list()
for (i in as.character(c(3, 11, 12))) {
    clt.no <- paste0("clt", i)
    top5.luminal[[clt.no]] <- gene.coeff.basal %>%
        dplyr::select(all_of(c("gene", paste0("coeff_", clt.no)))) %>%
        arrange(desc(.data[[paste0("coeff_", clt.no)]])) %>%
        head(5) %>%
        pull(gene)
    top5.luminal.plts[[clt.no]] <- cowplot::plot_grid(plotlist = lapply(top5.luminal[[clt.no]], function(x) {
        PlotExpression(sce,
            color.by = x, point.size = 0.1, point.stroke = 0.1,
            scale.values = TRUE
        ) + ggplot2::ggtitle(gsub("clt", "cluster: ", clt.no))
    }), ncol = 5, align = "vh")
}
cowplot::plot_grid(plotlist = top5.luminal.plts, nrow = 3, align = "vh")

## ----graph-based clustering, fig.width=5, fig.height=6------------------------
### Graph-based clustering with scran
## Coralysis integrated PCA embedding
bluster.params <- bluster::SNNGraphParam(
    k = 30, cluster.fun = "louvain",
    cluster.args = list(resolution = 0.8)
)
set.seed(1024)
sce$Cluster <- scran::clusterCells(sce,
    use.dimred = "PCA",
    BLUSPARAM = bluster.params
)
PlotDimRed(sce,
    color.by = "Cluster", point.size = 0.2,
    point.stroke = 0.2, legend.nrow = 4
)

## ----cell cluster prob. bins, fig.width=9, fig.height=4.5---------------------
# cell states SCE object
sce.bins <- BinCellClusterProbability(sce, label = "Cluster", bins = 30)

# Project Coralysis bins onto single-cell UMAP
sce.bins <- ReferenceMapping(sce, sce.bins, ref.label = "Cluster", project.umap = TRUE)
umap.bins.labels <- PlotDimRed(sce.bins, color.by = "coral_labels")
umap.bins.probs <- PlotExpression(sce.bins,
    color.by = "aggregated_probability_bins",
    color.scale = "viridis",
    legend.title = "Prob. bins"
)
cowplot::plot_grid(umap.bins.labels, umap.bins.probs, ncol = 2, align = "vh")

## ----coralysis labels, fig.width=12, fig.height=6-----------------------------
# Coralysis labels: single cells vs bins
cowplot::plot_grid(
    PlotDimRed(sce,
        color.by = "Cluster", point.size = 0.2,
        point.stroke = 0.2, legend.nrow = 2
    ),
    umap.bins.labels
)

## ----coralysis probability, fig.width=12, fig.height=5------------------------
# Coralysis probability: single cells vs bins
cowplot::plot_grid(
    PlotExpression(
        object = sce, color.by = "scaled_mean_probs",
        color.scale = "viridis", point.size = 0.2,
        point.stroke = 0.1, legend.title = "Mean prob.\n(min-max)"
    ),
    umap.bins.probs
)

## ----differential expression, fig.width=10, fig.height=6----------------------
# Differential expression programs
corr.features <- CellBinsFeatureCorrelation(object = sce.bins, labels = "10")
top30.corr.clt10 <- corr.features %>%
    arrange(desc(abs(`10`))) %>%
    head(30)

# Heatmap of top 30 genes with the highest absolute Pearson correlation
pick.genes <- row.names(top30.corr.clt10)
mtx <- as.matrix(logcounts(sce.bins[pick.genes, paste0("10_bin", 1:30)]))
mtx <- t(scale(t(mtx)))
col_fun2 <- circlize::colorRamp2(c(0.8, 0.9, 1.0), scales::viridis_pal()(3))
heat.diff.exp <- Heatmap(
    matrix = mtx, name = "Row Z-score\n(normalized data)",
    cluster_rows = TRUE, cluster_columns = FALSE,
    show_column_dend = FALSE, show_row_dend = FALSE,
    use_raster = TRUE, show_column_names = TRUE,
    top_annotation = HeatmapAnnotation(
        "Probability" = colData(sce.bins[, paste0("10_bin", 1:30)])$aggregated_probability_bins,
        simple_anno_size = unit(0.25, "cm"),
        col = list("Probability" = col_fun2), show_annotation_name = FALSE,
        show_legend = TRUE,
        annotation_legend_param = list(Probability = list(
            direction = "horizontal",
            title_position = "topcenter"
        ))
    )
)
print(heat.diff.exp)

## ----plot expression, fig.width=12, fig.height=10-----------------------------
# Look into Cited1 expression
genes <- c("Glycam1", "Fabp3")
cowplot::plot_grid(
    PlotExpression(sce,
        color.by = genes[1], point.size = 0.5,
        point.stroke = 0.5, scale.values = TRUE
    ),
    PlotExpression(sce.bins,
        color.by = genes[1], point.size = 1,
        point.stroke = 1, scale.values = TRUE
    ),
    PlotExpression(sce,
        color.by = genes[2], point.size = 0.5,
        point.stroke = 0.5, scale.values = TRUE
    ),
    PlotExpression(sce.bins,
        color.by = genes[2], point.size = 1,
        point.stroke = 1, scale.values = TRUE
    ),
    
    ncol = 2
)

## ----rsession-----------------------------------------------------------------
# R session
sessionInfo()