## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")

## ----installation, eval=FALSE-------------------------------------------------
# # Installation
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("Coralysis")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
# Import packages
library("scRNAseq")
library("Coralysis")
library("SingleCellExperiment")

## ----data---------------------------------------------------------------------
# Import data
zeisel <- ZeiselBrainData()
romanov <- RomanovBrainData()

## ----preprocess---------------------------------------------------------------
# Normalize
zeisel <- scater::logNormCounts(zeisel)
counts(zeisel) <- NULL
altExps(zeisel) <- list()
romanov <- scater::logNormCounts(romanov)
counts(romanov) <- NULL

# Intersected genes
genes <- intersect(row.names(zeisel), row.names(romanov))
zeisel <- zeisel[genes, ]
romanov <- romanov[genes, ]

# Remove colData
colData(zeisel) <- colData(zeisel)[, "level1class", drop = FALSE]
colnames(colData(zeisel)) <- "cell_type"
colData(romanov) <- colData(romanov)[, "level1 class", drop = FALSE]
colnames(colData(romanov)) <- "cell_type"

# Batch
zeisel[["batch"]] <- "Zeisel et al."
romanov[["batch"]] <- "Romanov et al."

# Merge cells
sce <- cbind(zeisel, romanov)
rm(list = c("zeisel", "romanov"))

# HVG
hvg <- scran::getTopHVGs(sce, n = 2000)
sce <- sce[hvg, ]

## ----prepare data-------------------------------------------------------------
# Prepare data:
# checks 'logcounts' format & removes non-expressed genes
sce <- PrepareData(object = sce)

## ----integration--------------------------------------------------------------
# Multi-level integration
set.seed(4591) # reproducibility of downsampling - 'icp.batch.size'
sce <- RunParallelDivisiveICP(
    object = sce,
    batch.label = "batch",
    icp.batch.size = 1000,
    L = 10, threads = 2,  
    RNGseed = 123
)

## ----integrated emb-----------------------------------------------------------
# Integrated embedding
set.seed(125)
sce <- RunPCA(object = sce)

## ----umap---------------------------------------------------------------------
# UMAP
set.seed(1204)
sce <- RunUMAP(object = sce, min_dist = 0.5, n_neighbors = 15)

## ----viz categorical vars, fig.width=10, fig.height=5.5-----------------------
# Visualize categorical variables integrated emb.
vars <- c("batch", "cell_type")
plots <- lapply(X = vars, FUN = function(x) {
    PlotDimRed(
        object = sce, color.by = x,
        point.size = 0.25, point.stroke = 0.5,
        legend.nrow = 4
    )
})
cowplot::plot_grid(plotlist = plots, ncol = 2, align = "vh") # join plots together

## ----viz categorical vars by batch, fig.width=10, fig.height=5.5--------------
plots[[2]] + ggplot2::facet_grid(~batch)

## ----viz gene exp markers, fig.width=8.5, fig.height=6------------------------
# Visualization of gene expression markers
markers <- c("Acta2", "Myh11", "Tagln", "Cdh5", "Pecam1")
plot.markers <- lapply(markers, function(x) {
    PlotExpression(sce, color.by = x, point.size = 0.5, point.stroke = 0.5) +
        ggplot2::facet_grid(~batch)
})
cowplot::plot_grid(plotlist = plot.markers, ncol = 2)

## ----clustering, fig.width=5, fig.height=5.5----------------------------------
# Graph-based clustering
set.seed(123)
sce$cluster <- scran::clusterCells(sce,
    use.dimred = "PCA",
    BLUSPARAM = bluster::SNNGraphParam(k = 30, cluster.fun = "louvain")
)

# Plot the clustering result
plots[[3]] <- PlotDimRed(
    object = sce, color.by = "cluster",
    point.size = 0.25, point.stroke = 0.5,
    legend.nrow = 4
)
plots[[3]]

## ----viz clustering by batch, fig.width=8, fig.height=10.5--------------------
cowplot::plot_grid(
    plots[[2]] + ggplot2::facet_grid(~batch),
    plots[[3]] + ggplot2::facet_grid(~batch),
    ncol = 1
)

## ----gene coefficients, message=FALSE, warning=FALSE--------------------------
# Get label gene coefficients by majority voting
clts.gene.coeff <- MajorityVotingFeatures(object = sce, label = "cluster")

# Print summary
clts.gene.coeff$summary

## ----viz top coefficients-----------------------------------------------------
## Visualize top coefficients for cluster 10 and 12
clts <- c("10", "12")
plot.coeff.markers <- list()
for (i in clts) {
  top.markers <- clts.gene.coeff$feature_coeff[[i]]
  top.markers <- top.markers[order(top.markers[,2], decreasing = TRUE), ]
  top.markers <- top.markers[1:6, "feature"]
  plot.coeff.markers[[i]] <- lapply(top.markers, function(x) {
      PlotExpression(sce, color.by = x, point.size = 0.3, point.stroke = 0.5) +
          ggplot2::facet_grid(~batch)
  })
}

## ----viz - cluster 10, fig.width=9.5, fig.height=3.5--------------------------
# Cluster 10
cowplot::plot_grid(plotlist = plot.coeff.markers[[1]], ncol = 3)

## ----viz - cluster 12, fig.width=9.5, fig.height=3.5--------------------------
# Cluster 12
cowplot::plot_grid(plotlist = plot.coeff.markers[[2]], ncol = 3)

## ----rsession-----------------------------------------------------------------
# R session
sessionInfo()

