## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")

## ----packages, message=FALSE, warning=FALSE-----------------------------------
# Packages
library("scran")
library("ggplot2")
library("Coralysis")
library("SingleCellExperiment")
library("MouseGastrulationData")

## ----data, message=FALSE, warning=FALSE---------------------------------------
# Import data
sce <- EmbryoAtlasData(samples = c(1, 3))

## ----Seurat normalisation, echo=TRUE, message=FALSE, warning=FALSE------------
## Normalize the data
# log1p normalization
SeuratNormalisation <- function(object) {
    # 'SeuratNormalisation()' function applies the basic Seurat normalization to
    # a SingleCellExperiment object with a 'counts' assay. Normalized data
    # is saved in the 'logcounts' assay.
    logcounts(object) <- apply(counts(object), 2, function(x) {
        log1p(x / sum(x) * 10000)
    }) # log1p normalization w/ 10K scaling factor
    logcounts(object) <- as(logcounts(object), "sparseMatrix")
    return(object)
}
sce <- SeuratNormalisation(object = sce)

# Remove 'counts' to reduce data size object
counts(sce) <- NULL

## ----hvg----------------------------------------------------------------------
# Feature selection with 'scran' package
nhvg <- 500
batch.label <- "pool"
sce[[batch.label]] <- factor(sce[[batch.label]])
m.hvg <- scran::modelGeneVar(sce, block = sce[[batch.label]])
top.hvg <- getTopHVGs(m.hvg, n = nhvg)

# Subset object
sce <- sce[top.hvg, ]

# Rename genes: 'Ensembl ID' to SYMBOL
row.names(sce) <- rowData(sce)$SYMBOL
rowData(sce) <- NULL

## ----dimred: pre-integration, fig.width=8.5, fig.height=7.25------------------
# Compute PCA & TSNE
set.seed(123)
sce <- RunPCA(
    object = sce,
    assay.name = "logcounts",
    p = 30, dimred.name = "unintPCA"
)
set.seed(123)
sce <- RunTSNE(sce,
    dimred.type = "unintPCA",
    dimred.name = "unintTSNE"
)

# Plot TSNE highlighting the batch & cell type
unint.batch.plot <- PlotDimRed(
    object = sce,
    color.by = "pool",
    dimred = "unintTSNE",
    point.size = 0.01,
    legend.nrow = 1,
    seed.color = 1024
)
unint.cell.plot <- PlotDimRed(
    object = sce,
    color.by = "celltype",
    dimred = "unintTSNE",
    point.size = 0.01,
    legend.nrow = 14,
    seed.color = 7
)
cowplot::plot_grid(unint.batch.plot, unint.cell.plot, ncol = 2, align = "vh")

## ----multi-level integration, message=TRUE, warning=FALSE---------------------
# Prepare data for integration:
# remove non-expressing genes & logcounts is from `dgCMatrix` class
sce <- PrepareData(object = sce)

# Perform integration with Coralysis
set.seed(1024)
sce <- RunParallelDivisiveICP(
    object = sce,
    batch.label = "pool",
    build.train.set = FALSE,
    L = 10, threads = 2
)

## ----dimred: post-integration, fig.width=8.5, fig.height=7.25-----------------
# Compute PCA with joint cluster probabilities & TSNE
set.seed(123)
sce <- RunPCA(sce,
    assay.name = "joint.probability",
    dimred.name = "intPCA"
)
set.seed(123)
sce <- RunTSNE(sce,
    dimred.type = "intPCA",
    dimred.name = "intTSNE"
)

# Plot TSNE highlighting the batch & cell type
int.batch.plot <- PlotDimRed(
    object = sce,
    color.by = "pool",
    dimred = "intTSNE",
    point.size = 0.01,
    legend.nrow = 1,
    seed.color = 1024
)
int.cell.plot <- PlotDimRed(
    object = sce,
    color.by = "celltype",
    dimred = "intTSNE",
    point.size = 0.01,
    legend.nrow = 14,
    seed.color = 7
)
cowplot::plot_grid(int.batch.plot, int.cell.plot,
    ncol = 2, align = "vh"
)

## ----clustering, fig.width=11.5, fig.height=7.25------------------------------
# Graph-based clustering on the integrated PCA w/ 'scran' package
blusparams <- bluster::SNNGraphParam(k = 15, cluster.fun = "louvain")
set.seed(123)
sce$cluster <- scran::clusterCells(sce,
    use.dimred = "intPCA",
    BLUSPARAM = blusparams
)

# Plot clustering
clt.plot <- PlotDimRed(
    object = sce,
    color.by = "cluster",
    dimred = "intTSNE",
    point.size = 0.01,
    legend.nrow = 3,
    seed.color = 65
)
cowplot::plot_grid(int.batch.plot, int.cell.plot,
    clt.plot,
    ncol = 3, align = "h"
)

## ----cluster markers, fig.width=5, fig.height=5-------------------------------
# Cluster markers
cluster.markers <- FindAllClusterMarkers(object = sce, clustering.label = "cluster")

# Select the top 3 positive markers per cluster
top3.markers <- lapply(X = split(x = cluster.markers, f = cluster.markers$cluster), FUN = function(x) {
    head(x[order(x$log2FC, decreasing = TRUE), ], n = 3)
})
top3.markers <- do.call(rbind, top3.markers)
top3.markers <- top3.markers[order(as.numeric(top3.markers$cluster)), ]

# Heatmap of the top 3 positive markers per cluster
HeatmapFeatures(
    object = sce,
    clustering.label = "cluster",
    features = top3.markers$marker,
    seed.color = 65
)

## ----dge, fig.width=9, fig.height=4.5-----------------------------------------
# DGE analysis: cluster 3 vs 6
dge.clt3vs6 <- FindClusterMarkers(sce,
    clustering.label = "cluster",
    clusters.1 = "3",
    clusters.2 = "6"
)
head(dge.clt3vs6[order(abs(dge.clt3vs6$log2FC), decreasing = TRUE), ])
top6.degs <- head(dge.clt3vs6[order(abs(dge.clt3vs6$log2FC),
    decreasing = TRUE
), "marker"])
exp.plots <- lapply(X = top6.degs, FUN = function(x) {
    PlotExpression(
        object = sce, color.by = x,
        scale.values = TRUE, point.size = 0.5, point.stroke = 0.5
    )
})
cowplot::plot_grid(plotlist = exp.plots, align = "vh", ncol = 3)

## ----rsession-----------------------------------------------------------------
# R session
sessionInfo()

