## ----global_options, include = FALSE------------------------------------------
# Chunk defaults for the rendered document: show code and its output in a
# single block, and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(SuperCellCyto)
library(SingleCellExperiment)
library(scran)
library(BiocSingular)
library(scater)
library(bluster)
library(data.table)

## ----read_data_in-------------------------------------------------------------
# Read the example CSV found under "extdata" in the SuperCellCyto package.
# mustWork = TRUE makes a missing file fail right here with a clear error;
# without it system.file() returns "" and that empty path is passed to fread().
dt <- fread(
    system.file(
        "extdata",
        "Levine_32dim_subsampledPopulation.csv",
        package = "SuperCellCyto",
        mustWork = TRUE
    )
)
head(dt)

## ----create_sce_object--------------------------------------------------------
# Columns that describe the cells rather than hold measurements.
meta_cols <- c("population_id", "sample", "cell_id")

# All remaining columns form the assay matrix, transposed so that cells are
# in columns.
exprs_mat <- t(as.matrix(dt[, setdiff(names(dt), meta_cols), with = FALSE]))

sce <- SingleCellExperiment(
    assays = list(counts = exprs_mat),
    colData = dt[, meta_cols, with = FALSE]
)

# name each column of the object after its cell id
colnames(sce) <- dt[["cell_id"]]

sce

## ----subset_and_transform-----------------------------------------------------
# markers used for the rest of the analysis
markers <- c(
    "CD45RA", "CD133", "CD19", "CD22", "CD11b", "CD4", "CD8", "CD34",
    "Flt3", "CD20", "CXCR4", "CD235ab", "CD45", "CD123", "CD321", "CD14",
    "CD33", "CD47", "CD11c", "CD7", "CD15", "CD16", "CD44", "CD38",
    "CD13", "CD3", "CD61", "CD117", "CD49d", "HLA-DR", "CD64", "CD41"
)

# restrict the object to those marker rows
sce <- sce[markers, ]

# arcsinh-transform the counts (cofactor 5) and store the result via exprs()
cofactor <- 5
exprs(sce) <- asinh(counts(sce) / cofactor)

sce

## ----extract_dt_and_run_supercellcyto-----------------------------------------
# Transposed expression values as a data.table (one row per cell), extended
# with the sample and the cell id of each cell.
dt <- data.table(t(exprs(sce)))
dt$sample <- sce$sample
dt$cell_id <- colnames(sce)

# build the supercells from the marker columns, using the sample and cell id
# columns added above
supercells <- runSuperCellCyto(
    dt = dt, markers = markers,
    sample_colname = "sample", cell_id_colname = "cell_id",
    gam = 5
)

head(supercells[["supercell_expression_matrix"]])

## ----add_supercell_id_to_coldata----------------------------------------------
# Attach the supercell id of every cell to the single-cell object.
# NOTE(review): the assignment is positional, so it assumes the rows of
# supercell_cell_map are in the same order as the columns of sce -- confirm.
supercell_ids <- supercells$supercell_cell_map[["SuperCellID"]]
sce$supercell_id <- factor(supercell_ids)
head(colData(sce))

## ----create_supercell_sce-----------------------------------------------------
# per-supercell table: marker columns plus SuperCellId and sample
supercell_exprs <- supercells$supercell_expression_matrix

# marker columns, transposed, become the logcounts assay; the id and sample
# columns become the column metadata
supercell_sce <- SingleCellExperiment(
    assays = list(logcounts = t(supercell_exprs[, markers, with = FALSE])),
    colData = DataFrame(
        SuperCellId = supercell_exprs[["SuperCellId"]],
        sample = supercell_exprs[["sample"]]
    )
)

# name each column after the supercell it represents
colnames(supercell_sce) <- supercell_sce$SuperCellId
supercell_sce

## ----cluster_and_umap_supercells----------------------------------------------
# Fix the RNG seed before the steps below. They run in this exact order, so
# reordering them may change the results.
set.seed(42)

# PCA on the supercells, keeping 10 components and stored back on the object.
# NOTE(review): subset.row = NULL presumably means "use every marker", and
# RandomParam() presumably selects a randomized SVD (hence the seed) -- confirm.
supercell_sce <- fixedPCA(
    supercell_sce,
    rank = 10,
    subset.row = NULL,
    BSPARAM = RandomParam()
)
# UMAP computed from the "PCA" reduced dimension
supercell_sce <- runUMAP(supercell_sce, dimred = "PCA")

# cluster the supercells on the "PCA" reduced dimension; the clustering
# function passed to SNNGraphParam is "leiden"
clusters <- clusterCells(
    supercell_sce, use.dimred = "PCA",
    BLUSPARAM = SNNGraphParam(cluster.fun = "leiden")
)

# store the cluster assignment as the column labels of the object
colLabels(supercell_sce) <- clusters

# UMAP of the supercells, coloured by "label"
plotReducedDim(supercell_sce, dimred = "UMAP", colour_by = "label")

## ----plot_marker_expression_supercells----------------------------------------
# expression of selected markers in the supercells, with "label" on the x axis
# and points coloured by sample
plotExpression(
    supercell_sce,
    features = c("CD4", "CD8", "CD19", "CD34", "CD11b"),
    x = "label",
    colour_by = "sample"
)

## ----transfer_cluster_to_singlecell-------------------------------------------
# Column metadata of the single-cell object (one row per cell, in the column
# order of sce) and of the supercell object (includes the "label" column).
cell_id_sce <- data.table(as.data.frame(colData(sce)))
supercell_cluster <- data.table(as.data.frame(colData(supercell_sce)))

# Left join on the supercell id. sort = FALSE keeps the rows in the order of
# cell_id_sce, and all.x = TRUE keeps cells without a matching supercell
# (their label is NA) instead of dropping them, so no cell can go missing.
cell_id_sce_with_clusters <- merge.data.table(
    x = cell_id_sce,
    y = supercell_cluster,
    by.x = "supercell_id",
    by.y = "SuperCellId",
    all.x = TRUE,
    sort = FALSE
)

## ----add_cluster_to_coldata---------------------------------------------------
# The labels are assigned by position, so stop if the joined table is not
# aligned cell-for-cell with the columns of sce.
stopifnot(
    nrow(cell_id_sce_with_clusters) == ncol(sce),
    identical(as.character(cell_id_sce_with_clusters$cell_id), colnames(sce))
)
colData(sce)$cluster <- cell_id_sce_with_clusters$label

## ----umap_singlecell_colored_by_cluster---------------------------------------
# PCA (10 components) followed by UMAP, this time on the single cells
sce <- fixedPCA(
    sce,
    rank = 10,
    subset.row = NULL,
    BSPARAM = RandomParam()
)
sce <- runUMAP(sce, dimred = "PCA")

# single-cell UMAP coloured by the "cluster" column
plotReducedDim(
    sce,
    dimred = "UMAP",
    colour_by = "cluster"
)


## ----plot_marker_expression_singlecell----------------------------------------
# expression of the same selected markers in the single cells, with "cluster"
# on the x axis and points coloured by sample
plotExpression(
    sce,
    features = c("CD4", "CD8", "CD19", "CD34", "CD11b"),
    x = "cluster",
    colour_by = "sample"
)

## ----session_info-------------------------------------------------------------
# record the R version and the attached/loaded packages used for this run
sessionInfo()

