## ----global_options, include = FALSE------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(qs2)
library(Seurat)
library(data.table)
library(SuperCellCyto)

set.seed(42)

## ----load_seurat_obj----------------------------------------------------------
seurat_obj <- qs_read(
    system.file(
        "extdata", 
        "Levine_32dim_seurat_sub.qs2", 
        package = "SuperCellCyto"
        )
    )
seurat_obj

## ----subset_and_transform-----------------------------------------------------
markers <- c(
    "CD45RA", "CD133", "CD19", "CD22", "CD11b", "CD4", "CD8", "CD34",
    "Flt3", "CD20", "CXCR4", "CD235ab", "CD45", "CD123", "CD321", "CD14",
    "CD33", "CD47", "CD11c", "CD7", "CD15", "CD16", "CD44", "CD38", "CD13",
    "CD3", "CD61", "CD117", "CD49d", "HLA-DR", "CD64", "CD41"
)

# keep only the relevant markers
seurat_obj <- seurat_obj[markers, ]

# to store arcsinh transformed data
seurat_obj[["originalexp"]]$data <- asinh(
    seurat_obj[["originalexp"]]$counts / 5
)

seurat_obj

## ----extract_dt_and_run_supercellcyto-----------------------------------------
# check.names set to FALSE so HLA-DR is not replaced with HLA.DR
dt <- data.table(
    t(data.frame(seurat_obj[["originalexp"]]$data, check.names = FALSE))
)
# add the cell_id and sample metadata
dt <- cbind(dt, seurat_obj[[c("cell_id", "sample")]])

supercells <- runSuperCellCyto(
    dt = dt,
    markers = markers,
    sample_colname = "sample",
    cell_id_colname = "cell_id",
    gam = 5
)

head(supercells$supercell_expression_matrix)

## ----add_supercell_id_to_metadata---------------------------------------------
seurat_obj$supercell_id <- factor(supercells$supercell_cell_map$SuperCellID)
head(seurat_obj[[]])

## ----create_supercell_seurat_obj----------------------------------------------
supercell_mat <- t(
    supercells$supercell_expression_matrix[, markers, with = FALSE]
)
colnames(supercell_mat) <- supercells$supercell_expression_matrix$SuperCellId
supercell_seurat_obj <- CreateSeuratObject(counts = supercell_mat)
supercell_seurat_obj[["RNA"]]$data <- supercell_seurat_obj[["RNA"]]$counts

supercell_seurat_obj

## ----cluster_and_umap_supercells, message=FALSE-------------------------------
# Have to do this, otherwise Seurat will complain
supercell_seurat_obj <- ScaleData(supercell_seurat_obj)

supercell_seurat_obj <- RunPCA(
    object = supercell_seurat_obj,
    npcs = 10,
    nfeatures.print = 1,
    approx = FALSE,
    seed.use = 42,
    features = markers
)

supercell_seurat_obj <- FindNeighbors(supercell_seurat_obj, dims = 1:10)
supercell_seurat_obj <- FindClusters(supercell_seurat_obj, resolution = 0.5)
supercell_seurat_obj <- RunUMAP(supercell_seurat_obj, dims = 1:10)

DimPlot(supercell_seurat_obj, reduction = "umap")

## ----featureplot_supercells, height=10, width=10------------------------------
FeaturePlot(
    supercell_seurat_obj,
    features = c("CD4", "CD8", "CD19", "CD34", "CD11b"), ncol = 3
)

## ----merge_clusters_to_metadata-----------------------------------------------
clusters <- data.table(
    supercell_id = colnames(supercell_seurat_obj),
    cluster = as.vector(Idents(supercell_seurat_obj))
)

cell_metadata <- seurat_obj[[]]
cell_metadata <- merge.data.table(
    x = cell_metadata,
    y = clusters,
    by = "supercell_id",
    sort = FALSE
)

## ----add_cluster_to_metadata--------------------------------------------------
seurat_obj$cluster <- cell_metadata$cluster
Idents(seurat_obj) <- "cluster"

## ----cluster_and_umap_singlecell, message=FALSE-------------------------------
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(
    object = seurat_obj,
    npcs = 10,
    nfeatures.print = 1,
    approx = FALSE,
    seed.use = 42,
    features = markers
)

seurat_obj <- FindNeighbors(seurat_obj, dims = 1:10)
seurat_obj <- FindClusters(seurat_obj, resolution = 0.5)
seurat_obj <- RunUMAP(seurat_obj, dims = 1:10)

DimPlot(seurat_obj, reduction = "umap")


## ----featureplot_singlecell---------------------------------------------------
FeaturePlot(
    seurat_obj,
    features = c("CD4", "CD8", "CD19", "CD34", "CD11b"), ncol = 3
)

## ----session_info-------------------------------------------------------------
sessionInfo()

