## ----include = FALSE, echo=FALSE, results="hide", message=FALSE---------------
# Global knitr chunk options for this document: `collapse` switched on and
# "#>" used as the comment prefix for output.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ----load_packages, echo=FALSE, message=FALSE---------------------------------
library(SuperCellCyto)
library(parallel)
library(BiocParallel)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("SuperCellCyto")

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("pak", quietly = TRUE))
#     install.packages("pak")
# pak::pkg_install("phipsonlab/SuperCellCyto")

## ----simulate_data------------------------------------------------------------
# Size of the simulated dataset: number of markers and number of samples.
n_markers <- 15
n_samples <- 3

# `ncells` receives one entry (10000) per sample -- presumably the number of
# cells to simulate for each sample; confirm against simCytoData().
dat <- simCytoData(
    nmarkers = n_markers,
    ncells = rep(10000, times = n_samples)
)
head(dat)

## ----arcsinh_transformation---------------------------------------------------
# Marker columns to transform, and the names their transformed copies will get
marker_cols <- paste0("Marker_", seq_len(n_markers))
marker_cols_asinh <- paste0(marker_cols, "_asinh")

# Co-factor the raw values are divided by before taking the arc-sinh
cofactor <- 5

# Transform the marker columns and label the result with the new names
dat_asinh <- setNames(
    asinh(dat[, marker_cols, with = FALSE] / cofactor),
    marker_cols_asinh
)

# Add the transformed columns to our previously loaded data
dat <- cbind(dat, dat_asinh)

head(dat[, marker_cols_asinh, with = FALSE])

## ----create_cell_id-----------------------------------------------------------
# Give every row an identifier of the form "Cell_<row number>"
dat$Cell_id_dummy <- paste("Cell", seq_len(nrow(dat)), sep = "_")
head(dat[["Cell_id_dummy"]], n = 10)

## ----check_sample_col---------------------------------------------------------
unique(dat[["Sample"]])

## ----set_colnames-------------------------------------------------------------
# Column names handed to runSuperCellCyto() in the chunks below
cell_id_col <- "Cell_id_dummy"
sample_col <- "Sample"

## ----run_supercellcyto--------------------------------------------------------
# Build supercells from the arcsinh-transformed markers; every argument not
# listed here is left at runSuperCellCyto()'s default.
supercells <- runSuperCellCyto(
    dt = dat, markers = marker_cols_asinh,
    sample_colname = sample_col, cell_id_colname = cell_id_col
)

## ----check_supercells_class---------------------------------------------------
class(supercells)

## ----check_supercells_names---------------------------------------------------
names(supercells)

## ----show_supercell_expr_matrix-----------------------------------------------
head(supercells[["supercell_expression_matrix"]])

## ----show_supercell_ids-------------------------------------------------------
head(unique(supercells[["supercell_expression_matrix"]]$SuperCellId))

## ----show_supercell_1_ids-----------------------------------------------------
# Keep only the supercell IDs that contain "SuperCell_1_"
supercell_ids <- unique(
    supercells[["supercell_expression_matrix"]]$SuperCellId
)
supercell_ids[grepl("SuperCell_1_", supercell_ids)]

## ----show_supercell_cell_map--------------------------------------------------
head(supercells[["supercell_cell_map"]])

## ----run_supercellcyto_parallel-----------------------------------------------
# Same inputs as the serial run above, plus a BiocParallel backend configured
# with one task per sample and load balancing switched on.
supercell_par <- runSuperCellCyto(
    dt = dat, markers = marker_cols_asinh,
    sample_colname = sample_col, cell_id_colname = cell_id_col,
    BPPARAM = BiocParallel::MulticoreParam(tasks = n_samples),
    load_balancing = TRUE
)

## ----recompute_supercells-----------------------------------------------------
# Call recomputeSupercells() once per additional gamma value, reusing the
# `supercell_object` element returned by the earlier runSuperCellCyto() call.
# The result is a list with one entry per gamma value, in the same order.
addt_gamma_vals <- c(10, 50)
supercells_addt_gamma <- lapply(
    addt_gamma_vals,
    function(gamma_val) {
        recomputeSupercells(
            dt = dat, sc_objects = supercells$supercell_object,
            markers = marker_cols_asinh,
            sample_colname = sample_col, cell_id_colname = cell_id_col,
            gam = gamma_val
        )
    }
)

## ----show_supercells_gamma10--------------------------------------------------
# First entry corresponds to the first gamma value (10)
supercells_addt_gamma[[1]]

## ----count_supercells---------------------------------------------------------
# Number of supercells (rows of the expression matrix) for each run.
# NOTE(review): the "gamma20" name suggests the first run used gam = 20,
# presumably runSuperCellCyto()'s default -- confirm.
n_supercells_gamma20 <- nrow(supercells[["supercell_expression_matrix"]])
n_supercells_gamma10 <-
    nrow(supercells_addt_gamma[[1]]$supercell_expression_matrix)
n_supercells_gamma50 <-
    nrow(supercells_addt_gamma[[2]]$supercell_expression_matrix)

## ----gamma10_gt_gamma20-------------------------------------------------------
# Does gamma = 10 give more supercells than the first run?
n_supercells_gamma10 > n_supercells_gamma20

## ----gamma50_lt_gamma20-------------------------------------------------------
# Does gamma = 50 give fewer supercells than the first run?
n_supercells_gamma50 < n_supercells_gamma20

## ----diff_gamma_per_sample----------------------------------------------------
# Simulate a fresh dataset and record the marker, sample and cell-ID column
# names that are passed to runSuperCellCyto() below.
n_markers <- 10
dat <- simCytoData(nmarkers = n_markers)
markers_col <- paste0("Marker_", seq_len(n_markers))
sample_col <- "Sample"
cell_id_col <- "Cell_Id"

samples <- unique(dat[[sample_col]])

# Gamma values are matched to samples by position; any surplus values are
# simply not used.
gam_values <- c(10, 20, 10)

# Fail early instead of silently passing `gam = NA` when there are more
# samples than gamma values.
stopifnot(length(gam_values) >= length(samples))

# Run SuperCellCyto separately on each sample with that sample's gamma value.
# The result is a list with one runSuperCellCyto() output per sample.
supercells_diff_gam <- lapply(seq_along(samples), function(i) {
    # Filter through `sample_col` so the subset follows the configured column
    # name rather than a hard-coded `Sample`.
    in_sample <- dat[[sample_col]] == samples[i]
    runSuperCellCyto(
        dt = dat[in_sample, ],
        markers = markers_col,
        sample_colname = sample_col,
        cell_id_colname = cell_id_col,
        gam = gam_values[i]
    )
})

## ----combine_supercell_results------------------------------------------------
# Stack the per-sample expression matrices into a single table
supercell_expression_matrix <- do.call(
    "rbind",
    lapply(supercells_diff_gam, `[[`, "supercell_expression_matrix")
)

# Stack the per-sample supercell-to-cell maps the same way
supercell_cell_map <- do.call(
    "rbind",
    lapply(supercells_diff_gam, `[[`, "supercell_cell_map")
)

## ----show_combined_expr_matrix------------------------------------------------
# First and last three rows of the combined expression matrix
rbind(
    head(supercell_expression_matrix, 3),
    tail(supercell_expression_matrix, 3)
)

## ----show_combined_cell_map---------------------------------------------------
# First and last three rows of the combined cell map
rbind(
    head(supercell_cell_map, 3),
    tail(supercell_cell_map, 3)
)

## ----session_info-------------------------------------------------------------
# Print R version, platform and attached/loaded packages
utils::sessionInfo()

