## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = TRUE,
  crop = NULL
)
library(BiocStyle)

## ----installation-------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("sketchR")

## ----load-pkg-----------------------------------------------------------------
suppressPackageStartupMessages({
    library(sketchR)
    library(TENxPBMCData)
    library(scuttle)
    library(scran)
    library(scater)
    library(SingleR)
    library(celldex)
    library(cowplot)
    library(SummarizedExperiment)
    library(SingleCellExperiment)
    library(beachmat.hdf5)
})

## ----process-data-------------------------------------------------------------
## Load data
pbmc3k <- TENxPBMCData(dataset = "pbmc3k")

## Set row and column names
colnames(pbmc3k) <- paste0("Cell", seq_len(ncol(pbmc3k)))
rownames(pbmc3k) <- uniquifyFeatureNames(
    ID = rowData(pbmc3k)$ENSEMBL_ID,
    names = rowData(pbmc3k)$Symbol_TENx
)

## Normalize and log-transform counts
pbmc3k <- logNormCounts(pbmc3k)

## Find highly variable genes
dec <- modelGeneVar(pbmc3k)
top.hvgs <- getTopHVGs(dec, n = 2000)

## Perform dimensionality reduction
set.seed(100)
pbmc3k <- runPCA(pbmc3k, subset_row = top.hvgs)
pbmc3k <- runTSNE(pbmc3k, dimred = "PCA")

## Predict cell type labels
ref_monaco <- MonacoImmuneData()
pred_monaco_main <- SingleR(test = pbmc3k, ref = ref_monaco, 
                            labels = ref_monaco$label.main)
pbmc3k$labels_main <- pred_monaco_main$labels

dim(pbmc3k)

## ----geosketch----------------------------------------------------------------
idx800gs <- geosketch(reducedDim(pbmc3k, "PCA"), 
                      N = 800, seed = 123)
head(idx800gs)
length(idx800gs)

## ----plot-tsne, fig.width = 10, fig.height = 4--------------------------------
plot_grid(
    plotTSNE(pbmc3k, colour_by = "labels_main"),
    plotTSNE(pbmc3k[, idx800gs], colour_by = "labels_main")
)

## ----plot-abundance, fig.width = 6, fig.height = 8----------------------------
compareCompositionPlot(colData(pbmc3k), 
                       idx = list(geosketch = idx800gs), 
                       column = "labels_main")

## ----plot-diagnostics---------------------------------------------------------
set.seed(123)
hausdorffDistPlot(mat = reducedDim(pbmc3k, "PCA"), 
                  Nvec = c(400, 800, 2000),
                  Nrep = 3, methods = c("geosketch", "uniform"))

## ----session-info-------------------------------------------------------------
sessionInfo()

