## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  error = FALSE,
  warning = FALSE,
  message = FALSE
)

## ----install, eval=FALSE------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("HCAData")

## ----loadpkg------------------------------------------------------------------
library("HCAData")

## ----firstcall----------------------------------------------------------------
HCAData()

## ----fetchingdata-------------------------------------------------------------
suppressPackageStartupMessages({
  library("ExperimentHub")
  library("SingleCellExperiment")
})

eh <- ExperimentHub()
query(eh, "HCAData")

# these three are the components to the bone marrow dataset
bonemarrow_h5densematrix <- eh[["EH2047"]]
bonemarrow_coldata <- eh[["EH2048"]]
bonemarrow_rowdata <- eh[["EH2049"]]

# and are put together when calling...
sce_bonemarrow <- HCAData("ica_bone_marrow")
sce_bonemarrow

# similarly, to access the umbilical cord blood dataset
sce_cordblood <- HCAData("ica_cord_blood")
sce_cordblood

## ----subset-------------------------------------------------------------------
library("scran")
library("BiocSingular")
library("scater")
library("scuttle")

set.seed(42)
sce <- sce_bonemarrow[, sample(seq_len(ncol(sce_bonemarrow)), 1000, replace = FALSE)]

## ----featureanno--------------------------------------------------------------
rownames(sce) <- uniquifyFeatureNames(rowData(sce)$ID, rowData(sce)$Symbol)
head(rownames(sce))

is.mito <- grep("MT-", rownames(sce))

counts(sce) <- as.matrix(counts(sce))
sce <- scuttle::addPerCellQC(sce, subsets=list(Mito=is.mito))

## ----clustnorm----------------------------------------------------------------
lib.sf.bonemarrow <- librarySizeFactors(sce)
summary(lib.sf.bonemarrow)

set.seed(42)
clusters <- quickCluster(sce)
table(clusters)
sce <- computeSumFactors(sce, min.mean=0.1, cluster=clusters)
sce <- logNormCounts(sce)
assayNames(sce)

## ----hvg----------------------------------------------------------------------
dec.bonemarrow <- modelGeneVarByPoisson(sce)
top.dec <- dec.bonemarrow[order(dec.bonemarrow$bio, decreasing=TRUE),] 
head(top.dec)

fit.bonemarrow <- metadata(dec.bonemarrow)
plot(fit.bonemarrow$mean, fit.bonemarrow$var, xlab="Mean of log-expression",
    ylab="Variance of log-expression", pch = 16)
curve(fit.bonemarrow$trend(x), col="dodgerblue", add=TRUE, lwd=2)
plotExpression(sce, features=rownames(top.dec)[1:10])

## ----dimred-------------------------------------------------------------------
hvg.bonemarrow <- getTopHVGs(dec.bonemarrow, prop = 0.1)

set.seed(42)
sce <- denoisePCA(sce, 
                  technical=dec.bonemarrow, 
                  subset.row = hvg.bonemarrow, 
                  BSPARAM=IrlbaParam())
ncol(reducedDim(sce, "PCA"))
plot(attr(reducedDim(sce), "percentVar"), xlab="PC",
     ylab="Proportion of variance explained")
abline(v=ncol(reducedDim(sce, "PCA")), lty=2, col="red")
plotPCA(sce, ncomponents=3, colour_by="subsets_Mito_percent")

set.seed(42)
sce <- runTSNE(sce, dimred="PCA", perplexity=30)
plotTSNE(sce, colour_by="subsets_Mito_percent")

## ----clusters-----------------------------------------------------------------
snn.gr <- buildSNNGraph(sce, use.dimred="PCA")
clusters <- igraph::cluster_walktrap(snn.gr)
sce$Cluster <- factor(clusters$membership)
table(sce$Cluster)
plotTSNE(sce, colour_by="Cluster")

## ----launchisee, eval=FALSE---------------------------------------------------
# if (require(iSEE)) {
#   iSEE(sce)
# }

## ----save, eval=FALSE---------------------------------------------------------
# destination <- "where/to/store/the/processed/data.rds"
# saveRDS(sce, file = destination)

## ----sessioninfo--------------------------------------------------------------
sessionInfo()

