## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    ## see https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
    crop = NULL
)

## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE----------------
## Bib setup
library("RefManageR")

## Write bibliography information
bib <- c(
    R = citation(),
    BiocStyle = citation("BiocStyle")[1],
    knitr = citation("knitr")[1],
    RefManageR = citation("RefManageR")[1],
    rmarkdown = citation("rmarkdown")[1],
    sessioninfo = citation("sessioninfo")[1],
    testthat = citation("testthat")[1],
    StatescopeR = citation("StatescopeR")[1]
)

## ----"install", eval = FALSE--------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# 
# BiocManager::install("StatescopeR")
# 
# ## Check that you have a valid Bioconductor installation
# BiocManager::valid()

## ----'Prepare scRNAseq data'--------------------------------------------------
## Load StatescopeR & scRNAseq (for example data)
suppressMessages(library(StatescopeR))
suppressMessages(library(scRNAseq))
suppressMessages(library(scuttle))

## Load the SegerstolpePancreas data set
scRNAseq <- SegerstolpePancreasData()

## remove duplicate genes
scRNAseq <- scRNAseq[!duplicated(rownames(scRNAseq)), ]

## remove cells with no cell type label
scRNAseq <- scRNAseq[, !is.na(scRNAseq$`cell type`)]

## remove very rare cell types (<100 cells in total data set)
celltypes_to_remove <-
    names(table(scRNAseq$`cell type`)[(table(scRNAseq$`cell type`) < 100)])
scRNAseq <- scRNAseq[, !scRNAseq$`cell type` %in% celltypes_to_remove]

## ----'Prepare StatescopeR input'----------------------------------------------
## Normalize (cp10k) and logtransform scRNAseq
cpm(scRNAseq) <- calculateCPM(scRNAseq)
logcounts(scRNAseq) <- log1p(cpm(scRNAseq) / 100)

## Create scRNAseq reference/signature with 50 hvg for quick example
signature <- create_signature(scRNAseq,
    hvg_genes = TRUE, n_hvg_genes = 50L,
    labels = scRNAseq$`cell type`
)

## select subset of genes for deconvolution (30/50 hvg to make it quick)
selected_genes <- select_genes(scRNAseq, 30L, 50L,
    labels = scRNAseq$`cell type`
)

## Create pseudobulk and normalize to cp10k (logging is done within Statescope)
pseudobulk <- aggregateAcrossCells(scRNAseq, ids = scRNAseq$individual)
normcounts(pseudobulk) <- calculateCPM(pseudobulk) / 100
pseudobulk <- as(pseudobulk, "SummarizedExperiment")
rownames(pseudobulk) <- rownames(scRNAseq)

## (optional) Create prior expectation for ductal cells
prior <- gather_true_fractions(scRNAseq,
    ids = scRNAseq$individual,
    label_col = "cell type"
) # Use True sc fractions for this
prior[rownames(prior) != "ductal cell", ] <- NA # Keep only ductal cells
prior <- t(prior) # Transpose it to nSample x nCelltype

## ----'Run StatescopeR'--------------------------------------------------------
## Run Deconvolution module
Statescope <- BLADE_deconvolution(signature, pseudobulk, selected_genes, prior)

## Run Refinement module
Statescope <- Refinement(Statescope, signature, pseudobulk, cores = 2L)


## Run State Discovery module (pick k=2 for quick example)
Statescope <- StateDiscovery(Statescope, k = 2L, Ncores = 2L)

## ----'Evaluate Statescope'----------------------------------------------------
## Count fractions of cells per sample in the pseudobulk
true_fractions <- gather_true_fractions(scRNAseq,
    ids = scRNAseq$individual,
    label_col = "cell type"
)

## Plot correlation and RMSE with true fractions per celltype
fraction_eval(Statescope, true_fractions)

## ----'Downstream analysis/visualizations'-------------------------------------
## Show predicted fractions
metadata(Statescope)$fractions

## Show predicted ductal cell type specific gene expression profiles
assay(metadata(Statescope)$ct_specific_gep$`ductal cell`, "weighted_gep")

## Show ductal cell state scores per sample
metadata(Statescope)$statescores$`ductal cell`

## Show ductal cell state loadings
metadata(Statescope)$stateloadings$`ductal cell`

## Plot heatmap of fractions
fraction_heatmap(Statescope)

## Plot barplot of top state loadings
barplot_stateloadings(Statescope)

## ----'Group expectations'-----------------------------------------------------
## define cell types in one group
grouped_cts <- c("alpha cell", "beta cell", "gamma cell", "delta cell")

## initialize group assignment matrix (nCelltype x nGroup) with 0's
celltype_names <- colnames(signature$mu)
group_names <- c("Group", setdiff(celltype_names, grouped_cts))
nCelltype <- length(celltype_names)
nGroup <- length(group_names)

group <- matrix(0,
    nrow = nGroup, ncol = nCelltype,
    dimnames = list(group_names, celltype_names)
)


## initialize prior fraction matrix (nSamples x nCelltypes) with NA's
nSamples <- ncol(pseudobulk)
sample_names <- colnames(pseudobulk)

prior <- matrix(
    nrow = nSamples, ncol = nGroup,
    dimnames = list(sample_names, group_names)
)


## Assign cell types to groups
for (ct in celltype_names) {
    if (ct %in% grouped_cts) {
        ## assign cell type to group
        group["Group", ct] <- 1
    } else {
        ## Assign cell type to its own group
        group[ct, ct] <- 1
    }
}

## Add grouped prior fractions to prior
true_fractions <- gather_true_fractions(scRNAseq, ids = scRNAseq$individual,
     label_col = "cell type")

prior[names(true_fractions), "Group"] <- colSums(as.matrix(
    true_fractions[grouped_cts, ]
))


## init group prior
group_prior <- list("Group" = group, "Expectation" = prior)

## Now you can just run Statescope as with a normal prior. Note we do not give
## a ductal cell prior this time
Statescope <- BLADE_deconvolution(signature, pseudobulk, selected_genes,
    group_prior,
    cores = 1L,
    Nrep = 1L
)

## ----"citation"---------------------------------------------------------------
## Citation info
citation("StatescopeR")

## ----reproduce3, echo=FALSE---------------------------------------------------
## Session info
library("sessioninfo")
options(width = 80)
session_info()

