## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    collapse=TRUE, 
    comment="#>", 
    warning=FALSE, 
    message=FALSE
)

## ----quick_start, results='hide'----------------------------------------------
library(scuttle)
library(SanityR)

sce <- mockSCE()
sce <- computeLibraryFactors(sce) # or any other method to compute size factors
sce <- Sanity(sce)
logcounts(sce)

## ----simulate-----------------------------------------------------------------
library(SingleCellExperiment)
library(SanityR)

set.seed(42)
sce <- simulate_branched_random_walk(
    N_gene=1000,  
    N_path=10, 
    length_path=12
)
sce

## ----run_sanity---------------------------------------------------------------
sf <- colSums(counts(sce))
sizeFactors(sce) <- sf / mean(sf)
sce <- Sanity(sce)
sce

## ----sanity_accuracy, fig.wide=TRUE, fig.cap="Gene Level Parameters. Comparison of Sanity estimates with latent values used to generate the data."----
par(mfrow=c(1, 2))
plot(
    sanity_log_activity_mean ~ ltq_mean, 
    data=rowData(sce),
    frame=FALSE, 
    pch=16, 
    col="#00000044", 
    xlab="Mean UMI counts",  
    ylab="Sanity Estimate", 
    main="Mean Log Transcriptional Activity"
)
abline(0, 1, col=2, lwd=2, lty=2)
legend("top", legend="y=x", col=2, lwd=2, lty=2, bty="n")

plot(
    sanity_activity_sd^2  ~ ltq_var, data=rowData(sce), 
    log="xy",
    frame=FALSE, 
    pch=16, 
    col="#00000044", 
    xlab="True LTQ Variance",  
    ylab="Sanity Estimate", 
    main="Variance of Log Transcriptional Activity"
)
abline(0, 1, col=2, lwd=2, lty=2)
legend("top", legend="y=x", col=2, lwd=2, lty=2, bty="n")

## ----sanity_correlation, fig.cap="Cell Level Estimates. Correlation of Sanity inferred logcounts with latent logcounts using genes at different quantiles of expression."----
umi_mean <- rowMeans(counts(sce))
umi_quantiles <- quantile(umi_mean, seq(0, 1, .2))
umi_quantiles <- cut( 
    umi_mean, 
    umi_quantiles, 
    include.lowest=TRUE, 
    labels=paste0("Q", 1:5)
)
rho <- sapply(
    split(1:nrow(sce), umi_quantiles), 
    function(ix) { 
        sce_q <- sce[ix,] 
        diag(cor(assay(sce_q, "logFC"), logcounts(sce_q))) 
    } 
)

boxplot(
    rho, 
    frame=FALSE, 
    main="Correlation between latent and inferred logcounts", 
    xlab="Quantiles of Mean UMI Count", 
    ylab="Pearson Correlation"
)

## ----calculate_distance, fig.cap="t-SNE visualization using the Sanity calculated distances. Pseudotime is computed based the predecessor relations between cells.", fig.wide=TRUE----
cell_dist <- calculateSanityDistance(sce)
reducedDim(sce, "TSNE") <- Rtsne::Rtsne(cell_dist, is_distance=TRUE)$Y

# Visualize the t-SNE embedding
sce$pseudotime <- 0
for (i in 2:ncol(sce))  # cell 1 is the stem cell
    sce$pseudotime[i] <- sce$pseudotime[sce$predecessor[i]] + 1
scater::plotTSNE(sce, color_by="pseudotime")

## ----session_info-------------------------------------------------------------
sessionInfo()

