## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    echo = TRUE,
    warning = FALSE,
    error = FALSE,
    fig.align = "center",
    dpi = 200
)

## ----install-scLANE-bioc, eval=FALSE------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("scLANE")

## ----install-scLANE-git, eval=FALSE-------------------------------------------
# remotes::install_github("jr-leary7/scLANE")

## ----results='hide', message=FALSE, warning=FALSE-----------------------------
library(scran)
library(dplyr)
library(scater)
library(scLANE)
library(ggplot2)
library(ComplexHeatmap)
select <- dplyr::select
filter <- dplyr::filter

## -----------------------------------------------------------------------------
data(sim_counts)
data(sim_pseudotime)

## ----fig.cap="PCA embedding showing ground-truth pseudotime"------------------
plotPCA(sim_counts, colour_by = "cell_time_normed") +
    theme_scLANE(umap = TRUE)

## ----fig.cap="PCA embedding showing subject ID"-------------------------------
plotPCA(sim_counts, colour_by = "subject") +
    theme_scLANE(umap = TRUE)

## -----------------------------------------------------------------------------
pt_df <- data.frame(PT = sim_counts$cell_time_normed)
cell_offset <- createCellOffset(sim_counts)
top1k_hvgs <- getTopHVGs(modelGeneVar(sim_counts), n = 1e3)

## -----------------------------------------------------------------------------
scLANE_models <- testDynamic(sim_counts,
    pt = pt_df,
    genes = top1k_hvgs[1:5],
    size.factor.offset = cell_offset,
    n.cores = 1,
    verbose = FALSE
)

## -----------------------------------------------------------------------------
scLANE_de_res <- getResultsDE(scLANE_models)
select(scLANE_de_res, Gene, Test_Stat, P_Val, P_Val_Adj, Gene_Dynamic_Overall) %>%
    slice_sample(n = 5) %>%
    knitr::kable(
        digits = 3,
        caption = "Sample of scLANE TDE statistics",
        col.names = c("Gene", "LRT Stat.", "P-value", "Adj. P-value", "Pred. Status")
    )

## ----fig.cap="Modeling framework comparison"----------------------------------
plotModels(scLANE_models,
    gene = scLANE_de_res$Gene[1],
    pt = pt_df,
    expr.mat = sim_counts,
    size.factor.offset = cell_offset,
    plot.glm = TRUE,
    plot.gam = TRUE
)

## -----------------------------------------------------------------------------
scLANE_models[[scLANE_de_res$Gene[1]]]$Lineage_A$Gene_Dynamics

## ----fig.cap="Dynamics of the top 4 most TDE genes"---------------------------
getFittedValues(scLANE_models,
    genes = scLANE_de_res$Gene[1:4],
    pt = pt_df,
    expr.mat = sim_counts,
    size.factor.offset = cell_offset,
    cell.meta.data = data.frame(cluster = sim_counts$label)
) %>%
    ggplot(aes(x = pt, y = rna_log1p)) +
    facet_wrap(~gene, ncol = 2) +
    geom_point(aes(color = cluster),
        size = 2,
        alpha = 0.75,
        stroke = 0
    ) +
    geom_ribbon(aes(ymin = scLANE_ci_ll_log1p, ymax = scLANE_ci_ul_log1p),
        linewidth = 0,
        fill = "grey70",
        alpha = 0.9
    ) +
    geom_line(aes(y = scLANE_pred_log1p),
        color = "black",
        linewidth = 0.75
    ) +
    scale_x_continuous(labels = scales::label_number(accuracy = 0.01)) +
    labs(
        x = "Pseudotime",
        y = "Normalized Expression",
        color = "Leiden"
    ) +
    theme_scLANE() +
    theme(strip.text.x = element_text(face = "italic")) +
    guides(color = guide_legend(override.aes = list(alpha = 1, size = 2, stroke = 1)))

## -----------------------------------------------------------------------------
dyn_genes <- filter(scLANE_de_res, Gene_Dynamic_Overall == 1) %>%
    pull(Gene)
smoothed_counts <- smoothedCountsMatrix(scLANE_models,
    size.factor.offset = cell_offset,
    pt = pt_df,
    genes = dyn_genes,
    log1p.norm = TRUE
)

## -----------------------------------------------------------------------------
col_anno_df <- data.frame(
    cell_name = colnames(sim_counts),
    leiden = as.factor(sim_counts$label),
    subject = as.factor(sim_counts$subject),
    pseudotime = sim_counts$cell_time_normed
) %>%
    arrange(pseudotime)
gene_order <- sortGenesHeatmap(smoothed_counts$Lineage_A, pt.vec = sim_counts$cell_time_normed)
heatmap_mat <- t(scale(smoothed_counts$Lineage_A))
colnames(heatmap_mat) <- colnames(sim_counts)
heatmap_mat <- heatmap_mat[, col_anno_df$cell_name]
heatmap_mat <- heatmap_mat[gene_order, ]
col_anno <- HeatmapAnnotation(
    Leiden = col_anno_df$leiden,
    Subject = col_anno_df$subject,
    Pseudotime = col_anno_df$pseudotime,
    show_legend = TRUE,
    show_annotation_name = FALSE,
    gap = unit(1, "mm"),
    border = TRUE
)

## ----fig.cap="Expression cascade of dynamic genes across pseudotime", message=FALSE, warning=FALSE----
Heatmap(
    matrix = heatmap_mat,
    name = "Scaled\nmRNA",
    col = circlize::colorRamp2(
        colors = viridis::inferno(50),
        breaks = seq(min(heatmap_mat), max(heatmap_mat), length.out = 50)
    ),
    cluster_columns = FALSE,
    width = 9,
    height = 6,
    column_title = "",
    cluster_rows = FALSE,
    top_annotation = col_anno,
    border = TRUE,
    show_column_names = FALSE,
    show_row_names = FALSE,
    use_raster = TRUE,
    raster_by_magick = TRUE,
    raster_quality = 5
)

## ----fig.cap="Embedding & clustering of gene dynamics"------------------------
gene_embedding <- embedGenes(expm1(smoothed_counts$Lineage_A))
ggplot(gene_embedding, aes(x = umap1, y = umap2, color = leiden)) +
    geom_point(
        alpha = 0.75,
        size = 2,
        stroke = 0
    ) +
    labs(
        x = "UMAP 1",
        y = "UMAP 2",
        color = "Gene Cluster"
    ) +
    theme_scLANE(umap = TRUE) +
    guides(color = guide_legend(override.aes = list(size = 2, alpha = 1, stroke = 1)))

## ----message=FALSE, warning=FALSE---------------------------------------------
sim_counts <- geneProgramScoring(sim_counts,
    genes = gene_embedding$gene,
    gene.clusters = gene_embedding$leiden
)

## ----fig.cap="Module scores for gene cluster 1"-------------------------------
plotPCA(sim_counts, colour_by = "cluster_0") +
    theme_scLANE(umap = TRUE)

## ----message=FALSE, warning=FALSE---------------------------------------------
dyn_gene_enrichment <- enrichDynamicGenes(scLANE_de_res, species = "hsapiens")
filter(dyn_gene_enrichment$result, source == "GO:BP") %>%
    select(term_id, term_name, p_value, source) %>%
    slice_head(n = 5) %>%
    knitr::kable(
        digits = 3,
        caption = "Trajectory pathway enrichment statistics",
        col.names = c("Term ID", "Term name", "Adj. p-value", "Source")
    )

## -----------------------------------------------------------------------------
sessionInfo()

