## ----chunk_setup, include = FALSE---------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----vignette_setup, echo=FALSE, message=FALSE, warning = FALSE---------------
# Track time spent on making the vignette.
start_time <- Sys.time()
pkg <- function(x) {
    paste0("[*", x, "*](https://cran.r-project.org/package=", x, ")")
}
pypkg <- function(x) {
    paste0("[*", x, "*](https://pypi.org/project/", x, "/)")
}

## ----bioconductor_install, eval=FALSE-----------------------------------------
# install.packages("BiocManager")
# BiocManager::install("DOtools")

## ----github_install, eval=FALSE-----------------------------------------------
# BiocManager::install("MarianoRuzJurado/DOtools")

## ----load_library, message=FALSE----------------------------------------------
#logging with sink
#sink(file = "output.log", append=TRUE, split = TRUE)

library(DOtools)

# Additional packages
library(SummarizedExperiment)
library(scran)
library(scater)
library(plyr)
library(dplyr)
library(tibble)
library(enrichR)
library(kableExtra)
library(Seurat)

## ----Cellbender, eval=FALSE, warning = FALSE----------------------------------
# base <- DOtools:::.example_10x()
# dir.create(file.path(base, "/cellbender"))
# 
# raw_files <- list.files(base,
#     pattern = "raw_feature_bc_matrix\\.h5$",
#     recursive = TRUE,
#     full.names = TRUE
# )
# 
# DO.CellBender(
#     cellranger_path = base,
#     output_path = file.path(base, "/cellbender"),
#     samplenames = c("disease"),
#     cuda = TRUE,
#     BarcodeRanking = FALSE,
#     cpu_threads = 48,
#     epochs = 150
# )

## ----read_example_data, warning = FALSE, eval=FALSE---------------------------
# base <- DOtools:::.example_10x()
# 
# paths <- c(
#     file.path(base, "healthy/outs/filtered_feature_bc_matrix.h5"),
#     file.path(base, "disease/outs/filtered_feature_bc_matrix.h5")
# )
# 
# SCE_obj <- DO.Import(
#     pathways = paths,
#     ids = c("healthy-1", "disease-1"),
#     DeleteDoublets = TRUE,
#     cut_mt = .05,
#     min_counts = 500,
#     min_genes = 300,
#     high_quantile = .95,
#     Seurat = FALSE # Set to TRUE for Seurat object
# )

## ----preflt, out.width="100%", fig.align="center", fig.width=8, fig.height=6----
prefilterplots <- system.file(
    "figures", "prefilterplots-1.png",
    package = "DOtools"
)

pQC1 <- magick::image_read(prefilterplots)
plot(pQC1)

## ----postflt, out.width="100%", fig.align="center", fig.width=8, fig.height=6----
postfilterplots <- system.file(
    "figures",
    "postfilterplots-1.png",
    package = "DOtools"
)

pQC2 <- magick::image_read(postfilterplots)
plot(pQC2)

## ----Correlation, fig.width=6, fig.height=6-----------------------------------
# Making sure we have a save folder
base <- tempfile("my_tempdir_")
dir.create(base)

SCE_obj <- readRDS(
    system.file("extdata",
        "sce_data.rds",
        package = "DOtools"
    )
)

DO.Correlation(SCE_obj)

## ----CCA Integration, warning = FALSE-----------------------------------------
SCE_obj <- DO.Integration(
    sce_object = SCE_obj,
    split_key = "orig.ident",
    HVG = TRUE,
    scale = TRUE,
    pca = TRUE,
    integration_method = "CCAIntegration"
)

## ----scVI Integration, warning=FALSE, eval=FALSE------------------------------
# # (Optional) Integration with scVI-Model
# SCE_obj <- DO.scVI(
#     sce_object = SCE_obj,
#     batch_key = "orig.ident",
#     layer_counts = "counts",
#     layer_logcounts = "logcounts"
# )
# 
# 
# SNN_Graph <- scran::buildSNNGraph(SCE_obj,
#     use.dimred = "scVI"
# )
# 
# clust_SCVI <- igraph::cluster_louvain(SNN_Graph,
#     resolution = 0.3
# )
# 
# SCE_obj$leiden0.3 <- factor(igraph::membership(clust_SCVI))
# SCE_obj <- scater::runUMAP(SCE_obj, dimred = "scVI", name = "UMAP")

## ----Clustering and UMAP, warning = FALSE-------------------------------------
DO.UMAP(SCE_obj,
    group.by = "leiden0.3"
)

DO.UMAP(SCE_obj,
    group.by = "condition",
    legend.position = "right",
    label = FALSE
)

## ----annotation, warning = FALSE----------------------------------------------
SCE_obj <- DO.CellTypist(SCE_obj,
    modelName = "Healthy_COVID19_PBMC.pkl",
    runCelltypistUpdate = TRUE,
    over_clustering = "leiden0.3"
)
DO.UMAP(SCE_obj, group.by = "autoAnnot", legend.position = "right")

## ----Manual annotation, fig.width=12, fig.height=7----------------------------
markers_list <- scran::findMarkers(
    SCE_obj,
    test.type = "t",
    groups = SingleCellExperiment::colData(SCE_obj)$autoAnnot,
    direction = "up",
    lfc = 0.25,
    pval.type = "any"
)

# pick top 5 per cluster, naming adjustments
annotation_Markers <- lapply(names(markers_list), function(cluster) {
    df <- as.data.frame(markers_list[[cluster]])
    df$gene <- rownames(df)
    df$cluster <- cluster
    df %>%
        rename(
            avg_log2FC = summary.logFC,
            p_val      = p.value,
            p_val_adj  = FDR
        ) %>%
        dplyr::select(gene, cluster, avg_log2FC, p_val, p_val_adj)
}) %>%
    bind_rows()

# or with seurat if preferred
Seu_obj <- as.Seurat(SCE_obj)
annotation_Markers <- FindAllMarkers(
    object = Seu_obj,
    assay = "RNA",
    group.by = "autoAnnot",
    min.pct = 0.25,
    logfc.threshold = 0.25
)

annotation_Markers <- annotation_Markers %>%
    arrange(desc(avg_log2FC)) %>%
    distinct(gene, .keep_all = TRUE) %>%
    group_by(cluster) %>%
    slice_head(n = 5)

p1 <- DO.Dotplot(
    sce_object = SCE_obj,
    Feature = annotation_Markers,
    group.by.x = "leiden0.3",
    plot.margin = c(1, 1, 1, 1),
    annotation_x = TRUE,
    point_stroke = 0.1,
    annotation_x_rev = TRUE,
    textSize = 14,
    hjust = 0.5,
    vjust = 0,
    textRot = 0,
    segWidth = 0.3,
    lwd = 3
)

# manual set of markers
annotation_Markers <- data.frame(
    cluster = c(
        "ImmuneCells",
        rep("B_cells", 3),
        rep("T_cells", 3),
        rep("NK", 2),
        rep("Myeloid", 3),
        rep("pDC", 3)
    ),
    genes = c(
        "PTPRC", "CD79A", "BANK1", "MS4A1",
        "CD3E", "CD4", "IL7R", "NKG7",
        "KLRD1", "CD68", "CD14", "ITGAM",
        "LILRA4", "CLEC4C", "LRRC26"
    )
)

p2 <- DO.Dotplot(
    sce_object = SCE_obj,
    Feature = annotation_Markers,
    group.by.x = "leiden0.3",
    plot.margin = c(1, 1, 1, 1),
    annotation_x = TRUE,
    point_stroke = 0.1,
    annotation_x_rev = TRUE,
    textSize = 14,
    hjust = 0.5,
    vjust = 0,
    textRot = 0,
    segWidth = 0.3,
    lwd = 3
)

# Visualise marker expression in UMAP
DO.UMAP(SCE_obj,
    FeaturePlot = TRUE,
    features = "NKG7",
    group.by = "leiden0.3",
    legend.position = "right"
)

## ----renaming-----------------------------------------------------------------
SCE_obj$annotation <- plyr::revalue(SCE_obj$leiden0.3, c(
    `1` = "T_cells",
    `2` = "T_cells",
    `3` = "NK",
    `4` = "B_cells",
    `5` = "Monocytes",
    `6` = "NK",
    `7` = "T_cells",
    `8` = "pDC"
))

DO.UMAP(SCE_obj, group.by = "annotation", legend.position = "right")

## ----cell populations, warning=FALSE------------------------------------------
DO.CellComposition(SCE_obj,
    assay_normalized = "RNA",
    cluster_column = "annotation",
    sample_column = "orig.ident",
    condition_column = "condition",
    transform_method = "arcsin",
    n_reps = 3
)

## ----Recluster, warning=FALSE-------------------------------------------------
SCE_obj <- DO.FullRecluster(SCE_obj, over_clustering = "annotation")
DO.UMAP(SCE_obj, group.by = "annotation_recluster")
T_cells <- DO.Subset(SCE_obj,
    ident = "annotation_recluster",
    ident_name = grep("T_cells",
        unique(SCE_obj$annotation_recluster),
        value = TRUE
    )
)

T_cells <- DO.CellTypist(T_cells,
    modelName = "Healthy_COVID19_PBMC.pkl",
    runCelltypistUpdate = FALSE,
    over_clustering = "annotation_recluster",
    SeuV5 = FALSE
)

T_cells$annotation <- plyr::revalue(
    T_cells$annotation_recluster,
    c(
        `T_cells_1` = "CD4_T_cells",
        `T_cells_2` = "CD4_T_cells",
        `T_cells_3` = "CD4_T_cells",
        `T_cells_4` = "CD8_T_cells"
    )
)

## ----re-annotate--------------------------------------------------------------
SCE_obj <- DO.TransferLabel(SCE_obj,
    Subset_obj = T_cells,
    annotation_column = "annotation",
    subset_annotation = "annotation"
)

DO.UMAP(SCE_obj, group.by = "annotation", legend.position = "right")

## ----GO analysis, warning=FALSE-----------------------------------------------
# this data set contains only one sample per condition
# we introduce replicates for showing the pseudo bulk approach
set.seed(123)
SCE_obj$orig.ident2 <- sample(rep(c("A", "B", "C", "D", "E", "F"),
    length.out = ncol(SCE_obj)
))
CD4T_cells <- DO.Subset(SCE_obj,
    ident = "annotation",
    ident_name = "CD4_T_cells"
)

DGE_result <- DO.MultiDGE(CD4T_cells,
    sample_col = "orig.ident2",
    method_sc = "wilcox",
    ident_ctrl = "healthy"
)

head(DGE_result, 10) %>%
    kable(format = "html", table.attr = "style='width:100%;'") %>%
    kable_styling(bootstrap_options = c(
        "striped",
        "hover",
        "condensed",
        "responsive"
    ))

## ----GO analysis2, warning=FALSE----------------------------------------------
result_GO <- DO.enrichR(
    df_DGE = DGE_result,
    gene_column = "gene",
    pval_column = "p_val_adj_SC_wilcox",
    log2fc_column = "avg_log2FC_SC_wilcox",
    pval_cutoff = 0.05,
    log2fc_cutoff = 0.25,
    path = NULL,
    filename = "",
    species = "Human",
    go_catgs = "GO_Biological_Process_2023"
)

head(result_GO, 5) %>%
    kable(format = "html", table.attr = "style='width:100%;'") %>%
    kable_styling(bootstrap_options = c(
        "striped",
        "hover",
        "condensed",
        "responsive"
    ))

## ----GO visualisation, fig.width=10, fig.height=8-----------------------------
result_GO_sig <- result_GO[result_GO$Adjusted.P.value < 0.05, ]
result_GO_sig$celltype <- "CD4T_cells"

DO.SplitBarGSEA(
    df_GSEA = result_GO_sig,
    term_col = "Term",
    col_split = "Combined.Score",
    cond_col = "State",
    pos_cond = "enriched",
    showP = FALSE,
    path = paste0(base, "/")
)
GSEA_plot <- list.files(
    path = base,
    pattern = "SplitBar.*\\.svg$",
    full.names = TRUE,
    recursive = TRUE
)
plot(magick::image_read_svg(GSEA_plot))

## ----dotplot, fig.width=5, fig.height=5---------------------------------------
DO.Dotplot(
    sce_object = SCE_obj,
    group.by.x = "condition",
    group.by.y = "annotation",
    Feature = "NKG7",
    stats_y = TRUE
)

## ----Heat map-----------------------------------------------------------------
path_file <- tempfile("dotools_plots_")
dir.create(path_file, recursive = TRUE, showWarnings = FALSE)

DO.Heatmap(SCE_obj,
    group_by = "leiden0.3",
    features = rownames(SCE_obj)[1:10],
    xticks_rotation = 45,
    path = path_file,
    stats_x_size = 20,
    showP = FALSE
)

Heatmap_plot <- list.files(
    path = path_file,
    pattern = "Heatmap*\\.svg$",
    full.names = TRUE,
    recursive = TRUE
)

plot(magick::image_read_svg(Heatmap_plot))

## ----Heat map FC--------------------------------------------------------------
path_file <- tempfile("dotools_plots_")
dir.create(path_file, recursive = TRUE, showWarnings = FALSE)

DO.HeatmapFC(SCE_obj,
    group_by = "leiden0.3",
    features = rownames(SCE_obj)[1:10],
    reference = "healthy",
    condition_key = "condition",
    xticks_rotation = 45,
    path = path_file,
    stats_x_size = 20,
    showP = FALSE
)

Heatmap_plot2 <- list.files(
    path = path_file,
    pattern = "Heatmap*\\.svg$",
    full.names = TRUE,
    recursive = TRUE
)

plot(magick::image_read_svg(Heatmap_plot2))

## ----Violin, fig.width=8, fig.height=5.5, warning = FALSE---------------------
SCE_obj_sub <- DO.Subset(SCE_obj,
    ident = "annotation",
    ident_name = c("NK", "CD4_T_cells", "B_cells")
)

DO.VlnPlot(SCE_obj_sub,
    Feature = "NKG7",
    group.by = "condition",
    group.by.2 = "annotation",
    ctrl.condition = "healthy"
)

## ----Bar, fig.width=5, fig.height=6, warning = FALSE--------------------------
SCE_obj_NK <- DO.Subset(SCE_obj,
    ident = "annotation",
    ident_name = "NK"
)

DO.Barplot(SCE_obj_NK,
    group.by = "condition",
    ctrl.condition = "healthy",
    Feature = "NKG7",
    test_use = "wilcox",
    correction_method = "fdr",
    x_label_rotation = 0
)

## ----Box, fig.width=5, fig.height=6, warning = FALSE--------------------------
set.seed(123)
SCE_obj$rdm_sample <- sample(rep(c("A", "B", "C"),
    length.out = ncol(SCE_obj)
))

SCE_obj$LogCounts <- log1p(SCE_obj$nCount_RNA) 

DO.BoxPlot(SCE_obj,
    group.by = "rdm_sample",
    ctrl.condition = "A",
    Feature = "LogCounts",
    step_mod = 0.01,
    stat_pos_mod = 1.001,
    plot_sample = FALSE
)

## ----session_info, echo=FALSE-----------------------------------------------------------------------------------------
options(width = 120)
sessioninfo::session_info()

