## ----setup, message=FALSE-----------------------------------------------------
library(DOTSeq)
suppressPackageStartupMessages(library(SummarizedExperiment))
suppressPackageStartupMessages(library(curl))

# Internet availability
has_net <- isTRUE(curl::has_internet())

# Ping Ensembl REST; on any error, return FALSE
ensembl_ok <- FALSE
if (has_net) {
  ensembl_ok <- tryCatch({
    res <- curl::curl_fetch_memory(
      "https://rest.ensembl.org/info/ping",
      handle = curl::new_handle(timeout = 3)
    )
    isTRUE(res$status_code >= 200 && res$status_code < 500)
  }, error = function(e) FALSE)
}

if (!ensembl_ok) {
  message("Ensembl not reachable; vignette will fall back to org.Hs.eg.db for gene symbols.")
  suppressPackageStartupMessages(library(org.Hs.eg.db))
}

## ----dir----------------------------------------------------------------------
dir <- system.file("extdata", package = "DOTSeq")
list.files(dir)

## ----read-in-count-file-------------------------------------------------------
cnt <- read.table(
    file.path(dir, "featureCounts.cell_cycle_subset.txt.gz"), 
    header = TRUE, 
    comment.char = "#"
)
names(cnt) <- gsub(".*(SRR[0-9]+).*", "\\1", names(cnt))
head(cnt)

## ----ref----------------------------------------------------------------------
gtf <- file.path(dir, "gencode.v47.orf_flattened_subset.gtf.gz")
bed <- file.path(dir, "gencode.v47.orf_flattened_subset.bed.gz")

## ----condition-table----------------------------------------------------------
cond <- read.table(file.path(dir, "metadata.txt.gz"))

# Ensure required column names for DOTSeq
names(cond) <- c("run", "strategy", "replicate", "treatment", "condition")

# Filter to include only chx-treated samples
cond <- cond[cond$treatment == "chx", ]
head(cond)

## ----dotseq, warning=FALSE----------------------------------------------------
# Create a DOTSeqDataSets object
datasets <- DOTSeqDataSetsFromFeatureCounts(
    count_table = cnt,
    condition_table = cond,
    flattened_gtf = gtf,
    flattened_bed = bed,
    verbose = FALSE
)

# Run the DOTSeq workflow
d <- DOTSeq(datasets = datasets)

## ----dotseq-objects-----------------------------------------------------------
show(d)

## ----posthoc------------------------------------------------------------------
results <- getContrasts(d, type = "interaction")
results

## ----rowdata------------------------------------------------------------------
rowData(getDOU(d))

## ----rowranges----------------------------------------------------------------
rowRanges(getDOU(d))

## ----merge--------------------------------------------------------------------
ou <- results$DOU[results$DOU$contrast == "Mitotic_Cycling - Interphase", ]
te <- results$DTE[results$DTE$contrast == "Mitotic_Cycling - Interphase", ]

results <- merge(ou, te, by = c("orf_id", "contrast"), all = TRUE)

## ----plot-venn, eval = requireNamespace("eulerr", quietly = TRUE), fig.small = TRUE----
plotDOT(
    plot_type = "venn", 
    results = results, 
    id_mapping = FALSE, 
    force_new_device = FALSE
)

## ----plot-composite-by-significance, fig.small = TRUE-------------------------
plotDOT(
    plot_type = "composite", 
    results = results, 
    id_mapping = FALSE, 
    plot_params = list(color_by = "significance", legend_position = "bottomright"),
    force_new_device = FALSE
)

## ----plot-composite-by-orfs, fig.small = TRUE---------------------------------
plotDOT(
    plot_type = "composite", 
    results = results, 
    id_mapping = FALSE, 
    data = getDOU(d), 
    plot_params = list(color_by = "orf_type", legend_position = "bottomright"),
    force_new_device = FALSE
)

## ----id-mapping-online-or-offline, fig.small = TRUE---------------------------
# Try accessing Ensembl REST API 
mapping <- NULL
if (isTRUE(ensembl_ok)) {
    mapping <- try(
        plotDOT(
            plot_type   = "volcano",
            results     = results,
            id_mapping  = TRUE, 
            plot_params = list(color_by = "significance", top_hits = 3, legend_position = "topright"),
            force_new_device = FALSE
        ),
    silent = TRUE
    )
}

# Fallback to org.Hs.eg.db
if (is.null(mapping)) {
    suppressPackageStartupMessages(library(org.Hs.eg.db))

    rd <- rowData(getDOU(d))
    rd$gene_id <- sub("\\.\\d+$", "", rd$gene_id)

    mapping <- AnnotationDbi::select(
        org.Hs.eg.db,
        keys    = unique(rd$gene_id),
        keytype = "ENSEMBL",
        columns = "SYMBOL"
    )
    names(mapping) <- c("ensembl_gene_id", "hgnc_symbol")
}

## ----plot-volcano-by-orfs, fig.small = TRUE-----------------------------------
plotDOT(
    plot_type = "volcano", 
    results = results,
    data = getDOU(d),
    id_mapping = mapping,
    plot_params = list(color_by = "orf_type", top_hits = 3, legend_position = "topright"),
    force_new_device = FALSE
)

## ----plot-heatmap, warning=FALSE, fig.width=4, fig.height=6, fig.align="center", out.width="325px"----
plotDOT(
    plot_type = "heatmap", 
    results = results, 
    data = getDOU(d), 
    id_mapping = mapping, 
    plot_params = list(rank_by = "score", top_hits = 20),
    force_new_device = FALSE
)

## ----plot-usage-gene-symbol, eval = requireNamespace("ggplot2", quietly = TRUE) && requireNamespace("ggsignif", quietly = TRUE)----
orderby <- c("Mitotic_Cycling", "Mitotic_Arrest", "Interphase")

id <- "TAF2"

plotDOT(
    plot_type = "usage",
    data = getDOU(d), 
    gene_id = id, 
    id_mapping = mapping, 
    plot_params = list(order_by = orderby),
    force_new_device = FALSE
)

## ----plot-usage-gene-id, eval = requireNamespace("ggsignif", quietly = TRUE)----
id <- "ENSG00000060339"

plotDOT(
    plot_type = "usage",
    data = getDOU(d),
    gene_id = id, 
    id_mapping = mapping, 
    plot_params = list(order_by = orderby),
    force_new_device = FALSE
)

## ----get-orfs, eval = FALSE---------------------------------------------------
# library(withr)
# 
# falink <- "https://ftp.ensembl.org/pub/release-78/fasta/drosophila_melanogaster/cdna/Drosophila_melanogaster.BDGP5.cdna.all.fa.gz"
# gtflink <- "https://ftp.ensembl.org/pub/release-78/gtf/drosophila_melanogaster/Drosophila_melanogaster.BDGP5.78.gtf.gz"
# 
# annotation <- basename(gtflink)
# sequences <- basename(falink)
# 
# download.file(gtflink, destfile=annotation)
# download.file(falink, destfile=sequences)
# 
# # This will generate flattened ORF-level annotation
# gr <- getORFs(
#     sequences = sequences,
#     annotation = annotation,
#     organism = "Drosophila melanogaster"
# )
# 
# withr::defer(unlink(c(annotation, sequences)))

## ----pasilla, eval = FALSE----------------------------------------------------
# library(pasillaBamSubset)
# library(GenomeInfoDb)
# 
# bam_list <- c(untreated1_chr4(), untreated3_chr4())
# 
# # Keep only reads mapped to the exons of coding genes
# temp_dir <- tempdir()
# getExonicReads(gr = gr, seqlevels_style = "UCSC", bam_files = bam_list, bam_output_dir = temp_dir, coding_genes_only = TRUE)
# 
# # Get the list of filtered BAM files
# bam_list <- list.files(
#     path = temp_dir,
#     pattern = "*exonic.*",
#     full.names = TRUE
# )
# 
# cnt <- countReads(gr = gr, bam_files = bam_list)
# withr::defer(unlink(bam_list))

## ----expand, eval = FALSE-----------------------------------------------------
# set.seed(42)
# 
# # Create count_table
# # Create two replicates for each condition with random scaling
# rna_treated_reps <- do.call(
#     cbind,
#     replicate(
#         2,
#         cnt[["untreated3_chr4.exonic.sorted.bam"]] * runif(
#             nrow(cnt),
#             min = 0.5,
#             max = 2
#         ),
#         simplify = FALSE
#     )
# )
# rna_control_reps <- do.call(
#     cbind,
#     replicate(
#         2,
#         cnt[["untreated3_chr4.exonic.sorted.bam"]] * runif(
#             nrow(cnt),
#             min = 0.1,
#             max = 0.5
#         ),
#         simplify = FALSE
#     )
# )
# ribo_treated_reps <- do.call(
#     cbind, replicate(
#         2,
#         cnt[["untreated1_chr4.exonic.sorted.bam"]] * runif(
#             nrow(cnt),
#             min = 0.5,
#             max = 2
#         ),
#         simplify = FALSE
#     )
# )
# ribo_control_reps <- do.call(
#     cbind,
#     replicate(
#         2,
#         cnt[["untreated1_chr4.exonic.sorted.bam"]] * runif(
#             nrow(cnt),
#             min = 0.1,
#             max = 0.5
#         ),
#         simplify = FALSE
#     )
# )
# 
# # Combine and name columns
# colnames(rna_treated_reps) <- paste0("rna_treated", 1:2)
# colnames(rna_control_reps) <- paste0("rna_control", 1:2)
# colnames(ribo_treated_reps) <- paste0("ribo_treated", 1:2)
# colnames(ribo_control_reps) <- paste0("ribo_control", 1:2)
# 
# cnt_expanded <- cbind(
#     rna_treated_reps,
#     rna_control_reps,
#     ribo_treated_reps,
#     ribo_control_reps
# )
# 
# # Convert numbers to integer
# cnt_expanded <- round(cnt_expanded)
# storage.mode(cnt_expanded) <- "integer"
# 
# rownames(cnt_expanded) <- rownames(cnt)
# cnt_expanded <- as.data.frame(cnt_expanded)
# 
# 
# # Create condition_table
# # Sample names from cnt_expanded
# sample_names <- colnames(cnt_expanded)
# 
# # Define condition and strategy for each sample
# condition <- c(
#     rep("treated", 2),
#     rep("control", 2),
#     rep("treated", 2),
#     rep("control", 2)
# )
# strategy <- c(rep("RNA", 4), rep("Ribo", 4))
# 
# cond <- data.frame(
#     run = sample_names,
#     replicate = c(1,2),
#     condition = factor(condition, levels = c("control", "treated")),
#     strategy = factor(strategy, levels = c("RNA", "Ribo"))
# )
# 
# # Create a DOTSeqDataSets object
# d <- DOTSeqDataSetsFromSummarizeOverlaps(
#     count_table = cnt_expanded,
#     condition_table = cond,
#     annotation = gr
# )
# 
# invisible(file.remove(bam_list))
# invisible(file.remove(sub("\\.gz$", ".sqlite", annotation)))

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

