## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    crop = NULL # Related to
    # https://stat.ethz.ch/pipermail/bioc-devel/2020-April/016656.html
)

## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE----------------
## Bib setup
library("RefManageR")

## Write bibliography information
bib_packages <- c(
    R = citation(),
    BiocStyle = citation("BiocStyle")[1],
    knitr = citation("knitr")[1],
    RefManageR = citation("RefManageR")[1],
    rmarkdown = citation("rmarkdown")[1],
    sessioninfo = citation("sessioninfo")[1],
    testthat = citation("testthat")[1]
)

bib <- ReadBib("../inst/REFERENCES.bib")

# for (pkg in names(bib_packages)) {
#   bib[[pkg]] <- bib_packages[[pkg]]
# }


BibOptions(
    check.entries = FALSE, style = "markdown", cite.style = "numeric",
    bib.style = "numeric"
)

## ----"install", eval = FALSE--------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# 
# BiocManager::install("posDemux")
# 
# ## Check that you have a valid Bioconductor installation
# BiocManager::valid()

## ----"github_install", eval = FALSE-------------------------------------------
# if (!requireNamespace("devtools", quietly = TRUE)) {
#     install.packages("devtools")
# }
# 
# devtools::install("yaccos/posDemux")

## ----"citation"---------------------------------------------------------------
## Citation info
citation("posDemux")

## ----"start", message=FALSE---------------------------------------------------
library(posDemux)
library(Biostrings)
library(purrr)
library(magrittr)

## ----segments-----------------------------------------------------------------
sequence_annotation <- c(UMI = "P", "B", "A", "B", "A", "B", "A")
segment_lengths <- c(7L, 7L, 15L, 7L, 14L, 7L, NA_integer_)

## ----barcodes-----------------------------------------------------------------
barcode_files <- system.file("extdata/PETRI-seq_barcodes",
    c(
        bc1 = "bc1.fa",
        bc2 = "bc2.fa",
        bc3 = "bc3.fa"
    ),
    package = "posDemux"
)
names(barcode_files) <- paste0("bc", 1L:3L)
barcode_index <- map(barcode_files, readDNAStringSet)

## ----read_fastq---------------------------------------------------------------
input_fastq <- system.file("extdata",
    "PETRI-seq_forward_reads.fq.gz",
    package = "posDemux"
)
reads <- readDNAStringSet(input_fastq, format = "fastq")

## ----barcode_reorder----------------------------------------------------------
barcodes <- barcode_index[c("bc3", "bc2", "bc1")]

## ----run_demultiplex----------------------------------------------------------
demultiplex_res <- combinatorial_demultiplex(
    reads,
    barcodes = barcodes,
    segments = sequence_annotation,
    segment_lengths = segment_lengths
)

## ----assigned_barcodes--------------------------------------------------------
head(demultiplex_res$assigned_barcodes)

## ----mismatches---------------------------------------------------------------
head(demultiplex_res$mismatches)

## ----umis---------------------------------------------------------------------
demultiplex_res$payload$UMI

## ----filter_res---------------------------------------------------------------
filtered_res <- filter_demultiplex_res(
    demultiplex_res,
    allowed_mismatches = 1L
)

## ----summary_res--------------------------------------------------------------
filtered_res$summary_res

## ----filtered_tables----------------------------------------------------------
head(filtered_res$demultiplex_res$assigned_barcodes)
head(filtered_res$demultiplex_res$mismatches)

## ----retained-----------------------------------------------------------------
head(filtered_res$retained)

## ----freq_table---------------------------------------------------------------
freq_table <- create_freq_table(
    filtered_res$demultiplex_res$assigned_barcodes
)
head(freq_table)

## ----shiny_app, eval=FALSE----------------------------------------------------
# interactive_bc_cutoff(freq_table)

## ----shiny_app_cmd, eval=FALSE------------------------------------------------
# app <- interactive_bc_cutoff(freq_table)
# shiny::runApp(app, launch.browser = FALSE)

## ----cutoff_convert-----------------------------------------------------------
bc_cutoff <- 500L
freq_cutoff <- bc_to_freq_cutoff(freq_table, bc_cutoff)
freq_cutoff

## ----cutoff_reconstruct-------------------------------------------------------
reconstrued_bc_cutoff <- freq_to_bc_cutoff(
    freq_table,
    freq_cutoff
)
reconstrued_bc_cutoff

## ----knee_plot----------------------------------------------------------------
knee_plot(freq_table = freq_table, cutoff = bc_cutoff)

## ----freq_plot----------------------------------------------------------------
# Since the cutoff lines of the plot are provided by the literal x-coordinate,
# we must use the frequency cutoff
freq_plot(freq_table,
    cutoff = freq_cutoff, type = "density",
    log_scale_x = TRUE
)

## ----mass_plot----------------------------------------------------------------
# Since the cutoff lines of the plot are provided by the literal x-coordinate,
# we must use the frequency cutoff
freq_plot(freq_table,
    cutoff = freq_cutoff, type = "density",
    log_scale_x = TRUE, scale_by_reads = TRUE
)

## ----select_freq_table--------------------------------------------------------
selected_freq_table <- freq_table[seq_len(bc_cutoff), ]

## ----extract_tables-----------------------------------------------------------
assigned_barcodes <- filtered_res$demultiplex_res$assigned_barcodes
read_in_selection <- row_match(assigned_barcodes, selected_freq_table)
selected_assigned_barcodes <- assigned_barcodes[read_in_selection, ]

## ----extract_umis-------------------------------------------------------------
assigned_UMI <- filtered_res$demultiplex_res$payload$UMI %>% as.character()
selected_assigned_UMI <- assigned_UMI[read_in_selection]

## ----combined_frame-----------------------------------------------------------
res_table <- as.data.frame(selected_assigned_barcodes) %>%
    dplyr::mutate(
        read = rownames(selected_assigned_barcodes),
        UMI = selected_assigned_UMI
    ) %>%
    # Ensures the columns appears in the desired order
    dplyr::select(read, UMI, bc3, bc2, bc1)
head(res_table)

## ----write_to_file------------------------------------------------------------
file <- tempfile(pattern = "barcode_table", fileext = ".txt")
write.table(res_table, file,
    row.names = FALSE,
    col.names = TRUE, sep = "\t", eol = "\n", quote = FALSE
)

## ----reproduce3, echo=FALSE-------------------------------------------------------------------------------------------
## Session info
library("sessioninfo")
options(width = 120)
session_info()

## ----biblio, results = "asis", echo = FALSE, warning = FALSE, message = FALSE-----------------------------------------
## Print bibliography
PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))

