## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("metabinR")

## ----eval=TRUE, message=FALSE-------------------------------------------------
options(java.parameters = "-Xmx1500M")
library(metabinR)
library(data.table)
library(dplyr)
library(ggplot2)
library(gridExtra)
library(cvms)
library(sabre)
library(Biostrings)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
abundances <- read.table(
    system.file("extdata", "distribution_0.txt", package = "metabinR"),
    col.names = c("genome_id", "abundance", "AB_id"))

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
reads.mapping <- fread(
        system.file("extdata", "reads_mapping.tsv.gz", package = "metabinR")) %>%
    merge(abundances[, c("genome_id", "AB_id")], by = "genome_id") %>%
    arrange(anonymous_read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
res.AB <- abundance_based_binning(
    system.file("extdata", "reads.metagenome.fasta.gz", package = "metabinR"),
    numOfClustersAB = 2,
    kMerSizeAB = 10,
    dryRun = FALSE,
    outputAB = "vignette"
)
res.AB

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
assignments.AB <- as.data.frame(res.AB) %>% arrange(read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
histogram.AB <- read.table("vignette__AB.histogram.tsv", header = TRUE)
ggplot(histogram.AB, aes(x = counts, y = frequency)) +
    geom_area() +
    labs(title = "kmer counts histogram") +
    theme_bw()

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
eval.AB.cvms <- cvms::evaluate(
    data = data.frame(
        prediction = as.character(assignments.AB$AB),
        target = as.character(reads.mapping$AB_id),
        stringsAsFactors = FALSE),
    target_col = "target",
    prediction_cols = "prediction",
    type = "binomial"
)
eval.AB.sabre <- sabre::vmeasure(
    as.character(assignments.AB$AB),
    as.character(reads.mapping$AB_id))

p <- cvms::plot_confusion_matrix(eval.AB.cvms) +
    labs(title = "Confusion Matrix",
         x = "Target Abundance Class",
         y = "Predicted Abundance Class")
tab <- as.data.frame(
    c(
        Accuracy    = round(eval.AB.cvms$Accuracy, 4),
        Specificity = round(eval.AB.cvms$Specificity, 4),
        Sensitivity = round(eval.AB.cvms$Sensitivity, 4),
        Fscore      = round(eval.AB.cvms$F1, 4),
        Kappa       = round(eval.AB.cvms$Kappa, 4),
        Vmeasure    = round(eval.AB.sabre$v_measure, 4)
    )
)
grid.arrange(p, ncol = 1)
knitr::kable(tab, caption = "AB binning evaluation", col.names = NULL)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
reads.mapping <- fread(
        system.file("extdata", "reads_mapping.tsv.gz", package = "metabinR")) %>%
    arrange(anonymous_read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
res.CB <- composition_based_binning(
    system.file("extdata", "reads.metagenome.fasta.gz", package = "metabinR"),
    numOfClustersCB = 10,
    kMerSizeCB = 4,
    dryRun = TRUE,
    outputCB = "vignette"
)
assignments.CB <- as.data.frame(res.CB) %>% arrange(read_id)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
eval.CB.sabre <- sabre::vmeasure(
    as.character(assignments.CB$CB),
    as.character(reads.mapping$genome_id))
tab <- as.data.frame(
    c(
        Vmeasure     = round(eval.CB.sabre$v_measure, 4),
        Homogeneity  = round(eval.CB.sabre$homogeneity, 4),
        Completeness = round(eval.CB.sabre$completeness, 4)
    )
)
knitr::kable(tab, caption = "CB binning evaluation", col.names = NULL)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
res.ABxCB <- hierarchical_binning(
    system.file("extdata", "reads.metagenome.fasta.gz", package = "metabinR"),
    numOfClustersAB = 2,
    kMerSizeAB = 10,
    kMerSizeCB = 4,
    dryRun = TRUE,
    outputC = "vignette"
)
assignments.ABxCB <- as.data.frame(res.ABxCB) %>% arrange(read_id)

eval.ABxCB.sabre <- sabre::vmeasure(
    as.character(assignments.ABxCB$ABxCB),
    as.character(reads.mapping$genome_id))
tab <- as.data.frame(
    c(
        Vmeasure     = round(eval.ABxCB.sabre$v_measure, 4),
        Homogeneity  = round(eval.ABxCB.sabre$homogeneity, 4),
        Completeness = round(eval.ABxCB.sabre$completeness, 4)
    )
)
knitr::kable(tab, caption = "ABxCB binning evaluation", col.names = NULL)

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
reads <- Biostrings::readDNAStringSet(
    system.file("extdata", "reads.metagenome.fasta.gz", package = "metabinR"))

res.AB.mem <- abundance_based_binning(
    reads,
    numOfClustersAB = 2,
    kMerSizeAB = 10,
    dryRun = TRUE
)
identical(nrow(assignments(res.AB.mem)), nrow(assignments(res.AB)))

## ----eval=TRUE, message=FALSE, warning=FALSE----------------------------------
unlink("vignette__*")

## ----setup--------------------------------------------------------------------
utils::sessionInfo()

