## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----install_dependencies, eval = FALSE---------------------------------------
# install.packages(c(
#     "data.table",
#     "seqinr",
#     "openxlsx",
#     "dplyr"
# ))
# if (!require("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# BiocManager::install(c(
#     "IRanges",
#     "GenomicRanges",
#     "GenomicAlignments",
#     "Rsamtools",
#     "Biostrings",
#     "Seqinfo",
#     "BSgenome",
#     "rtracklayer"
# ))

## ----install_PICB, eval = FALSE-----------------------------------------------
# BiocManager::install("PICB")

## ----install_PICB_dev, eval = FALSE-------------------------------------------
# devtools::install_github("HaaseLab/PICB")

## ----load_PICB----------------------------------------------------------------
library(PICB)

## ----load_BSgenome1, results="hide", message=FALSE----------------------------
# Example for Drosophila melanogaster (make sure to install the package first), replace with your own genome
library(BSgenome.Dmelanogaster.UCSC.dm6)
myGenome <- "BSgenome.Dmelanogaster.UCSC.dm6"

## ----load_myGenome2-----------------------------------------------------------
myGenome <- Seqinfo::Seqinfo(
    seqnames = c("chr2L", "chr2R", "chr3L", "chr3R", "chr4", "chrX", "chrY"),
    seqlengths = c(23513712, 25286936, 28110227, 32079331, 1348131, 23542271, 3667352)
)

## ----load_myGenome3-----------------------------------------------------------
myGenome <- Seqinfo::Seqinfo(genome = "dm6")

## ----load_myGenome4, eval = FALSE---------------------------------------------
# myGenome <- PICBloadfasta(FASTA.NAME = "/path/to/your/genome.fa")

## ----load_alignments----------------------------------------------------------
# demonstration bam_file is in the "inst/extdata/" folder of the PICB package
bam_file <- system.file("extdata", "Fly_Ov1_chr2L_20To21mb_filtered.bam", package = "PICB")
myAlignments <- PICBload(
    BAMFILE = bam_file,
    REFERENCE.GENOME = myGenome
)

## ----build_clusters-----------------------------------------------------------
myClusters <- PICBbuild(
    IN.ALIGNMENTS = myAlignments,
    REFERENCE.GENOME = myGenome,
    LIBRARY.SIZE = 12799826 #usually not necessary
)$clusters

## ----optimize_parameters------------------------------------------------------
parameterExploration <- PICBoptimize(
    IN.ALIGNMENTS = myAlignments,
    REFERENCE.GENOME = myGenome,
    MIN.UNIQUE.ALIGNMENTS.PER.WINDOW = c(1, 2, 3, 4, 5), 
    LIBRARY.SIZE = 12799826, #usually not necessary
    VERBOSITY = 1
)

## ----example_specify_parameter------------------------------------------------
x_column <- "MIN.UNIQUE.ALIGNMENTS.PER.WINDOW" # change parameter to optimize, if applicable

## ----example_plot_optimize_parameters, message=FALSE--------------------------
library(ggplot2)
# Determine a scaling factor for the secondary axis
scaling_factor <- max(parameterExploration$fraction.of.library.explained.by.clusters) / max(parameterExploration$fraction.of.genome.space.clusters)
# plot graph
ggplot(parameterExploration, aes(x = .data[[x_column]])) +
    geom_line(aes(y = fraction.of.library.explained.by.clusters * 100, color = "piRNAs Explained"), linewidth = 1) +
    geom_point(aes(y = fraction.of.library.explained.by.clusters * 100, color = "piRNAs Explained"), size = 3) +
    geom_line(aes(y = fraction.of.genome.space.clusters * scaling_factor * 100, color = "Genome Space"), linewidth = 1) +
    geom_point(aes(y = fraction.of.genome.space.clusters * scaling_factor * 100, color = "Genome Space"), size = 3) +
    scale_y_continuous(name = "piRNAs Explained by Clusters (%, piRNA sample)", sec.axis = sec_axis(~ . / scaling_factor, name = "Total piRNA cluster-length (Genome, %)")) +
    scale_x_reverse(name = paste0("Parameter chosen: ", x_column), breaks = parameterExploration[[x_column]], labels = parameterExploration[[x_column]]) +
    scale_color_manual(name = "Metrics", values = c("piRNAs Explained" = "#00a100", "Genome Space" = "black")) +
    theme_classic() +
    theme(axis.title.y.left = element_text(color = "#00a100"), axis.title.y.right = element_text(color = "black"), legend.position = "top")

## ----strand_specific_analysis-------------------------------------------------
myClustersWithStrandAnalysis <- PICBstrandanalysis(
    IN.ALIGNMENTS = myAlignments,
    IN.RANGES = myClusters
)

## ----output_clusters----------------------------------------------------------
myClustersWithStrandAnalysis

## ----export_results-----------------------------------------------------------
PICBexporttoexcel(
    IN.RANGES = myClustersWithStrandAnalysis,
    EXCEL.FILE.NAME = "myClusters_demonstration.xlsx"
)

## ----import_results-----------------------------------------------------------
myClustersWithStrandAnalysis <- PICBimportfromexcel(EXCEL.FILE.NAME = "myClusters_demonstration.xlsx")

## ----SessionInfo--------------------------------------------------------------
sessionInfo()

