## ----include = FALSE----------------------------------------------------------
library(knitr)
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----echo=TRUE, eval=FALSE----------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("ELViS")

## ----setup--------------------------------------------------------------------
library(ELViS)
library(ggplot2)
library(glue)
library(dplyr)
library(ComplexHeatmap)
theme_set(theme_bw())

## -----------------------------------------------------------------------------
analysis_dir = tempdir()
dir.create(analysis_dir,showWarnings = FALSE)

package_name = "ELViS"

# load toy example meta data
data(toy_example,package = package_name)

# get lust of bam file paths
ext_path = system.file("extdata",package = package_name)
bam_files = list.files(ext_path,full.names = TRUE,pattern = "bam$")



## -----------------------------------------------------------------------------
os_name = Sys.info()["sysname"]
if( os_name == "Windows" ){
    N_cores <- 1L
}else{
    N_cores <- 2L
}


# the name of the reference viral sequence the reads were aligned to
target_virus_name = "gi|333031|lcl|HPV16REF.1|"

# temporary file directory
tmpdir=tempdir()

# generate read depth matrix
system.time({
    mtrx_samtools_reticulate__example = 
        get_depth_matrix(
            bam_files = bam_files,coord_or_target_virus_name = target_virus_name
            ,mode = "samtools_reticulate"
            ,N_cores = N_cores
            ,min_mapq = 30
            ,tmpdir=tmpdir
            ,condaenv = "env_samtools"
            ,condaenv_samtools_version="auto"
            #,condaenv_samtools_version="1.22"
        )
})



## ----eval=FALSE, eval=FALSE, include=FALSE------------------------------------
# 
# # Actual depth matrix of 120 samples had been generated using the follpwing code
# 
# SKIP=1
# if(!SKIP){
# 
#     # 120 bams
#     # 5.5 sec
#     system.time({
#         mtrx_Rsamtools =
#             get_depth_matrix(
#                 bam_files = bam_files,coord_or_target_virus_name = target_virus_name
#                 ,mode = "Rsamtools"
#                 ,N_cores = N_cores
#                 ,max_depth = 1e5
#                 ,min_mapq = 30
#                 ,tmpdir="tmpdir"
#             )
#     })
# 
# }
# 
# 
# # 4.7 sec
# system.time({
#     mtrx_samtools_reticulate =
#         get_depth_matrix(
#             bam_files = bam_files,coord_or_target_virus_name = target_virus_name
#             ,mode = "samtools_reticulate"
#             ,N_cores = N_cores
#             ,min_mapq = 30
#             ,tmpdir="tmpdir"
#             ,condaenv = "env_samtools"
#             ,condaenv_samtools_version="auto"
#             #,condaenv_samtools_version="1.22"
#         )
# })
# 
# 
# 
# SKIP=1
# if(!SKIP){
# 
#     #2.3 sec
#     system.time({
#         mtrx_samtools =
#             get_depth_matrix(
#                 bam_files = bam_files,coord_or_target_virus_name = target_virus_name
#                 ,mode = "samtools_custom"
#                 ,N_cores = N_cores
#                 ,min_mapq = 30
#                 ,tmpdir="tmpdir"
#                 ,modules="module/file/example/1.2" # name of modulefile the user want to use
#             )
#     })
# }
# 
# use_data(mtrx_samtools_reticulate)
# 

## ----fig.width=5,fig.height=3-------------------------------------------------
# loading precalculated depth matrix
data(mtrx_samtools_reticulate)

# threshold
th = 50
# histogram with adjustable thresholds for custom function
depth_hist(mtrx_samtools_reticulate,th=th,smry_fun=max)
depth_hist(mtrx_samtools_reticulate,th=th,smry_fun=quantile,prob=0.75)

# filtered matrix
base_resol_depth = filt_samples(mtrx_samtools_reticulate,th=th,smry_fun=max)
print(base_resol_depth[1:4,1:4])


## ----echo=TRUE, eval=FALSE----------------------------------------------------
# system.time({
#     result = run_ELViS(
#         X = base_resol_depth
#         ,N_cores=N_cores
#         ,reduced_output=TRUE
#     )
# 
# })
# 
# ELViS_toy_run_result = result
# use_data(ELViS_toy_run_result)
# 
# # 4min for 120 samples using 10 threads

## ----fig.width=7,fig.height=5-------------------------------------------------
# ELViS run result
data(ELViS_toy_run_result)
result = ELViS_toy_run_result

# Directory where figures will be saved
figure_dir = glue("{analysis_dir}/figures")
dir.create(figure_dir)

# give the gff3 file of the virus of your interest. Sequence name or chromosome name should match with that in the reference genome FASTA file.
gff3_fn = system.file("extdata","HPV16REF_PaVE.gff",package = package_name)

## ----fig.width=7,fig.height=5-------------------------------------------------
# Plotting raw depth profile
gg_lst_x = 
    plot_pileUp_multisample(
        result = result,
        X_raw = base_resol_depth,
        plot_target = "x",
        gff3 = gff3_fn,
        baseline=1,
        exclude_genes = c("E6*","E1^E4","E8^E2"),
    )

# Save to pdf file, set SKIP = FALSE if you want to save as pdf
SKIP = TRUE
if(!SKIP){
    pdf(glue("{figure_dir}/Raw_Depth_CNV_call.pdf"),height=4,width=6)
    gg_lst_x
    dev.off()
}

# an example of raw read depth line plot
print(gg_lst_x[[1]])

## ----fig.width=7,fig.height=5-------------------------------------------------
# set the longest segment as a new baseline
new_baseline = get_new_baseline(result,mode="longest")

# Plotting raw depth profile with new baseline
gg_lst_x = 
    plot_pileUp_multisample(
        result = result,
        X_raw = base_resol_depth,
        plot_target = "x",
        gff3 = gff3_fn,
        baseline=new_baseline,
        exclude_genes = c("E6*","E1^E4","E8^E2"),
    )
# Save to pdf file, set SKIP = FALSE if you want to save as pdf
SKIP = TRUE
if(!SKIP){
    # Save to pdf file
    pdf("figures/Raw_Depth_new_baseline_CNV_call.pdf",height=4,width=6)
    gg_lst_x
    dev.off()
}

# an example of raw read depth line plot with new baseline
gg_lst_x[[1]]

## ----fig.width=7,fig.height=5-------------------------------------------------
# Plotting normalized depth profile
gg_lst_y = 
    plot_pileUp_multisample(
        result = result,
        X_raw = base_resol_depth,
        plot_target = "y",
        gff3 = gff3_fn,
        baseline=new_baseline,
        exclude_genes = c("E6*","E1^E4","E8^E2"),
    )

# Save to pdf file
SKIP = TRUE
if(!SKIP){
    pdf("figures/Normalized_Depth_CNV_call.pdf",height=4,width=6)
    gg_lst_y
    dev.off()
}

# an example of normalized read depth line plot with new baseline
gg_lst_y[[1]]

## ----fig.width=7,fig.height=5-------------------------------------------------
# Plotting robust Z-score profile
gg_lst_z = 
    plot_pileUp_multisample(
        result = result,
        X_raw = base_resol_depth,
        plot_target = "z",
        gff3 = gff3_fn,
        baseline=new_baseline,
        exclude_genes = c("E6*","E1^E4","E8^E2")
    )

SKIP = TRUE
if(!SKIP){
    # Save to pdf file
    pdf("figures/Robust-Z-score_CNV_call.pdf",height=4,width=6)
    gg_lst_z
    dev.off()
}

# an example of Z-score line plot with new baseline
gg_lst_z[[1]]

## ----eval=FALSE, include=FALSE,echo = FALSE-----------------------------------
# set.seed(54374373); total_aligned_base__host_and_virus =
#     c(
#         sample( (4:6)*(10^8),80,replace = TRUE),
#         sample( (7:10)*(10^8),20,replace = TRUE),
#         sample( (1:3)*(10^8),20,replace = TRUE)
#     )
# 
# use_data(total_aligned_base__host_and_virus,overwrite = TRUE)

## ----fig.width=5,fig.height=4-------------------------------------------------
data(total_aligned_base__host_and_virus)

viral_load = (10^6)*(apply(base_resol_depth,2,\(x) sum(x)) )/total_aligned_base__host_and_virus

# distribtuion of overall viral load
viral_load %>%log10 %>% hist

## -----------------------------------------------------------------------------
exclude_genes = c("E6*","E1^E4","E8^E2")
integ_ht_result = integrative_heatmap(
    X_raw = base_resol_depth,
    result = result,
    gff3_fn = gff3_fn,
    exclude_genes = exclude_genes,
    baseline=1,
    total_aligned_base__host_and_virus = total_aligned_base__host_and_virus
)

# top annotation
top_ant =
    HeatmapAnnotation(
        `Log2 Overall\nViral Load` = anno_points(log2(viral_load)),
        annotation_name_side = "left",annotation_name_rot=0)

## -----------------------------------------------------------------------------
gene_ref="E7"

gene_cn = 
    gene_cn_heatmaps(
        X_raw = base_resol_depth,
        result = result,
        gff3_fn = gff3_fn,
        baseline = new_baseline,
        gene_ref = gene_ref,
        exclude_genes = exclude_genes
    )



## ----fig.width=7,fig.height=8-------------------------------------------------
draw(top_ant %v% integ_ht_result$Heatmap %v% gene_cn$Heatmaps$intact_gene_cn %v% gene_cn$Heatmaps$rel_dosage)

## -----------------------------------------------------------------------------
sessionInfo()

