## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)
library(DT)
library(htmltools)

## ----install-bioc, eval=FALSE-------------------------------------------------
# if (!require("BiocManager", quietly = TRUE)) {
#   install.packages("BiocManager")
# }
# BiocManager::install("MutSeqR")

## ----load-lib-----------------------------------------------------------------
library(MutSeqR)

## ----load-MutSeqRData---------------------------------------------------------
library(ExperimentHub)
# load the index
eh <- ExperimentHub()
# Query the index for MutSeqRData records and their associated accessors.
query(eh, "MutSeqRData")

## ----find_BS_genome-----------------------------------------------------------
mouse_BS_genome <- find_BS_genome(organism = "mouse", genome = "mm10")
print(mouse_BS_genome)

## ----install_BS_genome, eval=FALSE--------------------------------------------
# BiocManager("BSgenome.Mmusculus.UCSC.mm10")

## ----import-vcf---------------------------------------------------------------
example_file <- eh[["EH9859"]]

sample_metadata <- data.frame(
  sample = "dna00996.1",
  dose = "50",
  dose_group = "High"
)
# Import the data
imported_example_data <- import_vcf_data(
  vcf_file = example_file,
  sample_data = sample_metadata,
  BS_genome = mouse_BS_genome
)

## ----import-vcf-table, echo=FALSE---------------------------------------------
DT::datatable(head(imported_example_data), options = list(scrollX = TRUE))

## ----import-mut---------------------------------------------------------------
example_data <- eh[["EH9857"]]

sample_metadata <- data.frame(
  sample = "dna00996.1",
  dose = "50",
  dose_group = "High"
)
# Import the data
imported_example_data <- import_mut_data(
  mut_file = example_data,
  sample_data = sample_metadata,
  BS_genome = find_BS_genome("mouse", "mm10"), # Must be installed!!
  is_0_based_mut = TRUE # indicates that the genomic coordinates are 0-based.
# Coordinates will be changed to 1-based upon import.
)

## ----import-mut-table, echo=FALSE---------------------------------------------
DT::datatable(head(imported_example_data), options = list(scrollX = TRUE))

## ----import-mut-rg------------------------------------------------------------
imported_example_data <- import_mut_data(
  mut_file = example_data,
  sample_data = sample_metadata,
  BS_genome = find_BS_genome("mouse", "mm10"),
  is_0_based_mut = TRUE,
  regions = "TSpanel_mouse"
)

## ----import-mut-table-rg, echo=FALSE------------------------------------------
DT::datatable(head(imported_example_data), options = list(scrollX = TRUE))

## ----load-rg------------------------------------------------------------------
region_example <- load_regions_file("TSpanel_mouse")
region_example

## ----import-mut-custom-cols, message=TRUE-------------------------------------
mut_data <- eh[["EH9858"]]

imported_example_data_custom <- import_mut_data(
  mut_file = mut_data,
  custom_column_names = list(my_contig_name = "contig",
                             my_sample_name = "sample")
)

## ----import-mut-cust-table, echo=FALSE----------------------------------------
DT::datatable(head(imported_example_data_custom), options = list(scrollX = TRUE))

## ----filter-mut, message=TRUE-------------------------------------------------
# load the example data
example_data <- eh[["EH9860"]]

# Filter
filtered_example_mutation_data <- filter_mut(
  mutation_data = example_data,
  vaf_cutoff = 0.01,
  regions = "TSpanel_mouse",
  regions_filter = "keep_within",
  custom_filter_col = "filter",
  custom_filter_val = "EndRepairFillInArtifact",
  custom_filter_rm = FALSE,
  snv_in_germ_mnv = TRUE,
  rm_filtered_mut_from_depth = TRUE,
  return_filtered_rows = FALSE
)

## ----filter-mut-table, echo=FALSE---------------------------------------------
filter_table <- filtered_example_mutation_data %>%
  dplyr::filter(filter_mut == TRUE)
DT::datatable(head(filter_table), options = list(scrollX = TRUE))

## ----mf-global----------------------------------------------------------------
# load example data:
example_data <- eh[["EH9861"]]

mf_data_global <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "none",
  retain_metadata_cols = c("dose_group", "dose")
)

## ----mf-global-table, echo=FALSE----------------------------------------------
DT::datatable(mf_data_global,
  options = list(
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mf-rg--------------------------------------------------------------------
mf_data_rg <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = c("sample", "label"),
  subtype_resolution = "none",
  retain_metadata_cols = "dose_group"
)

## ----mf-rg-table, echo=FALSE--------------------------------------------------
DT::datatable(mf_data_rg,
  options = list(
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mean-mf------------------------------------------------------------------
mf_data_global <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "none",
  retain_metadata_cols = c("dose_group", "dose")
)
mean_mf <- mf_data_global %>%
  dplyr::group_by(dose_group) %>%
  dplyr::summarise(mean_mf_min = mean(mf_min),
                   SE = sd(mf_min) / sqrt(dplyr::n()))

## ----mf-mean-table, echo=FALSE------------------------------------------------
DT::datatable(mean_mf,
  options = list(
    columnDefs = list(
      list(targets = c("mean_mf_min", "SE"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mf-6---------------------------------------------------------------------
mf_data_6 <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "base_6"
)

## ----mf-6-table, echo=FALSE---------------------------------------------------
DT::datatable(mf_data_6,
  options = list(
    scrollX = TRUE,
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mf-indels----------------------------------------------------------------
mf_data_global_indels <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "none",
  variant_types = c("insertion", "deletion")
)

## ----mf-indels-table, echo=FALSE----------------------------------------------
DT::datatable(mf_data_global_indels,
  options = list(
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mf-types-----------------------------------------------------------------
mf_data_types <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "type",
  variant_types = c("-ambiguous", "-uncategorized")
)

## ----mf-types-table, echo=FALSE-----------------------------------------------
DT::datatable(mf_data_types,
  options = list(
    scrollX = TRUE,
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mf-96--------------------------------------------------------------------
mf_data_96 <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "base_96",
  variant_types = "snv"
)

## ----mf-96_tables, echo=FALSE-------------------------------------------------
DT::datatable(mf_data_types,
  options = list(
    scrollX = TRUE,
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----precalc1-depth-----------------------------------------------------------
sample_depth <- data.frame(
  sample = unique(example_data$sample),
  group_depth = c(565395266, 755574283, 639909215, 675090988, 598104021,
                  611295330, 648531765, 713240735, 669734626, 684951248,
                  716913381, 692323218, 297661400, 172863681, 672259724,
                  740901132, 558051386, 733727643, 703349287, 884821671,
                  743311822, 799605045, 677693752, 701163532)
)

DT::datatable(sample_depth)

## ----mf-precalc-global--------------------------------------------------------
mf_data_global_precalc <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "none",
  calculate_depth = FALSE,
  precalc_depth_data = sample_depth
)

## ----mf-precalc1-table, echo=FALSE--------------------------------------------
DT::datatable(mf_data_global_precalc,
  options = list(
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----mf-precalc-6-------------------------------------------------------------
mf_data_6_precalc <- calculate_mf(
  mutation_data = example_data,
  cols_to_group = "sample",
  subtype_resolution = "base_6",
  calculate_depth = FALSE,
  precalc_depth = eh[["EH9862"]]
)

## ----precalc2-depth, echo=FALSE-----------------------------------------------
depth <- eh[["EH9862"]]

DT::datatable(depth)

## ----mf-precalc2-table, echo=FALSE--------------------------------------------
DT::datatable(mf_data_6_precalc,
  options = list(
    scrollX = TRUE,
    columnDefs = list(
      list(targets = c("mf_min", "mf_max"),
           render = DT::JS("function(data, type, row, meta) { return data.toExponential(2); }"))
    ),
    rowCallback = DT::JS(
      "function(row, data, dataIndex) {
        $('td:eq(2)', row).css('text-align', 'right'); // Align the content of column 2 to the right
      }"
    )
  )
)

## ----pre-calc-other-ex--------------------------------------------------------
base_12 <- eh[["EH9863"]]
base_96 <- eh[["EH9864"]]
base_192 <- eh[["EH9865"]]

## ----plot-mf-caption, include=FALSE-------------------------------------------
caption <- paste(
  "Mutation Frequency (MF) Minimum and Maximum (mutations/bp) per Sample.",
  "Light colored bars represent MFmin and dark coloured bars represent MFmax.",
  "Bars are coloured and grouped by dose.",
  "Data labels are the number of mutations per sample."
)

## ----plot-mf, fig.width=12, fig.height=6, fig.cap=caption---------------------
# Define the order for dose groups
mf_data_global$dose_group <- factor(
  mf_data_global$dose_group,
  levels = c("Control", "Low", "Medium", "High")
)
plot <- plot_mf(
  mf_data = mf_data_global,
  group_col = "sample",
  plot_type = "bar",
  mf_type = "both",
  fill_col = "dose_group",
  group_order = "arranged",
  group_order_input = "dose_group"
)
plot

## ----plot-mean-mf-caption, include=FALSE--------------------------------------
caption <- paste(
  "Mean Mutation Frequency (MF) Minimum per Dose. Lines are mean ± S.E.M.",
  "Points are individual samples, coloured by dose."
)

## ----plot-mean-mf, fig.cap=caption--------------------------------------------
plot_mean <- plot_mean_mf(
  mf_data = mf_data_global,
  group_col = "dose_group",
  mf_type = "min",
  fill_col = "dose_group",
  add_labels = "none",
  group_order = "arranged",
  group_order_input = "dose_group",
  plot_legend = FALSE,
  x_lab = "Dose Group"
)
plot_mean

## ----write-excel, eval=FALSE--------------------------------------------------
# # save a single data frame to an Excel file
# write_excel(mf_data_global, workbook_name = "example_mf_data")
# 
# # Write multiple data frames to a list to export all at once.
# list <- list(mf_data_global, mf_data_rg, mf_data_6)
# names(list) <- c("mf_per_sample", "mf_per_region", "mf_6spectra")
# 
# #save a list of data frames to an Excel file
# write_excel(list, workbook_name = "example_mf_data_list")
# 

## ----write-vcf, eval=FALSE----------------------------------------------------
# write_vcf_from_mut(example_data)

## ----download-yaml, eval=FALSE------------------------------------------------
# config <- system.file("extdata", "inputs", "summary_config.yaml", package = "MutSeqR")
# file.copy(from = config, to = "path/to/save/the/summary_config.yaml")

## ----render-report, eval=FALSE------------------------------------------------
# render_report(config_file_path = "path/to/summary_config.yaml",
#               output_file = "path/to/output_file.html",
#               output_format = "html_document")

## ----session-info, echo=FALSE-------------------------------------------------
sessionInfo()

