## ----setup, include=FALSE-----------------------------------------------------
use_ragg <- requireNamespace("ragg", quietly = TRUE)
knitr::opts_chunk$set(
  fig.width = 10,
  fig.height = 6,
  out.width = "100%",
  fig.align = "center",
  fig.retina = 2,
  dev = if (use_ragg) "ragg_png" else "png",
  dpi = 120
)

## ----Loading the Package and Example Data, message=TRUE, warning=TRUE---------
suppressPackageStartupMessages({
  library(fRagmentomics)
})

# Locate the example files bundled with the package
mut_file <- system.file(
  "extdata/mutation",
  "cfdna-egfr-del_chr7_55241864_55243064_10k.mutations.tsv",
  package = "fRagmentomics"
)
bam_file <- system.file(
  "extdata/bam",
  "cfdna-egfr-del_chr7_55241864_55243064_10k.bam",
  package = "fRagmentomics"
)
fasta_file <- system.file(
  "extdata/fasta",
  "hg19_chr7_55231864_55253064.fa",
  package = "fRagmentomics"
)

# Print the file paths to confirm they were found
mut_file
bam_file
fasta_file

# Detect whether bcftools is available on the system PATH
has_bcftools <- nzchar(Sys.which("bcftools"))
if (!has_bcftools) {
  message(
    "bcftools not found on PATH; normalization examples will run without it."
  )
}

## ----run_analysis_basic, message=TRUE, warning=TRUE---------------------------
if (!has_bcftools) {
  message("bcftools not detected; running without external normalization.")
}

# Run the full analysis pipeline with default settings
df_frag_basic <- run_fRagmentomics(
  mut = mut_file,
  bam = bam_file,
  fasta = fasta_file,
  sample_id = "cfdna-egfr-del",
  apply_bcftools_norm = has_bcftools,
  verbose = FALSE,
  n_cores = 1
)

# View the dimensions and the first few rows
cat("Dimensions of basic results:", dim(df_frag_basic), "\n")
head(df_frag_basic)

## ----run_analysis_advanced, message=TRUE, warning=TRUE------------------------
if (!has_bcftools) {
  message(
    "bcftools not detected; running advanced workflow without external normalization."
  )
}

# Run the analysis with custom parameters
df_frag_advanced <- run_fRagmentomics(
  mut = mut_file,
  bam = bam_file,
  fasta = fasta_file,
  sample_id = "cfdna-egfr-del",
  neg_offset_mate_search = -500,
  pos_offset_mate_search = 500,
  flag_bam_list = list(
    isProperPair = TRUE,
    isUnmappedQuery = FALSE,
    hasUnmappedMate = FALSE,
    isSecondaryAlignment = FALSE
  ),
  report_bam_info = TRUE,
  report_softclip = TRUE,
  retain_fail_qc = TRUE,
  report_5p_3p_bases_fragment = 3,
  apply_bcftools_norm = has_bcftools,
  verbose = FALSE,
  n_cores = 1
)

# Observe that stricter filtering may result in fewer fragments
cat("Dimensions of advanced results:", dim(df_frag_advanced), "\n")

# View the newly added columns
head(df_frag_advanced[, c(
  "Fragment_Id",
  "TLEN",
  "Nb_Fragment_Bases_Softclip_5p",
  "Fragment_Bases_5p"
)])

## ----dataframe_output---------------------------------------------------------
# Dataframe output
df_frag_basic

## ----count_status_simple------------------------------------------------------
table(df_frag_advanced$Fragment_Status_Simple)

## ----count_status_detail------------------------------------------------------
table(df_frag_advanced$Fragment_Status_Detail)

## ----summary_size-------------------------------------------------------------
summary(df_frag_advanced$Fragment_Size)

## ----extract_vaf--------------------------------------------------------------
unique(df_frag_advanced$VAF)

## ----plot_size_histogram------------------------------------------------------
plot_size_distribution(
  df_fragments = df_frag_advanced,
  vals_z = c("MUT", "WT"),
  show_histogram = TRUE,
  show_density = FALSE,
  x_limits = c(100, 420),
  histo_args = list(alpha = 0.5)
)

## ----plot_size_density--------------------------------------------------------
plot_size_distribution(
  df_fragments = df_frag_advanced,
  show_histogram = FALSE,
  show_density = TRUE,
  x_limits = c(100, 420)
)

## ----plot_size_ungrouped------------------------------------------------------
plot_size_distribution(
  df_fragments = df_frag_advanced,
  col_z = NULL,
  show_histogram = TRUE,
  show_density = TRUE,
  x_limits = c(100, 420),
  histo_args = list(alpha = 0.5)
)

## ----plot_motif_hierarchical--------------------------------------------------
plot_motif_barplot(
  df_fragments = df_frag_basic,
  representation = "split_by_base",
  vals_z = c("MUT", "WT")
)

## ----plot_motif_hierarchical_custom-------------------------------------------
plot_motif_barplot(
  df_fragments = df_frag_basic,
  representation = "split_by_base",
  motif_start = c("C", "G"),
  vals_z = c("MUT", "WT"),
  alpha = 0.7 # Pass 'alpha' to geom_bar()
)

## ----plot_motif_differential--------------------------------------------------
plot_motif_barplot(
  df_fragments = df_frag_advanced,
  representation = "differential",
  vals_z = c("MUT", "WT")
)

## ----plot_motif_side_by_side--------------------------------------------------
plot_motif_barplot(
  df_fragments = df_frag_basic,
  representation = "split_by_motif",
  vals_z = c("MUT", "WT")
)

## ----plot_logo_comparison-----------------------------------------------------
plot_ggseqlogo_meme(
  df_fragments = df_frag_basic,
  motif_type = "Start",
  motif_size = 6,
  vals_z = c("MUT", "WT"),
  colors_z = c("#F6BD60", "#84A59D", "#FD96A9", "#083D77")
)

## ----plot_logo_single_group---------------------------------------------------
plot_ggseqlogo_meme(
  df_fragments = df_frag_basic,
  motif_type = "Both",
  motif_size = 4,
  vals_z = "MUT",
  colors_z = c("#F6BD60", "#84A59D", "#FD96A9", "#083D77")
)

## ----plot_freq_barplot--------------------------------------------------------
plot_freq_barplot(
  df_fragments = df_frag_basic,
  motif_size = 5,
  motif_type = "Start",
  alpha = 0.7
)

## ----session_info-------------------------------------------------------------
sessionInfo()

