## ----style, echo = FALSE, results = 'asis', message=FALSE---------------------
BiocStyle::markdown()

## ----echo = FALSE, message = FALSE--------------------------------------------
library(Chromatograms)
library(BiocStyle)
register(SerialParam())

## -----------------------------------------------------------------------------
# A data.frame with chromatogram variables.
cdata <- data.frame(
    msLevel = c(1L, 1L),
    mz = c(112.2, 123.3),
    chromIndex = c(1L, 2L)
)

# Retention time and intensity values for each chromatogram.
pdata <- list(
    data.frame(
        rtime = c(11, 12.4, 12.8, 13.2, 14.6, 15.1, 16.5),
        intensity = c(50.5, 123.3, 153.6, 2354.3, 243.4, 123.4, 83.2)
    ),
    data.frame(
        rtime = c(45.1, 46.2, 53, 54.2, 55.3, 56.4, 57.5),
        intensity = c(100, 180.1, 300.45, 1400, 1200.3, 300.2, 150.1)
    )
)

# Create and initialize the backend
be <- backendInitialize(ChromBackendMemory(),
    chromData = cdata, peaksData = pdata
)

# Create Chromatograms object
chr <- Chromatograms(be)
chr

## -----------------------------------------------------------------------------
MRM_file <- system.file("proteomics", "MRM-standmix-5.mzML.gz",
    package = "msdata"
)

be <- backendInitialize(ChromBackendMzR(),
    files = MRM_file,
    BPPARAM = SerialParam()
)

chr_mzr <- Chromatograms(be)

## -----------------------------------------------------------------------------
length(chr)
length(chr_mzr)

## -----------------------------------------------------------------------------
peaksData(chr)

## -----------------------------------------------------------------------------
peaksData(chr, columns = c("rtime"), drop = TRUE)

chr$rtime

## -----------------------------------------------------------------------------
peaksData(chr) <- list(
    data.frame(
        rtime = c(1, 2, 3, 4, 5, 6, 7),
        intensity = c(1, 2, 3, 4, 5, 6, 7)
    ),
    data.frame(
        rtime = c(1, 2, 3, 4, 5, 6, 7),
        intensity = c(1, 2, 3, 4, 5, 6, 7)
    )
)

## -----------------------------------------------------------------------------
chr$rtime <- list(
    c(8, 9, 10, 11, 12, 13, 14),
    c(8, 9, 10, 11, 12, 13, 14)
)

## -----------------------------------------------------------------------------
chr_filt <- filterPeaksData(chr, variables = "rtime", ranges = c(12, 15))

length(chr_filt)

length(rtime(chr_filt))

## -----------------------------------------------------------------------------
chromData(chr)

## -----------------------------------------------------------------------------
chromData(chr, columns = c("msLevel"))

chr$chromIndex

## -----------------------------------------------------------------------------
chr$msLevel <- c(2L, 2L)

chromData(chr)

## -----------------------------------------------------------------------------
chr$extra <- c("extra1", "extra2")
chromData(chr)

## -----------------------------------------------------------------------------
chr_filt <- filterChromData(chr,
    variables = "chromIndex", ranges = c(1, 2),
    keep = TRUE
)

length(chr_filt)
length(chr)

## ----define-function----------------------------------------------------------
## Define a function that takes the backend as an input, divides the intensity
## by parameter y and returns it. Note that ... is required in
## the function's definition.
divide_intensities <- function(x, y, ...) {
    intensity(x) <- lapply(intensity(x), `/`, y)
    x
}

## Add the function to the procesing queue
chr_2 <- addProcessing(chr, divide_intensities, y = 2)
chr_2

## ----custom-processing--------------------------------------------------------
intensity(chr_2)
intensity(chr)

## ----applyProcessing----------------------------------------------------------
length(chr_2@processingQueue)
chr_2 <- applyProcessing(chr_2)

length(chr_2@processingQueue)
chr_2

## -----------------------------------------------------------------------------
processingChunkFactor(chr)

## -----------------------------------------------------------------------------
processingChunkFactor(chr_mzr) |>
  head()

## -----------------------------------------------------------------------------
processingChunkSize(chr_mzr) <- 10

processingChunkFactor(chr_mzr) |> table()

## -----------------------------------------------------------------------------
print(object.size(chr_mzr), units = "Mb")
chr_mzr <- setBackend(chr_mzr, ChromBackendMemory(), BPPARAM = SerialParam())

chr_mzr

chr_mzr@backend@peaksData[[1]] |> head() # data is now in memory

## -----------------------------------------------------------------------------
print(object.size(chr_mzr), units = "Mb")

## ----message=FALSE------------------------------------------------------------
library(Spectra)
library(IRanges)
sp <- Spectra(
  DataFrame(
    rtime = c(100, 110, 120, 130, 140),
    msLevel = c(1L, 1L, 1L, 1L, 1L),
    dataOrigin = rep("example", 5L),
    mz = NumericList(
      c(100, 101), c(100, 101), c(100, 101), c(100, 101), c(100, 101),
      compress = FALSE
    ),
    intensity = NumericList(
      c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2),
      compress = FALSE
    )
  ),
  source = MsBackendDataFrame()
)

chr_s <- Chromatograms(sp)

## -----------------------------------------------------------------------------
chr_s

## -----------------------------------------------------------------------------
## Create custom metadata for EIC extraction
custom_cd <- data.frame(
    msLevel = c(1L, 1L),
    dataOrigin = rep(dataOrigin(sp)[1], 2),
    mzMin = c(100, 200),
    mzMax = c(100.5, 200.5)
)

chr_custom <- Chromatograms(sp, chromData = custom_cd)
chr_custom

## -----------------------------------------------------------------------------
## Work on a copy so downstream examples are not affected
chr_s_tmp <- chr_s

## Modify metadata
chr_s_tmp$msLevel <- rep(2L, length(chr_s_tmp))

## Re-factorize to update the groupings
chr_s_tmp <- factorize(chr_s_tmp)

chromData(chr_s_tmp)

## -----------------------------------------------------------------------------
chromData(chr_s)$rtmin <- 125
chromData(chr_s)$rtmax <- 180
chromData(chr_s)$mzmin <- 100
chromData(chr_s)$mzmax <- 100.5

## -----------------------------------------------------------------------------
library(RColorBrewer)
col3 <- brewer.pal(3, "Dark2")

plotChromatograms(chr_s, col = col3)

## -----------------------------------------------------------------------------
plotChromatogramsOverlay(chr_s, col = col3)

## -----------------------------------------------------------------------------
## Define peaks of interest with retention time windows
peak_table <- data.frame(
    rtMin = c(8, 11),
    rtMax = c(10, 13),
    msLevel = c(2L, 2L),
    chromIndex = c(1L, 2L)
)

## Extract those regions
chr_extracted <- chromExtract(chr, peak_table,
                              by = c("msLevel", "chromIndex"))

chr_extracted

## -----------------------------------------------------------------------------
chromData(chr_extracted)

## -----------------------------------------------------------------------------
## Define peak table with both retention time and m/z windows
peak_table_mz <- data.frame(
    rtMin = c(125, 125),
    rtMax = c(180, 180),
    mzMin = c(100, 140),
    mzMax = c(100.5, 140.5),
    msLevel = c(1L, 1L),
    dataOrigin = rep(dataOrigin(chr_s)[1], 2),
    featureID = c("feature_1", "feature_2")
)

## Extract EICs for these features
chr_eics <- chromExtract(chr_s, peak_table_mz,
                        by = c("msLevel", "dataOrigin"))

chr_eics

## -----------------------------------------------------------------------------
chromData(chr_eics)

## -----------------------------------------------------------------------------
## A small Spectra with some missing intensities at m/z 100
sp_gaps <- Spectra(
    DataFrame(
        rtime      = c(100, 110, 120, 130, 140, 150, 160, 170, 180),
        msLevel    = rep(1L, 9),
        dataOrigin = rep("impute_demo", 9),
        mz = NumericList(100, 100, 100, 100, 100, 100, 100, 100, 100,
                         compress = FALSE),
        intensity = NumericList(50, NA, 120, 200, NA, NA, 80, 30, 10,
                               compress = FALSE)
    ),
    source = MsBackendDataFrame()
)

## Derive a Chromatograms and extract the EIC for m/z ≈ 100
chr_gaps <- Chromatograms(sp_gaps)
eic_table <- data.frame(
    rtMin = 100, rtMax = 180,
    mzMin = 99.5, mzMax = 100.5,
    msLevel = 1L,
    dataOrigin = "impute_demo"
)

chr_eic <- chromExtract(chr_gaps, eic_table,
                        by = c("msLevel", "dataOrigin"))
chr_eic

## ----fig.height=10, fig.width=8-----------------------------------------------
## Create copies for comparison
chr_linear <- imputePeaksData(chr_eic, method = "linear")
chr_spline <- imputePeaksData(chr_eic, method = "spline")
chr_gaussian <- imputePeaksData(chr_eic, method = "gaussian",
                                window = 2, sd = 1)
chr_loess <- imputePeaksData(chr_eic, method = "loess", span = 0.75)

## Plot all methods for comparison
par(mfrow = c(3, 2), mar = c(4, 4, 2, 1))

## Original data
plotChromatograms(chr_eic, main = "Original EIC")

## Linear interpolation
plotChromatograms(chr_linear, main = "Linear Imputation")

## Spline interpolation
plotChromatograms(chr_spline, main = "Spline Imputation")

## Gaussian smoothing
plotChromatograms(chr_gaussian, main = "Gaussian Smoothing (window=2, sd=1)")

## LOESS smoothing
plotChromatograms(chr_loess, main = "LOESS Smoothing (span=0.75)")

## -----------------------------------------------------------------------------
## For on-disk backends, add imputation to the lazy queue
chr_mzr_imputed <- imputePeaksData(
  chr_mzr,
  method = "gaussian",
  window = 5,
  sd = 2
)

chr_mzr_imputed

## -----------------------------------------------------------------------------
## This reads from disk and applies imputation in one step
peak_data <- peaksData(chr_mzr_imputed[1])

## -----------------------------------------------------------------------------
length(chr_mzr_imputed@processingQueue)

## -----------------------------------------------------------------------------
## For in-memory backends, you can persist the imputation
chr_in_memory <- setBackend(chr_mzr_imputed, ChromBackendMemory())
chr_in_memory <- applyProcessing(chr_in_memory)

# Now imputation is permanently applied
length(chr_in_memory@processingQueue)

## ----compare-chromatograms-mrm------------------------------------------------
## Pick 8 MRM chromatograms, skipping the first (a TIC with no m/z info)
chr_sub <- chr_mzr[2:9]

cor_arr <- compareChromatograms(chr_sub)
cor_arr[, , "score"]   ## similarity scores
cor_arr[, , "n_peaks"] ## number of overlapping RT points per pair

## Use a chromData column as row/column labels
cor_arr_labeled <- compareChromatograms(chr_sub, labelsColumn = "chromIndex")
cor_arr_labeled[, , "score"]

## ----compare-chromatograms-heatmap, fig.width=6, fig.height=5-----------------
library(pheatmap)
## Label rows/columns with precursor → product m/z transitions
mz_labels <- paste0(round(chromData(chr_sub)$precursorMz, 1), " → ",
                     round(chromData(chr_sub)$productMz, 1))
score_mat <- cor_arr[, , "score"]
rownames(score_mat) <- colnames(score_mat) <- mz_labels
pheatmap(score_mat, main = "Pairwise Pearson correlation",
         color = hcl.colors(30, palette = "RdYlBu", rev = TRUE))

## ----compare-chromatograms-custom---------------------------------------------
cosine <- function(x, y) {
    sum(x * y) / (sqrt(sum(x^2)) * sqrt(sum(y^2)))
}

cos_arr <- compareChromatograms(chr_sub, FUN = cosine)
cos_arr[, , "score"]

## ----compare-chromatograms-minpeaks-------------------------------------------
## Require at least 10 overlapping RT points to compute a score
cor_strict <- compareChromatograms(chr_sub, minPeaks = 10L)
cor_strict[, , "score"]   ## NAs for pairs with < 10 common RT points
cor_strict[, , "n_peaks"] ## actual overlap counts are always recorded

## ----compare-chromatograms-xy-------------------------------------------------
## Compare the first 4 chromatograms against the last 4
res <- compareChromatograms(chr_sub[1:4], chr_sub[5:8])
res[, , "score"]

## ----compare-chromatograms-groups, eval=FALSE---------------------------------
# grp_list <- split(chr_sub, chromData(chr_sub)$dataOrigin)
# lapply(grp_list, compareChromatograms)

## ----si-----------------------------------------------------------------------
sessionInfo()

