## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(sfi)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# BiocManager::install("sfi")

## ----fig.cap="Schematic representation of the Single File Injection (SFI) workflow. (Top) A pooled Quality Control (QC) sample is injected multiple times at the beginning of the run to establish a robust reference for peak alignment. Individual study samples are then injected sequentially at short, fixed time intervals (e.g., every 1 min) into a continuous isocratic flow. This results in a single, long mzML file containing interleaved chromatograms from all samples, which `sfi` separates and aligns. (Bottom) Demonstration of peak recovery algorithms for SFI data using fixed time intervals", out.width="80%"----
knitr::include_graphics("https://github.com/yufree/presentation/blob/gh-pages/figure/SFI.png?raw=true")

## ----eval=FALSE---------------------------------------------------------------
# path <- 'sfi.mzML'
# peak <- getmzml(path)

## -----------------------------------------------------------------------------
# load demo data
data(sfi)
# perform peaks picking
peaklist <-  find_2d_peaks(mz=sfi$mz,rt=sfi$rt,intensity=sfi$intensity)

## -----------------------------------------------------------------------------
# injection interval
idelta <- 92
# time windows for a full separation
windows <- 632
# sample numbers in the files
n <- 100
# retention time shift in seconds
deltart <- 10
# min peak number in pooled qc samples 
minn <- 6
# align peaks from sfi. Note: peaklist is an sfi_peaks object, so we can pass it directly.
se <- getsfm(peaklist, idelta=idelta, windows=windows, n=100, deltart=10, minn=6)
# The output is a SummarizedExperiment object
se

## ----eval=FALSE---------------------------------------------------------------
# # extract feature matrix with row metadata
# library(SummarizedExperiment)
# feature_table <- cbind(as.data.frame(rowData(se)), as.data.frame(assay(se)))
# # save feature as csv file
# library(data.table)
# fwrite(feature_table, 'featurelist.csv')

## ----downstream-viz, message=FALSE--------------------------------------------
if (requireNamespace("SummarizedExperiment", quietly = TRUE) && 
    requireNamespace("ggplot2", quietly = TRUE)) {
  library(SummarizedExperiment)
  library(ggplot2)
  
  # Basic data access
  intensities <- assay(se, "intensities")
  
  # Visualization: Distribution of intensities across samples
  # Reshape for ggplot2
  plot_df <- data.frame(
    Intensity = as.vector(intensities),
    Sample = rep(colnames(intensities), each = nrow(intensities))
  )
  # Filter out zeros for log transformation
  plot_df <- plot_df[plot_df$Intensity > 0, ]
  
  ggplot(plot_df, aes(x = Sample, y = Intensity, fill = Sample)) +
    geom_boxplot() +
    scale_y_log10() +
    theme_minimal() +
    theme(axis.text.x = element_blank()) +
    labs(title = "Intensity Distribution Across Samples",
         y = "Log10 Intensity",
         x = "Sample Index") +
    guides(fill = "none")
}

## ----downstream-norm, message=FALSE-------------------------------------------
if (requireNamespace("SummarizedExperiment", quietly = TRUE) && 
    requireNamespace("MsCoreUtils", quietly = TRUE)) {
  library(SummarizedExperiment)
  library(MsCoreUtils)
  
  # Log2 transformation
  # We can create a new assay in the SummarizedExperiment
  assay(se, "log2") <- log2(assay(se, "intensities") + 1)
  
  # Quantile normalization
  # assay(se, "norm") <- MsCoreUtils::normalizeMethods()[["quantile"]](assay(se, "log2"))
  
  # Example: Summarize feature metadata
  cat("Total number of features:", nrow(se), "\n")
  cat("M/Z range:", paste(range(rowData(se)$mz), collapse = " - "), "\n")
  
  # Accessing sample metadata for group-wise analysis
  colData(se)$Group <- rep(c("Control", "Treatment"), length.out = ncol(se))
  
  # You can now pass this object to other tools like limma or DESeq2
  # print(se)
}

## -----------------------------------------------------------------------------
# get windows and delta time
sfmsub <- peaklist[peaklist$intensity>1e4,]
class(sfmsub) <- c("sfi_peaks", "data.frame") # Preserve class for method dispatch
# get windows and delta time
sfi_params <- get_sfi_params(sfmsub, n=158, deltart = 5)

## -----------------------------------------------------------------------------
windows <- sfi_params['window']
idelta <- sfi_params['idelta']
se <- getsfm(peaklist,
              idelta = idelta, windows = windows,
              n = 158, deltart = 10, minn = 6)

## -----------------------------------------------------------------------------
sessionInfo()

