## ----include = FALSE----------------------------------------------------------
# Vignette-wide knitr chunk options: collapse each chunk's source and output
# into one block, prefix printed output with "#>", and set the figure width.
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 10)

## ----include = FALSE, eval=FALSE----------------------------------------------
# library(devtools)
# use_vignette("your-vignette-name")
# build_vignettes()
# getwd()
# rmarkdown::render("Input-data-format.Rmd", output_dir = "../doc/")

## ----setup--------------------------------------------------------------------
library(Aerith)
library(dplyr)

## ----ft1-reading--------------------------------------------------------------
# The demo FT1 content is stored as text lines inside an RDS file; restore it
# to a temporary .FT1 file so the path-based readers below can open it.
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
ft1_lines <- readRDS(rds)
writeLines(ft1_lines, con = demo_file)

# Three ways to load MS1 scans from the same file:
# 1. every scan in the file
all_scans <- readAllScanMS1(demo_file)

# 2. a range of scans, given as two scan numbers (1527 and 1550)
scan_range <- readScansMS1(demo_file, 1527, 1550)

# 3. a single scan, selected by its scan number (1555)
single_scan <- readOneScanMS1(demo_file, 1555)

# Take the 88th element of the full scan list, convert it with
# getRealScanFromList(), and plot the converted scan.
scan_88 <- all_scans[[88]]
processed_scan <- getRealScanFromList(scan_88)
plot(processed_scan)

## ----ft2-reading--------------------------------------------------------------
# demo.FT2 is looked up directly in the installed package's extdata folder
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")

# Load MS2 scans three ways: the whole file, the 1399-1500 range, and
# scan 1371 on its own
all_ms2_scans <- readAllScanMS2(demo_file)
ms2_range <- readScansMS2(demo_file, 1399, 1500)
single_ms2_scan <- readOneScanMS2(demo_file, 1371)

# Convert the 128th entry of the full list and plot the resulting spectrum
ms2_entry <- all_ms2_scans[[128]]
processed_ms2_scan <- getRealScanFromList(ms2_entry)
plot(processed_ms2_scan)

## ----ft1-writing--------------------------------------------------------------
# Restore the demo FT1 text from its RDS container into a temporary .FT1 file
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
writeLines(readRDS(rds), demo_file)

# Read the file header; it is handed back to the writer below
header <- readFTheader(demo_file)

# Read all scans from the original file
ft1_data <- readAllScanMS1(demo_file)

# Write the subset into the session's temporary directory
output_dir <- tempdir()
subset_path <- file.path(output_dir, "demo10.FT1")

# Keep at most the first 10 scans. head() never pads the list with NULL
# entries, which `[1:10]` would do if the file held fewer than 10 scans.
writeAllScanMS1(header, head(ft1_data, 10), subset_path)

# Verify file creation: fail loudly instead of printing a success message
# with an empty path when nothing was written.
if (!file.exists(subset_path)) {
    stop("Subset file was not created: ", subset_path, call. = FALSE)
}

# `pattern` is a regular expression, so escape the dot and anchor both ends
# to list exactly "demo10.FT1" and nothing that merely resembles it.
subset_files <- list.files(output_dir, pattern = "^demo10\\.FT1$", full.names = TRUE)
print(paste("Created subset file:", subset_files))

## ----ft2-writing--------------------------------------------------------------
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")

# Read header and scan data; the header is handed back to the writer below
header <- readFTheader(demo_file)
ft2_data <- readAllScanMS2(demo_file)

# Write the subset into the session's temporary directory
output_dir <- tempdir()
subset_path <- file.path(output_dir, "demo10.FT2")

# Keep at most the first 10 MS2 scans. head() never pads the list with NULL
# entries, which `[1:10]` would do if the file held fewer than 10 scans.
writeAllScanMS2(header, head(ft2_data, 10), subset_path)

# Confirm successful file creation: fail loudly instead of printing a success
# message with an empty path when nothing was written.
if (!file.exists(subset_path)) {
    stop("MS2 subset file was not created: ", subset_path, call. = FALSE)
}

# `pattern` is a regular expression, so escape the dot and anchor both ends
# to list exactly "demo10.FT2" and nothing that merely resembles it.
subset_files <- list.files(output_dir, pattern = "^demo10\\.FT2$", full.names = TRUE)
print(paste("Created MS2 subset file:", subset_files))

## ----mzml-reading-------------------------------------------------------------
# mzML support requires the mzR package

demo_file <- system.file(file.path("extdata", "demo.mzML"), package = "Aerith")

# MS1 pass: parse the MS1 data from the mzML file, then extract the scan
# selected by 16 and plot it
mzml_ms1_data <- readMzmlMS1(demo_file)
ms1_scan_id <- 16
ms1_spectrum <- getRealScan(ms1_scan_id, mzml_ms1_data)
plot(ms1_spectrum)

# MS2 pass: parse the MS2 data from the same file, then extract the scan
# selected by 18 and plot it
mzml_ms2_data <- readMzmlMS2(demo_file)
ms2_scan_id <- 18
ms2_spectrum <- getRealScan(ms2_scan_id, mzml_ms2_data)
plot(ms2_spectrum)

## ----mgf-reading--------------------------------------------------------------
demo_file <- system.file(file.path("extdata", "demo.mgf"), package = "Aerith")

# Parse the MGF file into memory
mgf_data <- readMgf(demo_file)

# Extract the spectrum selected by 2688 from the parsed data and plot it
mgf_scan_id <- 2688
selected_spectrum <- getRealScan(mgf_scan_id, mgf_data)
plot(selected_spectrum)

## ----pepxml-reading-----------------------------------------------------------
# pepXML parsing requires the mzR package

demo_file <- system.file(file.path("extdata", "demo.pepXML"), package = "Aerith")

# Parse the pepXML file into a table of peptide identification results
pepxml_results <- readPepXMLtable(demo_file)

# Print a compact overview of the parsed object (columns and their types)
str(pepxml_results)

## ----psm-reading--------------------------------------------------------------
demo_file <- system.file(file.path("extdata", "demo.psm.txt"), package = "Aerith")

# Load the peptide-spectrum match (PSM) table
psm_data <- readPSMtsv(demo_file)

# Show the first six rows to preview the columns
head(psm_data, n = 6L)

## ----protein-cluster-reading--------------------------------------------------
demo_file <- system.file(file.path("extdata", "demo.pro.cluster.txt"), package = "Aerith")

# Load the protein cluster table.
# NOTE(review): this reuses readPSMtsv(), the same reader used for PSM tables;
# confirm it is the intended parser for .pro.cluster.txt files.
protein_clusters <- readPSMtsv(demo_file)

# Show the first six rows to preview the grouping columns
head(protein_clusters, n = 6L)

## ----sip-reading--------------------------------------------------------------
demo_file <- system.file(file.path("extdata", "demo.sip"), package = "Aerith")

# Load the SIP results table.
# NOTE(review): this reuses readPSMtsv(), the same reader used for PSM tables;
# confirm it is the intended parser for .sip files.
sip_results <- readPSMtsv(demo_file)

# Show the first six rows to preview the columns
head(sip_results, n = 6L)

## ----spe2pep-reading----------------------------------------------------------
target_file <- system.file(
    file.path("extdata", "demo_target.Spe2Pep.txt"),
    package = "Aerith"
)

# Parse the Spe2Pep file; the PSM table is taken from the result's `PSM`
# component below
spe2pep_data <- readSpe2Pep(target_file)
psm_from_spe2pep <- spe2pep_data$PSM

# Show the first six rows of the extracted PSM table
head(psm_from_spe2pep, n = 6L)

## ----tic-analysis-------------------------------------------------------------
# Both TIC plots use the same breaks (9 to 10 in steps of 0.2) so that the
# MS1 and MS2 panels can be compared directly.
tic_breaks <- seq(9, 10, by = 0.2)

# MS1: restore the demo FT1 file from its RDS container, read every scan,
# compute the TIC and plot it
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
ft1_lines <- readRDS(rds)
writeLines(ft1_lines, con = demo_file)
ms1_scans <- readAllScanMS1(demo_file)
ms1_tic <- getTIC(ms1_scans)
plotTIC(ms1_tic, tic_breaks)

# MS2: same steps, reading demo.FT2 straight from the package
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")
ms2_scans <- readAllScanMS2(demo_file)
ms2_tic <- getTIC(ms2_scans)
plotTIC(ms2_tic, tic_breaks)

## ----scan-frequency-analysis--------------------------------------------------
# MS2: read all scans and derive the retention time / precursor table
demo_file <- system.file("extdata", "demo.FT2", package = "Aerith")
ms2_scan_data <- readAllScanMS2(demo_file)
ms2_retention_info <- getRetentionTimeAndPrecursorInfo(ms2_scan_data)

# MS1: restore the demo FT1 file from its RDS container, then do the same
rds <- system.file("extdata", "demo.FT1.rds", package = "Aerith")
demo_file <- tempfile(fileext = ".FT1")
ft1_lines <- readRDS(rds)
writeLines(ft1_lines, con = demo_file)
ms1_scan_data <- readAllScanMS1(demo_file)
ms1_retention_info <- getRetentionTimeAndPrecursorInfo(ms1_scan_data)

# Build the scan frequency plot in two parts and combine them with `+`.
# NOTE(review): the MS2 table is passed to plotScanFrequency() and the MS1
# table to plotScanFrequencyMS2(); the function names suggest the opposite
# pairing. Confirm against the Aerith documentation before changing.
frequency_breaks <- seq(9, 10, by = 0.2)
base_part <- plotScanFrequency(
    ms2_retention_info,
    binwidth = 0.1,
    breaks = frequency_breaks
)
added_part <- plotScanFrequencyMS2(ms1_retention_info, binwidth = 0.1)
combined_plot <- base_part + added_part

print(combined_plot)

## ----precursor-analysis-------------------------------------------------------
# Read every MS2 scan and derive the retention time / precursor table
demo_file <- system.file(file.path("extdata", "demo.FT2"), package = "Aerith")
ms2_data <- readAllScanMS2(demo_file)
precursor_info <- getRetentionTimeAndPrecursorInfo(ms2_data)

# Plot the precursor m/z frequency using 0.1-wide time bins and x-axis breaks
# from 8 to 11 in steps of 0.2
precursor_breaks <- seq(8, 11, by = 0.2)
plotPrecursorMzFrequency(precursor_info, timeBinWidth = 0.1, x_breaks = precursor_breaks)

## ----session-info-------------------------------------------------------------
# Record the R version and attached package versions used to build this document
utils::sessionInfo()

