## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

## ----install, eval=FALSE------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("MeLSI")

## ----basic_setup--------------------------------------------------------------
# Load required packages
library(MeLSI)
library(vegan)
library(ggplot2)

# Generate synthetic microbiome data for demonstration
# Using smaller dataset for faster vignette execution
set.seed(42)
test_data <- generate_test_data(n_samples = 40, n_taxa = 50, n_signal_taxa = 5)
X <- test_data$counts
y <- test_data$metadata$Group

# Display data dimensions
cat("Data dimensions:", nrow(X), "samples x", ncol(X), "taxa\n")
cat("Groups:", paste(unique(y), collapse = ", "), "\n")

## ----preprocessing------------------------------------------------------------
# CLR transformation (recommended for microbiome data)
# Use the built-in helper function for easy CLR transformation
X_clr <- clr_transform(X)

# Display preprocessing results
cat("CLR transformation completed\n")
cat("Data range:", round(range(X_clr), 3), "\n")

## ----run_melsi----------------------------------------------------------------
# Run MeLSI with default parameters
# Note: For real analyses, use n_perms = 200+ for reliable p-values
# Using reduced parameters here for faster vignette execution (<15 min for Bioconductor)
cat("Running MeLSI analysis...\n")
melsi_results <- melsi(
    X_clr, y, 
    n_perms = 50,    # Number of permutations (use 200+ for real analysis)
    B = 20,          # Number of weak learners (default 30, reduced for speed)
    m_frac = 0.8,    # Fraction of features per learner
    show_progress = TRUE
)

# Display results
cat("\nMeLSI Results:\n")
cat(sprintf("F-statistic value: %.4f\n", melsi_results$F_observed))
cat(sprintf("P-value: %.4f\n", melsi_results$p_value))
cat(sprintf("Significant: %s\n", ifelse(melsi_results$p_value < 0.05, "Yes", "No")))

# VIP plot is automatically generated, but you can also create it manually:
# Using top_n = 10 for faster execution
plot_vip(melsi_results, top_n = 10)

# PCoA plot using the learned MeLSI distance
plot_pcoa(melsi_results, X_clr, y)

## ----comparison---------------------------------------------------------------
# Compare with Euclidean distance
dist_euclidean <- dist(X_clr)
adonis_euclidean <- adonis2(dist_euclidean ~ y, permutations = 50)

# Compare with Bray-Curtis distance
dist_bray <- vegdist(X, method = "bray")
adonis_bray <- adonis2(dist_bray ~ y, permutations = 50)

# Display comparison results
cat("\nMethod Comparison:\n")
cat(sprintf("MeLSI:        F-statistic = %.4f, p = %.4f\n", 
           melsi_results$F_observed, melsi_results$p_value))
cat(sprintf("Euclidean:    F-statistic = %.4f, p = %.4f\n", 
           adonis_euclidean[["F"]][1], adonis_euclidean[["Pr(>F)"]][1]))
cat(sprintf("Bray-Curtis: F-statistic = %.4f, p = %.4f\n", 
           adonis_bray[["F"]][1], adonis_bray[["Pr(>F)"]][1]))

# Calculate improvement
improvement <- (melsi_results$F_observed - adonis_euclidean[["F"]][1]) / adonis_euclidean[["F"]][1] * 100
cat(sprintf("\nMeLSI improvement over Euclidean: %.1f%%\n", improvement))

## ----vip_plot-----------------------------------------------------------------
# Plot VIP with directionality (default)
# Using top_n = 10 for faster execution
plot_vip(melsi_results, top_n = 10, title = "Top 10 Features by Importance")

# Plot VIP without directionality coloring
plot_vip(melsi_results, top_n = 10, directionality = FALSE)

# Extract and examine feature weights programmatically
feature_weights <- melsi_results$feature_weights
top_features <- head(sort(feature_weights, decreasing = TRUE), 10)

cat("\nTop 10 Most Weighted Features:\n")
for (i in 1:length(top_features)) {
    cat(sprintf("%2d. %s (weight: %.4f)\n", 
               i, names(top_features)[i], top_features[i]))
}

## ----pcoa_plot----------------------------------------------------------------
# Create PCoA plot using the learned MeLSI distance matrix
plot_pcoa(melsi_results, X_clr, y, title = "PCoA: MeLSI Distance")

## ----advanced_usage, eval=FALSE-----------------------------------------------
# # Run MeLSI with custom parameters
# # Note: This example is set to eval=FALSE for faster vignette execution
# # Uncomment and run locally for full demonstration
# cat("Running MeLSI with custom parameters...\n")
# custom_results <- melsi(
#     X_clr, y,
#     n_perms = 200,   # More permutations for higher precision (recommended: 200+)
#     B = 50,          # Larger ensemble for more stable results
#     m_frac = 0.8,    # Fraction of features per learner
#     show_progress = TRUE
# )
# 
# cat(sprintf("Custom MeLSI F-statistic value: %.4f (p = %.4f)\n",
#            custom_results$F_observed, custom_results$p_value))

## ----phyloseq_integration, eval=FALSE-----------------------------------------
# # Load phyloseq data
# library(phyloseq)
# data(GlobalPatterns)  # Example phyloseq object
# 
# # Extract OTU table and sample data
# X <- as(otu_table(GlobalPatterns), "matrix")
# y <- sample_data(GlobalPatterns)$SampleType
# 
# # CLR transformation
# X_clr <- clr_transform(X)
# 
# # Run MeLSI analysis
# results <- melsi(X_clr, y, n_perms = 200, B = 30, show_progress = FALSE)
# # Note: For vignette execution, use reduced parameters (n_perms=50, B=20) to stay under 15 minutes
# 
# # Results can be used with other phyloseq functions
# # For example, you can add the learned distance matrix to a phyloseq object

## ----session_info-------------------------------------------------------------
sessionInfo()