## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    fig.width = 6,
    fig.height = 5,
    eval = TRUE
)
library(scGraphVerse)

## ----loaddata-----------------------------------------------------------------
# Note: This vignette requires external packages from Suggests
# Install if needed:
# BiocManager::install(c("TENxPBMCData", "scater", "SingleR", "celldex"))

# Helper function to preprocess PBMC data
preprocess_pbmc <- function(pbmc_obj) {
    sce <- scater::logNormCounts(pbmc_obj)
    symbols_tenx <- SummarizedExperiment::rowData(sce)$Symbol_TENx
    valid <- !is.na(symbols_tenx) & symbols_tenx != ""
    sce <- sce[valid, ]
    rownames(sce) <- make.unique(symbols_tenx[valid])
    SummarizedExperiment::assay(sce, "logcounts") <-
        as.matrix(SummarizedExperiment::assay(sce, "logcounts"))
    colnames(sce) <- paste0("cell_", seq_len(ncol(sce)))
    return(sce)
}

# 1. Load and preprocess PBMC data
if (requireNamespace("TENxPBMCData", quietly = TRUE)) {
    pbmc_obj <- TENxPBMCData::TENxPBMCData("pbmc3k")
    pbmc_obj1 <- TENxPBMCData::TENxPBMCData("pbmc4k")

    sce <- preprocess_pbmc(pbmc_obj)
    sce1 <- preprocess_pbmc(pbmc_obj1)

    # 2. Cell type annotation
    if (requireNamespace("celldex", quietly = TRUE) &&
        requireNamespace("SingleR", quietly = TRUE)) {
        ref <- celldex::HumanPrimaryCellAtlasData()
        pred1 <- SingleR::SingleR(test = sce1, ref = ref, labels=ref$label.main)
        SummarizedExperiment::colData(sce1)$pcell <- pred1$labels

        pred <- SingleR::SingleR(test = sce, ref = ref, labels=ref$label.main)
        SummarizedExperiment::colData(sce)$pcell <- pred$labels

        # 3. Select top 100 B-cell genes
        genes <- selgene(
            object = sce,
            top_n = 100,
            cell_type = "B_cell",
            cell_type_col = "pcell",
            remove_rib = TRUE,
            remove_mt = TRUE
        )

        genes1 <- selgene(
            object = sce1,
            top_n = 100,
            cell_type = "B_cell",
            cell_type_col = "pcell",
            remove_rib = TRUE,
            remove_mt = TRUE
        )

        # 4. Find intersection
        commong <- intersect(genes, genes1)

        # 5. Subset data
        pbmc1_sub <- sce[
            commong,
            SummarizedExperiment::colData(sce)$pcell == "B_cell"
        ]
        pbmc2_sub <- sce1[
            commong,
            SummarizedExperiment::colData(sce1)$pcell == "B_cell"
        ]

        pbmc1_sub <- pbmc1_sub[, 1:85]
        pbmc2_sub <- pbmc2_sub[, 1:85]

        # Create MultiAssayExperiment for multi-sample analysis
        bcell_mae <- create_mae(list(pbmc1 = pbmc1_sub, pbmc2 = pbmc2_sub))
    }
}

## ----genie3-------------------------------------------------------------------
# Choose method: "GENIE3", "GRNBoost2", "ZILGM", "PCzinb" or "JRF"
method <- "GENIE3"
if (exists("bcell_mae")) {
    networks <- infer_networks(
        count_matrices_list = bcell_mae,
        method = method,
        nCores = 1
    )
}

## ----adjacency, fig.alt="plot Graphs"-----------------------------------------
if (exists("networks")) {
    # Weighted adjacency (returns SummarizedExperiment)
    wadj_se <- generate_adjacency(networks)

    # Symmetrize (returns SummarizedExperiment)
    swadj_se <- symmetrize(wadj_se, weight_function = "mean")

    # Binary cutoff (top 1%, returns SummarizedExperiment)
    binary_se <- cutoff_adjacency(
        count_matrices = bcell_mae,
        weighted_adjm_list = swadj_se,
        n = 1,
        method = method,
        quantile_threshold = 0.99,
        nCores = 1
    )

    # Plot
    if (requireNamespace("ggraph", quietly = TRUE)) {
        plotg(binary_se)
    }
}

## ----consensus, fig.alt="plot consensus Graph"--------------------------------
if (exists("binary_se")) {
    # Consensus by vote
    consensus <- create_consensus(binary_se, method = "vote")

    # Plot consensus network
    if (requireNamespace("ggraph", quietly = TRUE)) {
        plotg(consensus)
    }
}

## ----comparecommunity, fig.alt="comparing Graphs with reference"--------------
if (exists("consensus")) {
    # Note: For comparison with external databases like STRINGdb,
    # use stringdb_adjacency() to fetch reference networks

    # Community detection
    if (requireNamespace("igraph", quietly = TRUE)) {
        communities <- community_path(consensus)
    }
}

## ----stringdb-----------------------------------------------------------------
# Note: This requires internet connection for STRINGdb downloads
# This section requires the STRINGdb package from Suggests
if (exists("consensus") && requireNamespace("STRINGdb", quietly = TRUE)) {
    str_result <- stringdb_adjacency(
        genes = rownames(consensus),
        species = 9606,
        required_score = 900,
        keep_all_genes = TRUE
    )

    # Extract binary adjacency and symmetrize
    str_binary <- str_result$binary
    ground_truth_se <- symmetrize(
        build_network_se(list(string_network = str_binary)),
        weight_function = "mean"
    )
    ground_truth <- SummarizedExperiment::assay(ground_truth_se, 1)

    # Edge mining: TP rates (returns list of dataframes)
    em <- edge_mining(
        consensus,
        ground_truth = ground_truth,
        query_edge_types = "TP"
    )

    # Display results
    if (length(em) > 0) {
        print(head(em[[1]]))
    }
}

## ----sessioninfo--------------------------------------------------------------
sessionInfo()