## -----------------------------------------------------------------------------
set.seed(003)
library(singIST)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
gse <- msigdb::getMsigdb(org = "hs", id = c("SYM", "EZID"),
                         version = msigdb::getMsigdbVersions()[1])

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Pathway definition (curated gene set identifier)
dc_pathway <- create_pathway(standard_name = "BIOCARTA_DC_PATHWAY",
                             dbsource = "BIOCARTA",
                             collection = "c2",
                             subcollection = "CP")
# Superpathway = pathway + modeled cell types + per cell type gene sets
dc_superpathway <- create_superpathway(pathway_info = dc_pathway,
                                       celltypes = c("Dendritic Cells",
                                                     "Langerhan Cells",
                                                     "T-cell"),
                                       list())
# Assign gene sets per cell type (retrieve gene set from MSigDB)
dc_superpathway <- setGeneSetsCelltype(dc_superpathway, gse = gse)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Pathway definition (curated gene set identifier)
cytokine_pathway <- create_pathway(
    standard_name = "KEGG_CYTOKINE_CYTOKINE_RECEPTOR_INTERACTION",
    dbsource = "KEGG",
    collection = "c2",
    subcollection = "CP"
)
# Superpathway = pathway + modeled cell types + per cell type gene sets
cytokine_superpathway <- create_superpathway(
    pathway_info = cytokine_pathway,
    celltypes = c("Dendritic Cells", "Keratinocytes", "Langerhan Cells",
                  "Melanocytes", "T-cell"),
    list()
)
# Pull the curated gene set (retrieve gene set from MSigDB)
gene_set <- pullGeneSet(cytokine_pathway, gse = gse)
# Provide a different gene set per cell type
cytokine_superpathway$gene_sets_celltype <- list(
    "Dendritic Cells" = sample(gene_set, 50),
    "Keratinocytes" = sample(gene_set, 100),
    "Langerhan Cells" = sample(gene_set, 10),
    "Melanocytes" = sample(gene_set, 60),
    "T-cell" = sample(gene_set, 160)
)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Load the pseudobulk matrix
file_dc <- system.file("extdata", "DC_counts.rda", package = "singIST")
obj_dc <- load(file_dc)
counts_dc <- get(obj_dc[1])
# Remove the cell types that we are not going to model
counts_dc <- counts_dc[!(sub("_.*$", "", rownames(counts_dc)) %in% 
                        c("MastC-other", "Keratinocytes", "Melanocytes")), ]

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Load the pseudobulk matrix
sample_id_dc <- c("AD1", "AD13", "AD2", "AD3","AD4", "HC1-2", "HC3", "HC4",
                  "HC5")
sample_class_dc <- c(rep("AD", 5), rep("HC", 4))
base_class_dc <- "HC"
target_class_dc <- "AD"

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Initialize hyperparameter object 
# Choose the array of quantile combinations that will be tested in the Cross
# Validation. Ideally one should choose a wide range of quantiles to test,
# as a narrow selection might not find the globally optimal model. The more
# quantiles one chooses the more computation time it will take, for the sake
# of simplicity we choose a small amount of quantiles to test
quantile_comb_table_dc <- as.matrix(
    RcppAlgos::permuteGeneral(seq(0.1, 0.95, by = 0.3),
                                m = length(dc_superpathway$celltypes), 
                                TRUE))
# Store tuning + CV design
dc_hyperparameters <- create_hyperparameters(
    quantile_comb_table = quantile_comb_table_dc,
    outcome_type = "binary",
    number_PLS = as.integer(3),
    folds_CV = as.integer(1), #LOOCV
    repetition_CV = as.integer(1)
)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
dc_superpathway_input <- create_superpathway_input(
    superpathway_info = dc_superpathway,
    hyperparameters_info = dc_hyperparameters,
    pseudobulk_lognorm = counts_dc,
    sample_id = sample_id_dc,
    sample_class = sample_class_dc,
    base_class = base_class_dc,
    target_class = target_class_dc
)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Load the pseudobulk matrix
file_cytokine <- system.file("extdata", "cytokine_counts.rda", package = "singIST")
obj_cytokine <- load(file_cytokine)
counts_cytokine <- get(obj_cytokine[1])
# Remove the cell types that we are not going to model
counts_cytokine <- counts_cytokine[!(sub("_.*$", "", rownames(counts_cytokine)) 
                                     %in% 
                                 c("MastC-other")), ]

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Load the pseudobulk matrix
sample_id_cytokine <- c("AD1", "AD13", "AD2", "AD3","AD4", "HC1-2", "HC3",
                        "HC4", "HC5")
sample_class_cytokine <- c(rep("AD", 5), rep("HC", 4))
base_class_cytokine <- "HC"
target_class_cytokine <- "AD"

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Initialize hyperparameter object 
quantile_comb_table <- as.matrix(
    RcppAlgos::permuteGeneral(seq(0.05, 0.95, by = 0.2),
                                m = length(cytokine_superpathway$celltypes), 
                                TRUE))
# Store tuning + CV design
cytokine_hyperparameters <- create_hyperparameters(
                                    quantile_comb_table = quantile_comb_table,
                                    outcome_type = "binary",
                                    number_PLS = as.integer(2),
                                    folds_CV = as.integer(5),
                                    repetition_CV = as.integer(5))

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
cytokine_superpathway_input <- create_superpathway_input(
    superpathway_info = cytokine_superpathway,
    hyperparameters_info = cytokine_hyperparameters,
    pseudobulk_lognorm = counts_cytokine,
    sample_id = sample_id_cytokine,
    sample_class = sample_class_cytokine,
    base_class = base_class_cytokine,
    target_class = target_class_cytokine)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
file_oxa <- system.file("extdata", "OXA.rda", package = "singIST")
obj_oxa <- load(file_oxa)
oxazolone <- get(obj_oxa[1])

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# The example object included in singIST uses these column positions
colnames(slot(oxazolone, "meta.data"))[c(5,11,1)] <- c("class",
                                                       "celltype_cluster",
                                                       "donor")
# If the object is a SingleCellExperiment, metadata live in colData()

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
celltype_mapping <- list(
                    "Dendritic Cells" = c("cDC2", "cDC1", "migratory DCs"),
                    "Keratinocytes" = c("Keratinocytes"),
                    "Langerhan Cells" = c("LC"), 
                    "Melanocytes" = c(),
                    "T-cell" = c("DETC", "T"))

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
oxa_org <- create_mapping_organism(organism = "Mus musculus", 
                target_class = "OXA", base_class = "ETOH",
                celltype_mapping = list(
                    "Dendritic Cells" = c("cDC2", "cDC1", "migratory DCs"),
                    "Keratinocytes" = c("Keratinocytes"),
                    "Langerhan Cells" = c("LC"), 
                    "Melanocytes" = c(),
                    "T-cell" = c("DETC", "T")),
                counts = oxazolone)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Register a parallel backend (SnowParam works on Windows/macOS/Linux);
# on Linux you may prefer MulticoreParam.
BiocParallel::register(BiocParallel::SnowParam(workers = 2,
                        exportglobals = FALSE, progressbar = FALSE),
                        default = TRUE) 
# Combine all human training inputs into a list
superpathways <- list(dc_superpathway_input,
                      cytokine_superpathway_input)
# Fit Optimal asmbPLS-DA models + compute validation metrics
# parallel: logical vector indicating whether the CV design should be parallel
# npermut: number of permutations used for permutation based tests
# type : validation strategy (jackknife or subsampling)
# nsubsampling: number of subsampling repeats (only used when type includes
# subsampling).
superpathways_fit <- multiple_fitOptimal(
    superpathways,
    parallel = c(TRUE, FALSE), 
    npermut = c(100),
    type = c("jackknife","subsampling"), 
    nsubsampling = c(NULL, 5)
    )
# Restore serial backend (good practice)
BiocParallel::register(BiocParallel::SerialParam(), default = TRUE) 

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
# Compute singIST recapitulations for all fitted superpathways
recapitulations_multiple <- multiple_singISTrecapitulations(oxa_org,
                                                            superpathways_fit)

## ----echo = TRUE, message = FALSE, warning = FALSE----------------------------
final_output <- render_multiple_outputs(objects= list(recapitulations_multiple))

## -----------------------------------------------------------------------------
utils::sessionInfo()

