## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
echo = TRUE,
message = FALSE,
warning = FALSE,
results = 'markup',
dev = 'png',     # raster images to reduce HTML size
dpi = 72,
fig.retina = 1
)

## ----eval=FALSE---------------------------------------------------------------
# # Install from Bioconductor
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("markeR")

## ----echo=FALSE---------------------------------------------------------------
library(markeR)

## ----example-gene-sets-vector, echo=FALSE-------------------------------------
# Gene set without direction
gene_set1 <- c("GeneA", "GeneB", "GeneC", "GeneD")

# Gene set with direction
gene_set2 <- data.frame(
  gene = c("GeneX", "GeneY", "GeneZ"),
  direction = c(1, -1, 1),
  stringsAsFactors = FALSE
)

# Combine both into a named list
gene_sets <- list(
  Set1 = gene_set1,
  Set2 = gene_set2
)

## ----show-gene-set, echo=TRUE-------------------------------------------------
# Example
gene_sets

## ----loadsig------------------------------------------------------------------
# Load example gene sets
data(genesets_example)

## ----loaddata-----------------------------------------------------------------
# Load example expression data
data(counts_example)
counts_example[1:5, 1:5]

## ----load_metadata------------------------------------------------------------
# Load example metadata
data(metadata_example)
head(metadata_example)

## ----fig.width=8, fig.height=4, warning=FALSE, message=FALSE------------------
# Quantification approach: scores
# Method: log2-median 
PlotScores(data = counts_example, 
           metadata = metadata_example, 
           gene_sets = genesets_example, 
           Variable="Condition",  
           method ="logmedian",    
           nrow = 1,    
           pointSize=4,  
           title="Marthandan et al. 2016", 
           widthTitle = 24,
           labsize=12, 
           titlesize = 12)  


## ----warning=FALSE, message=FALSE---------------------------------------------
Overall_Scores <- PlotScores(data = counts_example, 
                             metadata = metadata_example,  
                             gene_sets=genesets_example, 
                             Variable="Condition",  
                             method ="all",   
                             ncol = NULL, 
                             nrow = 1, 
                             widthTitle=30, 
                             limits = c(0,3.5),   
                             title="Marthandan et al. 2016", 
                             titlesize = 10,
                             ColorValues = list(heatmap=c("#F9F4AE", "#B44141"),
                                                volcano=signature_colors <- c(
                                                  HernandezSegura = "#A07395",               
                                                  REACTOME_CELLULAR_SENESCENCE = "#6B8E9E",  
                                                  LiteratureMarkers = "#CA7E45"             
                                                )
                             ),
                             mode="simple",
                             widthlegend=30, 
                             sig_threshold=0.05, 
                             cohen_threshold=0.6,
                             pointSize=6,
                             colorPalette="Paired")  

## ----Overall_Scores_heatmap, fig.width=10, fig.height=2, warning=FALSE, message=FALSE----
Overall_Scores$heatmap

## ----Overall_Scores_volcano, fig.width=6, fig.height=3, warning=FALSE, message=FALSE----
Overall_Scores$volcano

## ----roc_scores, fig.width=10, fig.height=3, warning=FALSE, message=FALSE-----
ROC_Scores(data = counts_example, 
           metadata = metadata_example, 
           gene_sets=genesets_example, 
           method = "all", 
           variable ="Condition",
           colors = c(logmedian = "#3E5587", ssGSEA = "#B65285", ranking = "#B68C52"), 
           grid = TRUE, 
           spacing_annotation=0.3, 
           ncol=NULL, 
           nrow=1,
           mode = "simple",
           widthTitle = 28,
           titlesize = 10,  
           title="Marthandan et al. 2016") 


## ----FDRSim, fig.width=12, fig.height=3,warning=FALSE, message=FALSE----------
set.seed("123456")
FPR_Simulation(data = counts_example,
               metadata = metadata_example,
               original_signatures = genesets_example,
               gene_list = row.names(counts_example),
               number_of_sims = 100,
               title = "Marthandan et al. 2016",
               widthTitle = 30,
               Variable = "Condition",
               titlesize = 12,
               pointSize = 5,
               labsize = 10,
               mode = "simple",
               ColorValues=NULL,
               ncol=NULL, 
               nrow=1 ) 


## ----DEGs, fig.width=10, fig.height=3, warning=FALSE, message=FALSE-----------
# Build design matrix from variables (Condition) and apply a contrast.
# The design matrix is constructed automatically using the variable "Condition".
DEGs <- calculateDE(data = counts_example,
                    metadata = metadata_example,
                    variables = "Condition",
                    contrasts = c("Senescent - Proliferative"))
DEGs$`Senescent-Proliferative`[1:5,]

## ----DEGsvolcano, fig.width=10, fig.height=3,warning=FALSE, message=FALSE-----
# Change order: gene sets in columns, contrast in rows
plotVolcano(DEGs, genes = genesets_example, 
            N = NULL,
            x = "logFC", y = "-log10(adj.P.Val)", pointSize = 2,
            color = "#6489B4", highlightcolor = "#05254A", highlightcolor_upreg = "#038C65", highlightcolor_downreg = "#8C0303",nointerestcolor = "#B7B7B7",
            threshold_y = NULL, threshold_x = NULL,
            xlab = NULL, ylab = NULL, ncol = NULL, nrow = NULL, title = "Marthandan et al. 2016",
            labsize = 10, widthlabs = 24, invert = TRUE)

## ----GSEA, warning=FALSE, message=FALSE---------------------------------------
GSEAresults <- runGSEA(DEGList = DEGs, 
                       gene_sets = genesets_example,
                       stat = NULL,
                       ContrastCorrection = FALSE)

GSEAresults

## ----GSEA_plotenrichment, fig.width=10, fig.height=3, warning=FALSE, message=FALSE----
plotGSEAenrichment(GSEA_results=GSEAresults, 
                   DEGList=DEGs, 
                   gene_sets=genesets_example, 
                   widthTitle=40, grid = TRUE, titlesize = 10, nrow=1, ncol=3) 

## ----GSEA_lollypop, fig.width=5, fig.height=4, out.width="60%", warning=FALSE, message=FALSE----
plotNESlollipop(GSEA_results=GSEAresults, 
                saturation_value=NULL, 
                nonsignif_color = "#F4F4F4", 
                signif_color = "red",
                sig_threshold = 0.05, 
                grid = FALSE, 
                nrow = NULL, ncol = NULL, 
                widthlabels=13, 
                title=NULL, titlesize=12) 

## -----------------------------------------------------------------------------
HernandezSegura_GeneSet <- list(HernandezSegura=genesets_example$HernandezSegura) 

## ----load_metadata_discovery--------------------------------------------------
data(metadata_example)
set.seed("123456")
metadata_example$Researcher <- sample(c("John","Ana","Francisca"),39, replace = TRUE)
metadata_example$DaysToSequencing <- sample(c(1:20),39, replace = TRUE)
head(metadata_example)

## ----GSEA_varassoc, fig.width=6, fig.height=6,warning=FALSE, message=FALSE , out.width="70%"----
VariableAssociation(
  data = counts_example,
  metadata = metadata_example,
  method = "GSEA",
  cols = c("Condition","Researcher","DaysToSequencing"),
  mode = "simple",
  gene_set = HernandezSegura_GeneSet,
  saturation_value = NULL,
  nonsignif_color = "white",
  signif_color = "red",
  sig_threshold = 0.05,
  widthlabels = 30,
  labsize = 10,
  titlesize = 14,
  pointSize = 5
) $plot


## ----variableassoc_score_sen, fig.width=7, fig.height=7,warning=FALSE, message=FALSE , out.width="70%"----
VariableAssociation(data = counts_example, 
                    metadata = metadata_example, 
                    method = "logmedian",
                    cols = c("Condition","Researcher","DaysToSequencing"),  
                    gene_set = HernandezSegura_GeneSet,
                    mode="simple",
                    nonsignif_color = "white", signif_color = "red", saturation_value=NULL,sig_threshold = 0.05,
                    widthlabels=30, labsize=10, titlesize=14, pointSize=5, discrete_colors=NULL,
                    continuous_color = "#8C6D03", color_palette = "Set2")$Overall 


## ----fig.width=6, fig.height=8------------------------------------------------

# Example data
signature1 <- c("TP53", "BRCA1", "MYC", "EGFR", "CDK2")
signature2 <- c("ATXN2", "FUS", "MTOR", "CASP3")

signature_list <- list(
  "User_Apoptosis" = c("TP53", "CASP3", "BAX"),
  "User_CellCycle" = c("CDK2", "CDK4", "CCNB1", "MYC"),
  "User_DNARepair" = c("BRCA1", "RAD51", "ATM"),
  "User_MTOR" = c("MTOR", "AKT1", "RPS6KB1")
)

geneset_similarity(
  signatures = list(Sig1 = signature1, Sig2 = signature2),
  other_user_signatures = signature_list,
  collection = "C2",
  subcollection = "CP:KEGG_LEGACY",
  metric = "odds_ratio",
  # Define gene universe (e.g., genes from HPA or your dataset)
  universe = unique(c(
    signature1, signature2,
    unlist(signature_list),
    msigdbr::msigdbr(species = "Homo sapiens", category = "C2")$gene_symbol
  )),
  or_threshold = 100, 
  width_text=50, 
  pval_threshold = 0.05
)$plot

## -----------------------------------------------------------------------------
sessionInfo()

