## ----knitr, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cache = FALSE
)

## ----setup, eval = FALSE------------------------------------------------------
# # install from bioconductor
# if (!require(BiocManager)) {
#   install.packages('BiocManager')
#   BiocManager::install('leapR')
# }
# 

## ----libraries, message=FALSE, warning = FALSE--------------------------------
# load the core libraries
library(leapR)
library(gplots)
library(rmarkdown)
# plotting helpers used in this vignette
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)

## ----pathdb-------------------------------------------------------------------
data(ncipid)

## ----omicsdata, message=FALSE, warning=FALSE, echo = FALSE--------------------
url <- "https://api.figshare.com/v2/file/download/56536217"
pdata <- download.file(url, method = "libcurl", destfile = "protData.rda")
#  as.matrix()
load("protData.rda")

p <- file.remove("protData.rda")

url <- "https://api.figshare.com/v2/file/download/56536214"
tdata <- download.file(url, method = "libcurl", destfile = "transData.rda")
load("transData.rda")
p <- file.remove("transData.rda")

url <- "https://api.figshare.com/v2/file/download/56536211"
phdata <- download.file(url, method = "libcurl", destfile = "phosData.rda")
load("phosData.rda")
p <- file.remove("phosData.rda")

## ----show_pset_example--------------------------------------------------------
# Inspect the `pset` SummarizedExperiment.
str(pset)
dim(SummarizedExperiment::assay(pset, "proteomics"))
head(rownames(pset))
head(colnames(pset))

## ----ptlists------------------------------------------------------------------
data(shortlist)
data(longlist)

## columns that we want to use for results

cols_to_display <- c("ingroup_n", "outgroup_n", "background_n", 
                     "pvalue", "BH_pvalue")

## ----abundance, echo=TRUE, warning=FALSE, message=FALSE-----------------------
# in this example we lump a bunch of patients together (the 'short survivors')
# and compare them to another group (the 'long survivors')

### using enrichment_wrapper function
protdata.enrichment.svl <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

or <- order(unlist(protdata.enrichment.svl[, "pvalue"]))
rmarkdown::paged_table(protdata.enrichment.svl[or, cols_to_display])
# another application is to compare just one patient against another
# (this would be the  equivalent of comparing one time point to another)

### using enrichment_wrapper function
protdata.enrichment.svl.ovo <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist[1],
  secondary_columns = longlist[1]
)
or <- order(unlist(protdata.enrichment.svl.ovo[, "pvalue"]))
rmarkdown::paged_table(protdata.enrichment.svl.ovo[or, cols_to_display])

## ----fishers, echo=TRUE, warning=FALSE, message=FALSE-------------------------
# for this example we will construct a list of genes from the expression data
#     to emulate what you might be inputting
genelist <- rownames(pset)[which(SummarizedExperiment::assay(pset, 
                                                    "proteomics")[, 1] > 0.5)]

protdata.enrichment.sets.test <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_in_sets",
  eset = pset,
  assay_name = "proteomics",
  targets = genelist
)
or <- order(protdata.enrichment.sets.test[, "pvalue"])
rmarkdown::paged_table(protdata.enrichment.sets.test[or, cols_to_display])


# in this example we construct some modules from the hierarchical clustering
#   of the   data
protdata_naf <- SummarizedExperiment::assay(pset, "proteomics")

# hierarchical clustering is not too happy with lots of missing values
#    so we'll do a zero fill on this to get the modules
protdata_naf[which(is.na(protdata_naf))] <- 0

# construct the hierarchical clustering using the 'wardD' method, which
#    seems to give more even sized modules
protdata_hc <- hclust(dist(protdata_naf), method = "ward.D2")

# arbitrarily we'll chop the clusters into 5 modules
modules <- cutree(protdata_hc, k = 5)

## sara: created list
clusters <- lapply(unique(modules), function(x) names(which(modules == x)))

# modules is a named list of values where each value is a module
#         number and the name is the gene name

# To do enrichment for one module (module 1 in this case) do this
protdata.enrichment.sets.module_1 <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "enrichment_in_sets",
  eset = pset,
  assay_name = "proteomics",
  targets = names(modules[which(modules == 1)])
)

# To do enrichment on all modules and return the list of enrichment results
protdata.enrichment.sets.modules <- do.call(rbind, 
                                            leapR::cluster_enrichment(
                                                eset = pset,
                                                assay_name= 'proteomics',
                                                geneset = ncipid,
                                                clusters = clusters, 
                                                sigfilter = 0.25))
## nothing is enriched
rmarkdown::paged_table(protdata.enrichment.sets.modules[, cols_to_display])

## ----fishers_plot, echo=TRUE, warning=FALSE, message=FALSE--------------------
# Plot the top enriched gene sets from Fisher's exact test
# Stars indicate significance (None seen here)
plot_leapr_bar(
  protdata.enrichment.sets.test,
  title = "Fisher's Exact Test: Top Enriched Pathways",
  top_n = 10,
  star_thresholds = c(0.05, 0.01, 0.001),
  wrap = 40
)

## ----ks, echo=TRUE, warning = FALSE, message=FALSE----------------------------
# This is how you calculate enrichment in a ranked list 
# (for example from topology)
### using enrichment_wrapper function
protdata.enrichment.order <- leapR::leapR(
  geneset = ncipid, "enrichment_in_order",
  eset = pset,
  method = 'ks',
  assay_name = "proteomics",
  primary_columns = shortlist[1]
)


or <- order(protdata.enrichment.order[, "pvalue"])
rmarkdown::paged_table(protdata.enrichment.order[or, cols_to_display])

## ----ks_plot, echo=TRUE, warning = FALSE, message=FALSE-----------------------
# Plot the ranked enrichment results
plot_leapr_bar(
  protdata.enrichment.order,
  title = "Kolmogorov-Smirnov Test: Ranked Enrichment",
  top_n = 12,
  fill_sig = "#2166AC",
  fill_ns = "#B2DFDB",
  wrap = 38
)

## ----zt, echo=TRUE, warning = FALSE, message=FALSE----------------------------
# This is how you calculate enrichment in a ranked list 
# (for example from topology)
### using enrichment_wrapper function
protdata.enrichment.order <- leapR::leapR(
  geneset = ncipid, "enrichment_in_order",
  eset = pset,
  method = 'ztest',
  assay_name = "proteomics",
  primary_columns = shortlist[1]
)


or <- order(protdata.enrichment.order[, "pvalue"])
rmarkdown::paged_table(protdata.enrichment.order[or, cols_to_display])

plot_leapr_bar(
  protdata.enrichment.order,
  title = "Z Test: Ranked Enrichment",
  top_n = 12,
  fill_sig = "#2166AC",
  fill_ns = "#B2DFDB",
  wrap = 38
)

## ----correlation, echo=TRUE, warning=FALSE, message=FALSE---------------------
### using enrichment_wrapper function
protdata.enrichment.correlation <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "correlation_enrichment",
  assay_name = "proteomics",
  eset = pset
)

or <- order(protdata.enrichment.correlation[, "pvalue"])
rmarkdown::paged_table(head(protdata.enrichment.correlation[or, 
                                                      cols_to_display]))

protdata.enrichment.correlation.short <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "correlation_enrichment",
  assay_name = "proteomics",
  eset = pset[, shortlist]
)
or <- order(protdata.enrichment.correlation.short[, "pvalue"])
rmarkdown::paged_table(head(protdata.enrichment.correlation.short[or, 
                                                        cols_to_display]))

protdata.enrichment.correlation.long <- leapR::leapR(
  geneset = ncipid,
  enrichment_method = "correlation_enrichment",
  assay_name = "proteomics",
  eset = pset[, longlist]
)
or <- order(protdata.enrichment.correlation.long[, "pvalue"])
rmarkdown::paged_table(head(protdata.enrichment.correlation.long[or, 
                                                            cols_to_display]))

## ----corr_plot, echo=TRUE, warning=FALSE, message=FALSE-----------------------
# Compare correlation patterns across conditions
plot_leapr_bar(
  protdata.enrichment.correlation.short,
  title = "Correlation Enrichment: Short Survivors",
  top_n = 10,
  fill_sig = "#1B9E77",
  fill_ns = "#D8F0E8",
  wrap = 36
)

## ----phosphoproteomics, echo=TRUE, warning=FALSE, message=FALSE---------------
data("kinasesubstrates")

# for an individual tumor calculate the Kinase-Substrate 
# Enrichment (similar to KSEA)
#     This uses the site-specific phosphorylation data to determine 
# which kinases
#     might be active by assessing the enrichment of the 
# phosphorylation of their known substrates

phosphodata.ksea.order <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "enrichment_in_order",
  assay_name = "phosphoproteomics",
  eset = phset,
  method = 'ztest', 
  primary_columns = "TCGA-13-1484")

or <- order(phosphodata.ksea.order[, "pvalue"])
rmarkdown::paged_table(phosphodata.ksea.order[or, cols_to_display])


# now do the same thing but use a threshold
phosphodata.sets.order <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "enrichment_in_sets",
  eset = phset,
  assay_name = "phosphoproteomics",
  threshold = 0.5,
  primary_columns = "TCGA-13-1484"
)
or <- order(phosphodata.sets.order[, "pvalue"])
rmarkdown::paged_table(phosphodata.sets.order[or, cols_to_display])

## ----phos_plot, echo=TRUE, warning=FALSE, message=FALSE-----------------------
plot <- plot_leapr_bar(
  phosphodata.sets.order,
  title            = "Kinase Substrate Enrichment (Phosphoproteomics)",
  top_n            = 15,
  star_thresholds  = c(0.05, 0.01, 1e-3),
  wrap             = 36,
  fill_sig         = "#0F766E",  # dark teal for significant
  fill_ns          = "#99F6E4",  # light teal for non-significant
  outline          = "grey30",
  axis_text_y_size = 7,
  axis_text_x_size = 8
)

plot

# You can also modify the plot further using standard ggplot2 arguments
plot + ggplot2::labs(
  y = expression(-log[10]("adjusted p-value")),
  caption = "* p<0.05, ** p<0.01, *** p<0.001"
)

## ----session------------------------------------------------------------------
sessionInfo()