## ----knitr, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  cache = FALSE
)

## ----libraries, message=FALSE, warning = FALSE--------------------------------
# load the core libraries
library(leapR)
library(gplots)
library(rmarkdown)
# plotting helpers used in this vignette
library(ggplot2)
library(dplyr)
library(tibble)
library(stringr)

## ----omicsdata, message=FALSE, warning=FALSE----------------------------------
url <- "https://api.figshare.com/v2/file/download/56536217"
pdata <- download.file(url, method = "libcurl", destfile = "protData.rda")
#  as.matrix()
load("protData.rda")

p <- file.remove("protData.rda")

url <- "https://api.figshare.com/v2/file/download/56536214"
tdata <- download.file(url, method = "libcurl", destfile = "transData.rda")
load("transData.rda")
p <- file.remove("transData.rda")

url <- "https://api.figshare.com/v2/file/download/56536211"
phdata <- download.file(url, method = "libcurl", destfile = "phosData.rda")
load("phosData.rda")
p <- file.remove("phosData.rda")

## ----load lists---------------------------------------------------------------

data(shortlist)
data(longlist)
data(ncipid)
data(kinasesubstrates)
## columns that we want to use for results

cols_to_display <- c("ingroup_n", "outgroup_n", "background_n", 
                     "pvalue", "BH_pvalue")


## ----figure_2, echo=TRUE, warning=FALSE, message=FALSE, error=FALSE-----------
# load the single omic and multi-omic pathway databases
data("krbpaths")
data("mo_krbpaths")

# comparison enrichment in transcriptional data
transdata.comp.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = tset,
  assay_name = "transcriptomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

# comparison enrichment in proteomics data
# this is the same code used above, just repeated here for clarity
protdata.comp.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

# comparison enrichment in phosphoproteomics data
phosphodata.comp.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist,
  secondary_columns = longlist, id_column = "hgnc_id"
)


# set enrichment in transcriptomics data
# perform the comparison t-test
tset <- leapR::calcTTest(tset, assay_name = "transcriptomics", 
                         shortlist, longlist)


## now we need to run enrichment in sets with target list, not eset
transdata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  eset = tset,
  assay_name = "transcriptomics",
  enrichment_method = "enrichment_in_sets",
  primary_columns = "pvalue",
  greaterthan = FALSE, threshold = 0.05
)


pset <- leapR::calcTTest(pset, assay_name = "proteomics",
                         shortlist, longlist)

protdata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  eset = pset,
  assay_name = "proteomics",
  enrichment_method = "enrichment_in_sets",
  primary_columns = "pvalue",
  greaterthan = FALSE, threshold = 0.05
)


phset <- leapR::calcTTest(phset, assay_name = "phosphoproteomics", 
                          shortlist, longlist)

phosphodata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_sets",
  id_column = "hgnc_id",
  assay_name = "phosphoproteomics",
  eset = phset, primary_columns = "pvalue",
  greaterthan = FALSE, threshold = 0.05
)

# order enrichment in transcriptomics data
transdata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  eset = tset,
  assay_name = "transcriptomics",
  primary_columns = "difference"
)

# order enrichment in proteomics data
protdata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = "difference"
)

# order enrichment in phosphoproteomics data


phosphodata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  id_column = "hgnc_id",
  method = 'ztest', 
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = "difference"
)

# correlation difference in transcriptomics data
transdata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = tset,
  assay_name = "transcriptomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)
# correlation difference in proteomics data
protdata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = pset,
  assay_name = "proteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)
# correlation difference in phosphoproteomics data
phosphodata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist,
  secondary_columns = longlist, id_column = "hgnc_id"
)

# combine the omics data into one with prefix tags
comboset <- leapR::combine_omics(list(pset, phset, tset), 
                                 c(NA, "hgnc_id", NA))

# comparison enrichment for combodata
# when we use expression set, we do not need to use the mo_krbpaths 
#since the  id mapping column is used
combodata.enrichment.svl <- leapR::leapR(
  geneset = krbpaths, # mo_krbpaths,
  enrichment_method = "enrichment_comparison",
  eset = comboset,
  assay_name = "combined",
  primary_columns = shortlist,
  secondary_columns = longlist, id_column = "id"
)


# set enrichment in combo data
# perform the comparison t test
comboset <- leapR::calcTTest(comboset,
                             assay_name = "combined", 
                             shortlist, longlist)

combodata.set.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_sets",
  eset = comboset, primary_columns = "pvalue",
  assay_name = "combined",
  id_column = "id",
  greaterthan = FALSE, threshold = 0.05
)

# order enrichment in combo data
combodata.order.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "enrichment_in_order",
  assay_name = "combined",
  eset = comboset, primary_columns = "difference",
  id_column = "id"
)

# correlation difference in combo data
combodata.corr.enrichment.svl <- leapR::leapR(
  geneset = krbpaths,
  enrichment_method = "correlation_comparison",
  eset = comboset,
  assay_name = "combined",
  primary_columns = shortlist,
  id_column = "id",
  secondary_columns = longlist
)


# now take all these results and combine them into one figure
all_results <- list(
  transdata.comp.enrichment.svl,
  protdata.comp.enrichment.svl,
  phosphodata.comp.enrichment.svl,
  combodata.enrichment.svl,
  transdata.set.enrichment.svl,
  protdata.set.enrichment.svl,
  phosphodata.set.enrichment.svl,
  combodata.set.enrichment.svl,
  transdata.order.enrichment.svl,
  protdata.order.enrichment.svl,
  phosphodata.order.enrichment.svl,
  combodata.order.enrichment.svl,
  transdata.corr.enrichment.svl,
  protdata.corr.enrichment.svl,
  phosphodata.corr.enrichment.svl,
  combodata.corr.enrichment.svl
)

pathways_of_interest <- c(
  "KEGG_APOPTOSIS",
  "KEGG_CELL_CYCLE",
  "KEGG_ERBB_SIGNALING_PATHWAY",
  "KEGG_FOCAL_ADHESION",
  "KEGG_INSULIN_SIGNALING_PATHWAY",
  "KEGG_MAPK_SIGNALING_PATHWAY",
  "KEGG_MISMATCH_REPAIR",
  "KEGG_MTOR_SIGNALING_PATHWAY",
  "KEGG_OXIDATIVE_PHOSPHORYLATION",
  "KEGG_P53_SIGNALING_PATHWAY",
  "KEGG_PATHWAYS_IN_CANCER",
  "KEGG_PROTEASOME",
  "KEGG_RIBOSOME",
  "KEGG_VEGF_SIGNALING_PATHWAY",
  "KEGG_WNT_SIGNALING_PATHWAY"
)


results.frame <- data.frame(
  pathway = pathways_of_interest,
  td.comp = all_results[[1]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.comp = all_results[[2]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.comp = all_results[[3]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.comp = all_results[[4]][pathways_of_interest, "BH_pvalue"] < 0.05,
  td.set = all_results[[5]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.set = all_results[[6]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.set = all_results[[7]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.set = all_results[[8]][pathways_of_interest, "BH_pvalue"] < 0.05,
  td.order = all_results[[9]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.order = all_results[[10]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.order = all_results[[11]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.order = all_results[[12]][pathways_of_interest, "BH_pvalue"] < 0.05,
  td.corr = all_results[[13]][pathways_of_interest, "BH_pvalue"] < 0.05,
  pd.corr = all_results[[14]][pathways_of_interest, "BH_pvalue"] < 0.05,
  fd.corr = all_results[[15]][pathways_of_interest, "BH_pvalue"] < 0.05,
  cd.corr = all_results[[16]][pathways_of_interest, "BH_pvalue"] < 0.05
)

results.frame.or <- data.frame(
  pathway = pathways_of_interest,
  td.comp = all_results[[1]][pathways_of_interest, "oddsratio"],
  pd.comp = all_results[[2]][pathways_of_interest, "oddsratio"],
  fd.comp = all_results[[3]][pathways_of_interest, "oddsratio"],
  cd.comp = all_results[[4]][pathways_of_interest, "oddsratio"],
  td.set = log(all_results[[5]][pathways_of_interest, "oddsratio"], 2),
  pd.set = log(all_results[[6]][pathways_of_interest, "oddsratio"], 2),
  fd.set = log(all_results[[7]][pathways_of_interest, "oddsratio"], 2),
  cd.set = log(all_results[[8]][pathways_of_interest, "oddsratio"], 2),
  td.order = all_results[[9]][pathways_of_interest, "oddsratio"],
  pd.order = all_results[[10]][pathways_of_interest, "oddsratio"],
  fd.order = all_results[[11]][pathways_of_interest, "oddsratio"],
  cd.order = all_results[[12]][pathways_of_interest, "oddsratio"],
  td.corr = all_results[[13]][pathways_of_interest, "oddsratio"],
  pd.corr = all_results[[14]][pathways_of_interest, "oddsratio"],
  fd.corr = all_results[[15]][pathways_of_interest, "oddsratio"],
  cd.corr = all_results[[16]][pathways_of_interest, "oddsratio"]
)

rownames(results.frame) <- results.frame[, 1]
rownames(results.frame.or) <- results.frame.or[, 1]
results.frame.sig <- results.frame[, 2:17] * results.frame.or[, 2:17]

heatmap.2(as.matrix(results.frame.sig[, c(1:4, 9:16)]), Colv = NA, 
          trace = "none", breaks = c(-1, -.1, -0.0001, 0, 0.1, 1), 
          col = c("blue", "lightblue", "grey", "pink", "red"), 
          dendrogram = "none")

## ----figure_3, warning=FALSE, message=FALSE-----------------------------------
# this comparison of abundance in substrates between case and control
#     is lopsided in the sense that phosphorylation levels were previously
#     reported to be overall higher in the short survivors. Thus the
#     results are not terribly interesting (all kinases are in the same 
# direction)
phosphodata.ksea.comp.svl <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "enrichment_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist, secondary_columns = longlist
)


# thus for the example we'll look at correlation between known substrates 
#   in the   case v control conditions
phosphodata.ksea.corr.svl <- leapR::leapR(
  geneset = kinasesubstrates,
  enrichment_method = "correlation_comparison",
  eset = phset,
  assay_name = "phosphoproteomics",
  primary_columns = shortlist,
  secondary_columns = longlist
)

# for the example we are using an UNCORRECTED PVALUE
#     which will allow us to plot more values, but
#     for real applications it's necessary to use the
#     CORRECTED PVALUE

# here are all the kinases *significant (*uncorrected) from the analysis
or <- order(phosphodata.ksea.corr.svl[, "pvalue"])
ksea_result <- phosphodata.ksea.corr.svl[or, ][1:9, ]
ksea_cols <- rep("grey", 9)
ksea_cols[which(ksea_result[, "oddsratio"] > 0)] <- "black"

# plot left panel: correlation significance of top most significant kinases
barplot(ksea_result[, "oddsratio"],
  horiz = TRUE, xlim = c(-1, 0.5),
  names.arg = rownames(ksea_result), las = 1, col = ksea_cols
)

# plot right panel: abundance comparison results of the same kinases
barplot(phosphodata.ksea.comp.svl[rownames(ksea_result), "oddsratio"],
  horiz = TRUE, names.arg = rownames(ksea_result), las = 1, col = "black"
)