## ----environment setup, include = FALSE---------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  prompt = FALSE
)

library(dplyr)
library(knitr)
library(kableExtra)

## ----install , eval=FALSE-----------------------------------------------------
# if (!require("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("DNEA")

## ----Quick Start, eval=TRUE, echo = TRUE--------------------------------------
#Load packages
library(DNEA)
library(BiocParallel)
#load the example data
data("TEDDY")
data("T1Dmeta")
aminos <- c("alanine", "arginine", "asparagine", "aspartic_acid", 
            "cysteine", "glutamine", "glutamic_acid", "glycine", 
            "histidine", "isoleucine", "leucine", "lysine", 
            "methionine", "phenylalanine", "proline", "serine", 
            "threonine", "tryptophan", "tyrosine", "valine")
keep <- rownames(TEDDY) %in% aminos
TEDDYreduced <- TEDDY[keep,]
#initiate BiocParallel 
BP_plan <- MulticoreParam(workers = 2, RNGseed = 417, progressbar = FALSE)

#re-order the metadata to match the sample order of expression_data
T1Dmeta <- T1Dmeta[colnames(TEDDYreduced),]

#save the group column to be used as group_labels
group_labels <- T1Dmeta$group

#name each element for its corresponding sample
names(group_labels) <- rownames(T1Dmeta)

#convert to factor
group_labels <- factor(group_labels, levels = c("DM:control", "DM:case"))

#Run DNEA
TEDDYdat <- createDNEAobject(project_name = "TEDDYmetabolomics",
                             expression_data = TEDDYreduced,
                             group_labels = group_labels)
TEDDYdat <- BICtune(object = TEDDYdat, informed = TRUE, 
                    interval = 1e-3, BPPARAM = BP_plan)
TEDDYdat <- stabilitySelection(object = TEDDYdat, subSample = TRUE, 
                               nreps = 1000, BPPARAM = BP_plan,
                               BPOPTIONS=bpoptions(tasks = 8))
TEDDYdat <- getNetworks(object = TEDDYdat, aprox = TRUE, informed = TRUE, 
                        interval = 1e-3, BPPARAM = BP_plan)
TEDDYdat <- clusterNet(object = TEDDYdat, tau = 0.5, verbose = FALSE)

TEDDYdat <- runNetGSA(TEDDYdat)


## ----load example data, eval=1:5, echo=TRUE-----------------------------------
#first load DNEA into R
library(DNEA)

#load the example data
data("TEDDY")

## ----TEDDY examine echo, eval=FALSE, echo=TRUE--------------------------------
# TEDDY[1:10, 1:4]

## ----TEDDY examine, echo=FALSE------------------------------------------------
TEDDY_table <- knitr::kable(TEDDY[1:10, 1:4])
kable_paper(TEDDY_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

#remove table
rm(TEDDY_table)

## ----load example metadata, eval=TRUE-----------------------------------------
#load T1Dmeta
data("T1Dmeta")

unique(T1Dmeta$group)

## ----T1Dmeta echo, eval=FALSE, echo=TRUE--------------------------------------
# #View the metadata
# T1Dmeta[1:10, c(1,5,6,7)]

## ----T1Dmeta examine, echo=FALSE----------------------------------------------
T1Dmeta_table <- knitr::kable(T1Dmeta[1:10, c(1,5,6,7)])
kable_paper(T1Dmeta_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

#remove table
rm(T1Dmeta_table)

## ----create group_labels, eval=TRUE-------------------------------------------
#re-order the metadata to match the sample order of expression_data
T1Dmeta <- T1Dmeta[colnames(TEDDY),]

#save the group column to be used as group_labels
group_labels <- T1Dmeta$group

#name each element for its corresponding sample
names(group_labels) <- rownames(T1Dmeta)

#convert to factor
group_labels <- factor(group_labels, levels = c("DM:control", "DM:case"))

## ----start DNEA, eval=TRUE----------------------------------------------------
#initiate DNEA object
TEDDYdat <- createDNEAobject(project_name = "TEDDYmetabolomics",
                             expression_data = TEDDY,
                             group_labels = group_labels)

## ----correlation heatmap, eval=TRUE, message=FALSE----------------------------
#load the pheatmap and Hmisc packages
library(pheatmap)
library(Hmisc)

#grab the DM:case data from the DNEA object
expr_dat <- expressionData(TEDDYdat, assay = "log_scaled_data")[["DM:case"]]

#create a pearson correlation matrix - data should be transposed first so features are in columns
cor_dat <- Hmisc::rcorr(t(expr_dat), type = "pearson")$r

#cluster the correlations and reorder correlation matrix to better visualize
  dd <- as.dist((1-cor_dat)/2)
  hc <- hclust(dd)
  cor_dat <-cor_dat[hc$order, hc$order]
  
  #create pheatmap
  pheatmap(cor_dat, cluster_rows = FALSE,cluster_cols = FALSE,
         legend = TRUE,annotation_legend = FALSE,
         labels_row = '',labels_col = '',
         main = 'Feature correlations in DM:case group'
)


## ----DNEA summary, eval=TRUE, echo=TRUE---------------------------------------
#show summary of DNEA object
TEDDYdat

## ----node list examine echo, eval=FALSE, echo=TRUE----------------------------
# #access node list
# nodeList(TEDDYdat)[1:5,]

## ----node list examine, echo=FALSE--------------------------------------------
nodelist_table <- knitr::kable(nodeList(TEDDYdat)[1:5,])
kable_paper(nodelist_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

#remove table
rm(nodelist_table)

## ----setup input for aggregateFeatures, eval=TRUE-----------------------------
#save metabolite names
metab_names <- rownames(expressionData(TEDDYdat, assay = "input_data"))

#create feature_group data.frame
TEDDY_groups <- data.frame(features = metab_names,
                           groups = metab_names,
                           row.names = metab_names)

#create labels
TEDDY_groups$groups[TEDDY_groups$groups %in% c("isoleucine",
                                               "leucine",
                                               "valine")] <- "BCAAs"
TEDDY_groups$groups[grepl("acid",
                          TEDDY_groups$groups)] <- "fatty_acids"


## ----TEDDY groups echo, eval=FALSE, echo=TRUE---------------------------------
# #take a look at the group labels
# TEDDY_groups[1:10, ]

## ----node collapse groups, echo=FALSE-----------------------------------------
netGSA_table <- knitr::kable(TEDDY_groups[1:10, ])
kable_paper(netGSA_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

## ----run aggregateFeatures, eval=TRUE-----------------------------------------
#perform feature collapsing
collapsed_TEDDY <- aggregateFeatures(object = TEDDYdat,
                                  method = "hybrid",
                                  correlation_threshold = 0.7,
                                  feature_groups = TEDDY_groups)

collapsed_TEDDY

## ----addExpressionData, eval=TRUE---------------------------------------------

#log-transform and transpose the TEDDY data
TEDDY <- t(log(TEDDY))

#make sure metadata and expression data are in same order
T1Dmeta <- T1Dmeta[rownames(TEDDY),]

dat <- list('DM:control' = TEDDY[T1Dmeta$group == "DM:control",],
            'DM:case' = TEDDY[T1Dmeta$group == "DM:case",])

#log-transform and median center the expression data without scaling
newdat <- list()
for(cond in seq(length(dat))){

  group_dat <- dat[[cond]]
  for(i in seq(1, ncol(group_dat))){
    metab_median = median(group_dat[, i], na.rm = TRUE)
    metab_range = range(group_dat[, i], na.rm = TRUE)
    scale_factor = max(abs(metab_range - metab_median))
    group_dat[, i] <- (group_dat[, i] - metab_median) / scale_factor

    rm(metab_median, metab_range, scale_factor)
  }

  group_dat <- group_dat[rownames(dat[[cond]]),colnames(dat[[cond]])]
  group_dat <- t(group_dat)
  newdat <- append(newdat, list(group_dat))

  rm(i, group_dat)
}

#add names
names(newdat) <- names(dat)

#add data
TEDDYdatCustomInput <- addExpressionData(object = TEDDYdat,
                                         dat = newdat,
                                         assay_name="median_scaled_data")

## ----BiocParallel, eval=TRUE--------------------------------------------------
#load in BiocParallel
library(BiocParallel)

#create parallel sockets
BPPARAM <- BiocParallel::SnowParam(workers = 2, RNGseed = 417, progressbar = TRUE)

## ----load processed data, include=FALSE---------------------------------------

#load processed results since 500 reps would take ~28 min
data("dnw")
TEDDYdat <- dnw
rm(dnw)

## ----BICtune, eval = FALSE, echo = TRUE, strip.white=TRUE---------------------
# 
# #optimize lambda
# TEDDYdat <- BICtune(TEDDYdat,
#                     informed = TRUE,
#                     interval = 1e-3,
#                     BPPARAM = BPPARAM,
#                     BPOPTIONS = bpoptions(progressbar = FALSE))

## ----BICtune message, eval=TRUE, echo=FALSE, strip.white=TRUE-----------------
message("Optimizing the lambda hyperparameter using Bayesian-Information ",
        "Criterion outlined in Guo et al. (2011)")
message("A Link to this reference can be found in the function documentation ",
        "by running ?BICtune() in the console")
message("The log_scaled_data expression data will be used for analysis.\n")
message("Estimating optimal c constant range for asymptotic lambda...\n")
message("Fine-tuning Lambda...\n")
message("The optimal Lambda hyper-parameter has been set to: 0.05072355!")

## ----BIC plot, eval=TRUE------------------------------------------------------
#load ggplot2
library(ggplot2)
#create data frame of values
BICtuneData <- rbind(data.frame(lambda = unlist(lambdas2Test(TEDDYdat)),
                          value = vapply(BICscores(TEDDYdat), function(x) x$BIC, double(1)),
                          score = rep("BIC", length(lambdas2Test(TEDDYdat)))),
                     data.frame(lambda = unlist(lambdas2Test(TEDDYdat)),
                          value = vapply(BICscores(TEDDYdat), function(x) x$likelihood, double(1)),
                          score = rep("likelihood", length(lambdas2Test(TEDDYdat)))))

#create plot
ggplot(data = BICtuneData, aes(x = lambda, y = value)) + 
  geom_line(aes(linetype = score)) + 
  geom_point(aes(shape = score)) +
  geom_vline(xintercept = optimizedLambda(TEDDYdat), color = "red") +
  ggtitle("BIC and Likelihood Scores with Respect to Lambda")


## ----stabsel show, eval = FALSE, echo = TRUE, strip.white=TRUE----------------
# #perform stability selection
# TEDDYdat <- stabilitySelection(TEDDYdat,
#                                subSample = TRUE,
#                                nreps = 1000,
#                                BPPARAM = BPPARAM,
#                                BPOPTIONS = bpoptions(progressbar=FALSE))

## ----stabsel message, eval=TRUE, echo=FALSE, strip.white=TRUE-----------------
message('The lambda value stored in the DNEA will be used for analysis (this can be ',
            'accessed via the optimizedLambda() function')
message("Using Lambda hyper-parameter: ", optimizedLambda(TEDDYdat), "!\n",
          "stabilitySelection will be performed with 1000 replicates!")
message("The log_scaled_data expression data will be used for analysis.\n")
message("Additional sub-sampling will be performed on uneven groups\n")
message("Calculating selection probabilities WITH subsampling for...DM:case...\n")
message("Calculating selection probabilities WITH subsampling for...DM:control...")

## ----getNetworks, eval = TRUE-------------------------------------------------
#jointly estimate the biological networks
TEDDYdat <- getNetworks(TEDDYdat, aprox = TRUE)

## ----edgeList, eval=FALSE, echo=TRUE------------------------------------------
# #save edge list to new object
# edge_list <- edgeList(TEDDYdat)
# 
# #access the edge list
# edge_list[1:10,]

## ----edge table, echo=FALSE---------------------------------------------------
#save edge list to new object
edge_list <- edgeList(TEDDYdat)

#create kable
edge_table <- knitr::kable(edge_list[1:10,])
kable_paper(edge_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

#clean up space
rm(edge_table)

## ----filterNetworks, eval=FALSE, echo=TRUE------------------------------------
# #filter networks based on an absolute threshold of pcor = 0.166
# TEDDYdat_filtered <- filterNetworks(TEDDYdat, pcor = 0.166)

## ----consensus cluster, eval = TRUE, warning=FALSE----------------------------
#set the seed
set.seed(417)

#perform consensus clustering
TEDDYdat <- clusterNet(TEDDYdat, tau = 0.5,
                       max_iterations = 5,
                       verbose = FALSE)

## ----CCsummary show, eval = FALSE---------------------------------------------
# #view subnetwork summary
# CCsummary(TEDDYdat)

## ----CCsummary kable, echo=FALSE----------------------------------------------
clust_table <- knitr::kable(CCsummary(TEDDYdat))
kable_paper(clust_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

## ----runNetGSA, eval = 1:2, echo=TRUE-----------------------------------------
#perform pathway enrichment using netgsa
TEDDYdat <- runNetGSA(TEDDYdat, min_size = 5)

## ----netGSAresults show, eval = FALSE-----------------------------------------
# #access netGSA results
# netGSAresults(TEDDYdat)

## ----netGSAresults kable, echo=FALSE------------------------------------------
netGSA_table <- knitr::kable(netGSAresults(TEDDYdat))
kable_paper(netGSA_table, "striped",
              position = "center", full_width = FALSE,
              bootstrap_options = "condensed")

## ----plotNetworks biological networks-----------------------------------------
#names of our experimental conditions
networkGroups(TEDDYdat)

#create side by side plots
par(mfrow = c(1,3))

#plot networks
plotNetworks(TEDDYdat, 
             type = "group_networks",
             subtype = "DM:control",
             main = "DM:control Network")
plotNetworks(TEDDYdat, 
             type = "group_networks",
             subtype = "All",
             main = "Joint Network")
plotNetworks(TEDDYdat, 
             type = "group_networks", 
             subtype = "DM:case",
             main = "DM:case Network")

## ----plotNetworks sub networks, message=FALSE---------------------------------
#load igraph
library(igraph)

#create side by side plots
par(mfrow = c(1,2))

#plot subnetworks
plotNetworks(TEDDYdat, 
             type = "sub_networks", 
             subtype = 1, 
             layout_func = layout_in_circle,
             main = "Sub Network 1")
plotNetworks(TEDDYdat, 
             type = "sub_networks", 
             subtype = 2, 
             layout_func = layout_in_circle,
             main = "Sub Network 2")

## ----getNetworkFiles, eval=FALSE----------------------------------------------
# #save files for cytoscape
# getNetworkFiles(TEDDYdat)

## ----session info, eval=TRUE--------------------------------------------------
sessionInfo()