## ----chunk_setup, include = FALSE---------------------------------------------
# Document-wide knitr chunk defaults: collapsed source/output blocks, "#>" as
# the prefix for printed output, and no figure cropping.
knitr::opts_chunk$set(
  fig.crop = FALSE,
  comment = "#>",
  collapse = TRUE
)

## ----load_libraries, message=FALSE, warning=FALSE-----------------------------
# 1. Install MetaProViz from Bioconductor devel:
# if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager")
# BiocManager::install(version = "devel")
# BiocManager::install("MetaProViz")
# 2. Install the latest development version from GitHub using remotes:
# remotes::install_github("saezlab/MetaProViz") # Install Rtools first if you haven't done so yet, using the version appropriate for your system (e.g. Windows or macOS).

library(MetaProViz)

# dependencies that need to be loaded:
library(magrittr)
library(dplyr)
library(rlang)
library(ggfortify)
library(tibble)

## ----load_data----------------------------------------------------------------
# Load the bundled example dataset and move its `Code` column into the row
# names, so that every row of `Intra` is addressed by its sample code.
data("intracell_raw")

Intra <- tibble::column_to_rownames(intracell_raw, var = "Code")

## ----show_data_preview, echo=FALSE--------------------------------------------
# Render a small preview table of `Intra`: the first five rows and columns
# 1-4, 21 and 44.
# Table styling reference:
# https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
kableExtra::kable_classic(
  kableExtra::kbl(
    Intra[1:5, c(1:4, 21, 44)],
    caption = "Preview of the DF `Intra` including columns with sample information and metabolite ids with their measured values."
  ),
  full_width = FALSE,
  html_font = "Cambria",
  font_size = 12
)
# Optional (disabled): wrap the table in
# kableExtra::scroll_box(width = "100%", height = "200px")

## ----processing, fig.width=6, fig.height=4.5, fig.align="left"----------------
# Pre-process the experimental samples.
# Rows 49-58 hold the pool samples and are excluded from both inputs.
# Columns 1-3 hold the sample information (passed as `metadata_sample`); all
# remaining columns hold the metabolite measurements (passed as `data`).
PreprocessingResults <- processing(
  data = Intra[-(49:58), -(1:3)],
  metadata_sample = Intra[-(49:58), 1:3],
  metadata_info = c(
    Conditions = "Conditions",
    Biological_Replicates = "Biological_Replicates"
  ),
  featurefilt = "Modified",
  cutoff_featurefilt = 0.8,
  tic = TRUE,
  mvi = TRUE,
  hotellins_confidence = 0.99, # outlier testing with a 0.99 confidence interval
  core = FALSE,
  save_plot = "svg",
  save_table = "csv",
  print_plot = TRUE,
  path = NULL
)

# Extract the processed results table from the nested result list.
Intra_Preprocessed <- PreprocessingResults[["DF"]][["Preprocessing_output"]]

## ----show_preprocessing_results, echo=FALSE-----------------------------------
# Preview rows 29-32 and the first nine columns of the processed table.
# The caption spells the added column `Outliers`, exactly as the column is
# named in the data (the outlier-removal step filters on `Outliers`), and
# uses the correct name of the test statistic (Hotelling's T2).
Intra_Preprocessed[29:32, 1:9] %>%
  kableExtra::kbl(
    caption = "Preview of the pre-processing results, which has an additional column `Outliers` including the results of Hotelling's T2."
  ) %>%
  kableExtra::kable_classic(
    full_width = FALSE,
    html_font = "Cambria",
    font_size = 12
  )
# Optional (disabled): pipe into
# kableExtra::scroll_box(width = "100%", height = "200px")

## ----remove_outliers----------------------------------------------------------
# Keep only the samples that were not flagged as outliers
# (this removes sample MS55_29).
Intra_Preprocessed <- dplyr::filter(Intra_Preprocessed, Outliers == "no")

## ----replicate_sum------------------------------------------------------------
# Summarise replicates with replicate_sum(). Columns 1-4 of the processed
# table are handed over as sample metadata, every other column as measurement
# data; `metadata_info` names the condition and replicate columns to use.
Intra_Preprocessed <- replicate_sum(
  data = Intra_Preprocessed[, -(1:4)],
  metadata_sample = Intra_Preprocessed[, 1:4],
  metadata_info = c(
    Conditions = "Conditions",
    Biological_Replicates = "Biological_Replicates",
    Analytical_Replicates = "Analytical_Replicates"
  )
)

## ----pca_plot, fig.align="left", fig.width=6, fig.height=4.5, fig.cap="Figure: Do the samples cluster for the Cell type?"----
# Build the sample metadata used for plotting: take the first two columns of
# the replicate-summed table and derive two annotations from `Conditions`:
#   Celltype: HK2 -> Healthy, 786-O -> Primary Tumour, else Metastatic Tumour
#   Status:   HK2 -> Healthy, else Cancer
MetaData_Sample <- Intra_Preprocessed[, 1:2] %>%
  mutate(
    Celltype = case_when(
      Conditions == "HK2" ~ "Healthy",
      Conditions == "786-O" ~ "Primary Tumour",
      TRUE ~ "Metastatic Tumour"
    ),
    Status = case_when(
      Conditions == "HK2" ~ "Healthy",
      TRUE ~ "Cancer"
    )
  )

# PCA coloured by cell type, with point shape given by status.
# Only the three leading columns are dropped: this is the same split between
# sample information (1:3) and measurements that the dma step applies to this
# table. Dropping 1:5, as was done before, also removed the first two
# measurement columns from the PCA.
# NOTE(review): confirm the table has exactly three leading sample-information
# columns after replicate_sum().
viz_pca(
  metadata_info = c(color = "Celltype", shape = "Status"),
  metadata_sample = MetaData_Sample,
  data = Intra_Preprocessed[, -(1:3)],
  plot_name = "Cell type"
)

## ----heatmap_plot, fig.align="left", fig.cap="Colour for sample metadata."----
# Heatmap of the measurements, annotated with four sample-metadata columns.
# Only the three leading columns are dropped: this is the same split between
# sample information (1:3) and measurements that the dma step applies to this
# table. Dropping 1:4, as was done before, also removed the first measurement
# column from the heatmap.
# NOTE(review): confirm the table has exactly three leading sample-information
# columns after replicate_sum().
viz_heatmap(
  data = Intra_Preprocessed[, -(1:3)],
  metadata_sample = MetaData_Sample,
  metadata_info = c(
    color_Sample = list(
      "Conditions", "Biological_Replicates", "Celltype", "Status"
    )
  )
)

## ----dma, fig.width=7, fig.height=5, fig.align="left"-------------------------
# Differential metabolite analysis for a single contrast, 786-M1A versus HK2,
# using a t-test with FDR-adjusted p-values.
DMA_Res <- dma(
  # measurement columns only: the first three columns are not measurements
  data = Intra_Preprocessed[, -(1:3)],
  # condition and replicate information
  metadata_sample = Intra_Preprocessed[, 1:3],
  metadata_info = c(
    Conditions = "Conditions",
    Numerator = "786-M1A",
    Denominator = "HK2"
  ),
  pval = "t.test",
  padj = "fdr"
)

# Extract the result table of this contrast from the returned list.
# NOTE(review): the list key contains spaces around `_vs_`; confirm that it
# matches names(DMA_Res[["dma"]]).
DMA_786M1A_vs_HK2 <- DMA_Res[["dma"]][["786-M1A _vs_ HK2"]]

## ----show_dma_results, echo=FALSE---------------------------------------------
# Render a preview of selected rows (7, 9, 11, 12 and 14) of the dma results.
kableExtra::kable_classic(
  kableExtra::kbl(
    DMA_786M1A_vs_HK2[c(7, 9, 11:12, 14), ],
    caption = "2. Preview of the dma results for the comparison of 786-M1A versus HK2 cells.",
    row.names = FALSE
  ),
  full_width = FALSE,
  html_font = "Cambria",
  font_size = 12
)

## ----match_ids_kegg-----------------------------------------------------------
# ---- Add metabolite IDs to the example data ----
# 1. Load the feature meta-information that belongs to the example data.
data("cellular_meta")

MappingInfo <- cellular_meta

# 2. Merge the mapping onto the differential results by metabolite name
#    (keeping every row of the results), drop features without a KEGG ID,
#    move the KEGG ID into the row names and remove the `Metabolite` column.
#    FYI: the same annotation can be added automatically by dma() through its
#    `metadata_feature` parameter.
ORA_Input <- merge(
  x = DMA_786M1A_vs_HK2,
  y = MappingInfo,
  by = "Metabolite",
  all.x = TRUE
) %>%
  dplyr::filter(!is.na(KEGGCompound)) %>%
  tibble::column_to_rownames(var = "KEGGCompound") %>%
  dplyr::select(-Metabolite)

# ---- Load the KEGG pathways ----
KEGG_Pathways <- metsigdb_kegg()


## ----run_ora------------------------------------------------------------------
# Over-representation analysis of the differential results against KEGG.
# `metadata_info` names the columns the analysis works with: `p.adj` and
# `t.val`, plus the pathway columns `term` and `Metabolite`.
# Pathway file requirements: columns `term`, `Metabolite` and `Description`.
# NOTE(review): ORA_Input carries its feature IDs (KEGG compound IDs) as row
# names and no longer has a `Metabolite` column; presumably `PathwayFeature`
# refers to the pathway table only - confirm.
DM_ORA_res <- standard_ora(
  data = ORA_Input,
  metadata_info = c(
    pvalColumn = "p.adj",
    percentageColumn = "t.val",
    PathwayTerm = "term",
    PathwayFeature = "Metabolite"
  ),
  input_pathway = KEGG_Pathways, # pathway table loaded above with metsigdb_kegg()
  pathway_name = "KEGG"
)

# Pull out the summary table to inspect the results.
DM_ORA_786M1A_vs_HK2 <- DM_ORA_res[["ClusterGosummary"]]

## ----show_ora_results, echo=FALSE---------------------------------------------
# Render a preview of the first five ORA result rows, without the first column.
kableExtra::kable_classic(
  kableExtra::kbl(
    DM_ORA_786M1A_vs_HK2[1:5, -1],
    caption = "Preview of the ORA results for the comparison of 786-M1A versus HK2 cells.",
    row.names = FALSE
  ),
  full_width = FALSE,
  html_font = "Cambria",
  font_size = 12
)

## ----volcano_pea--------------------------------------------------------------
# Keep only a few pathways so that just the most important plots are made:
# rows with a non-missing GeneRatio and a p-value of at most 0.1. The `ID`
# column is renamed to `term` so that it carries the same name as the pathway
# column in the pathway table.
InputPEA2 <- DM_ORA_786M1A_vs_HK2 %>%
  dplyr::filter(!is.na(GeneRatio), pvalue <= 0.1) %>%
  dplyr::rename(term = ID)

# One volcano plot per selected pathway.
viz_volcano(
  plot_types = "PEA",
  metadata_info = c(
    # pathway column; must carry the same name in metadata_feature and data2
    PEA_Pathway = "term",
    # statistic column of data2
    PEA_stat = "pvalue",
    # score column of data2
    PEA_score = "GeneRatio",
    # feature column of metadata_feature; must match the row names of data
    PEA_Feature = "Metabolite"
  ),
  metadata_feature = KEGG_Pathways, # the pathways used for the pathway analysis
  data = ORA_Input, # the data used as input for the pathway analysis
  data2 = InputPEA2, # the results of the pathway analysis
  plot_name = "KEGG",
  select_label = NULL
)

## ----load_mca_rules-----------------------------------------------------------
# Load the table listing all possible flows for the two-condition
# metabolite clustering analysis.
data("mca_twocond_rules")

MCA2Cond_Rules <- mca_twocond_rules

## ----show_mca_2cond_rules, echo=FALSE-----------------------------------------
# Render the complete two-condition rule table.
kableExtra::kable_classic(
  kableExtra::kbl(
    MCA2Cond_Rules,
    caption = "Metabolite Clustering Analysis: 2 Conditions.",
    row.names = FALSE
  ),
  full_width = FALSE,
  html_font = "Cambria",
  font_size = 12
)

# Disabled alluvial plots of the rules:
# easyalluvial::alluvial_wide(mca_2cond[,c(1:2,4)], fill_by = 'last_variable' )
# easyalluvial::alluvial_wide(mca_2cond[,c(1:2,5)], fill_by = 'last_variable' )

## ----load_mca_core_rules------------------------------------------------------
# Load the table listing all possible flows for the core metabolite
# clustering analysis.
data("mca_core_rules")

MCA_CoRe_Rule <- mca_core_rules

## ----show_mca_core_rules, echo=FALSE------------------------------------------
# Render the first six columns of the core rule table.
kableExtra::kable_classic(
  kableExtra::kbl(
    MCA_CoRe_Rule[, 1:6],
    caption = "Metabolite Clustering Analysis: core.",
    row.names = FALSE
  ),
  full_width = FALSE,
  html_font = "Cambria",
  font_size = 12
)

## ----session_info, echo=FALSE-----------------------------------------------------------------------------------------
# Widen the console output to 120 characters, then report the R version,
# platform and attached packages used to build this document.
options(width = 120)
utils::sessionInfo()