## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----setup--------------------------------------------------------------------
library(MetaboDynamics)
library(SummarizedExperiment) # storing and manipulating simulated metabolomics data
library(ggplot2) # visualization
library(dplyr) # data handling
library(tidyr) # data handling

## -----------------------------------------------------------------------------
data("longitudinalMetabolomics_df", package = "MetaboDynamics")

## ----fig.wide=TRUE------------------------------------------------------------
data("longitudinalMetabolomics_df")
# take a sample (of five metabolites and subset data
samples <- c(
  "UMP", "PEP",
  "2-Aminomuconate", "ATP"
)
data <- longitudinalMetabolomics_df %>% filter(metabolite %in% samples)

head(data)

## -----------------------------------------------------------------------------
# fit dynamics model
fit <- #  when using a data frame as input fits have to stored in a separate object
  fit_dynamics_model(
    model = "scaled_log",
    data = data,
    scaled_measurement = "m_scaled", # in which column are scaled measurments stored?
    max_treedepth = 10,
    adapt_delta = 0.99, # default 0.95
    iter = 2000,
    warmup = 2000 / 4, # default is 1/4 of iterations
    chains = 1, # only set to 1 in vignette, recommended default is 4!
    cores = 1 # only set to 1 in vignette, can be same as chains if machine allows for parallelization
  )

## -----------------------------------------------------------------------------
# Extract diagnostics
diagnostics <- # when using a data frame as input diagnostics have to be stored in a separate object
  diagnostics_dynamics(
    data = data, # data frame that was used to fit the dynamics model,
    fit = fit, # list of fits from dynamics model, result of fit_dynamics_mode function
    iter = 2000, # how many iterations were used to fit the dynamics model
    chains = 1, # how many chains were used to fit the dynamics model
  )

## -----------------------------------------------------------------------------
plot_diagnostics(
  data = data, # data frame used to fit the dynamics model
  diagnostics = diagnostics[["model_diagnostics"]] # summary of diagnostics
)
plot_PPC(
  data = data, posterior = diagnostics[["posterior"]],
  scaled_measurement = "m_scaled"
)

## -----------------------------------------------------------------------------
estimates <- # estimates have to be stored in a separate object when using data frames
  estimates_dynamics(
    data = data,
    fit = fit
  )

## -----------------------------------------------------------------------------
# only visualize differences between time points
plot_estimates(
  # does not need data as input
  estimates = estimates,
  delta_t = TRUE, # choose to visualize differences between time points
  dynamics = FALSE,
  distance_conditions = FALSE
)

# only visualize dynamics
plot_estimates(
  estimates = estimates,
  delta_t = FALSE,
  distance_conditions = FALSE,
  dynamics = TRUE
) # choose to visualize the dynamics

# only visualize euclidean distance between dynamics vectors
# only visualize dynamics
plot_estimates(
  estimates = estimates,
  delta_t = FALSE,
  distance_conditions = TRUE,
  dynamics = FALSE
) # choose to visualize the dynamics

## -----------------------------------------------------------------------------
head(estimates)

## -----------------------------------------------------------------------------
cluster <- # clustering results have to be stored in separate object when using data frame as input
  cluster_dynamics(
    estimates = estimates, # data is now the estimates or a data frame ob similar structure
    fit = fit,
    distance = "euclidean", # which distance method should be used
    agglomeration = "ward.D2", # which agglomeration method for hierarchical clustering should be used
    deepSplit = 2, # sensitivity of cluster analysis,
    minClusterSize = 1, # minimum number of metabolites in one cluster
    B = 10, # number of bootstrapps
  )

## -----------------------------------------------------------------------------
plots <- plot_cluster(cluster)
plots$trees$A
plots$clusterplots$A
plots$lineplots$A
plots$patchwork$A

## -----------------------------------------------------------------------------
# load background dataset
data("modules_compounds")
data("metabolite_modules")
# get KEGGs from data
data("IDs")

## -----------------------------------------------------------------------------
ora <- # store ORA result to separate object when using data frames as input
  ORA_hypergeometric(
    background = modules_compounds,
    annotations = metabolite_modules,
    data = cluster, # dataframe format of clustering output
    tested_column = "middle_hierarchy",
    IDs = IDs
  )

# Visualization
plot_ORA(
  data = ora,
  tested_column = "middle_hierarchy"
)

## -----------------------------------------------------------------------------
comparison_dynamics <- # result needs to be stored in a separate object when using data frames
  compare_dynamics(
    data = cluster,
    # clustering result
    cores = 1 # only set to 1 for vignette, can be increased to 4 for parallelization
  )

## -----------------------------------------------------------------------------
# Visualize comparison results
heatmap_dynamics(
  estimates = comparison_dynamics[["estimates"]],
  data = cluster
)

## -----------------------------------------------------------------------------
# compare metabolite composition
compare_metabolites <-
  compare_metabolites(
    data = cluster
  )

# Visualization
heatmap_metabolites(
  distances = compare_metabolites,
  data = cluster
)

## -----------------------------------------------------------------------------
# combine comparison results
temp <- left_join(
  comparison_dynamics[["estimates"]], # dynamics comparison
  compare_metabolites,
  join_by("cluster_a", "cluster_b") # join by cluster comparisons
)

# get unique clusters
x <- unique(c(temp[, "cluster_a"], temp[, "cluster_b"]))

# draw plot
ggplot(temp, aes(x = cluster_b, y = cluster_a)) +
  geom_point(aes(size = Jaccard, col = mu_mean)) +
  theme_bw() +
  scale_color_viridis_c(option = "magma") +
  scale_x_discrete(limits = x) +
  xlab("") +
  ylab("") +
  scale_y_discrete(limits = x) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  labs(col = "dynamics distance", size = "metabolite similarity") +
  ggtitle("comparison of clusters", "label = condition + cluster ID")

## -----------------------------------------------------------------------------
sessionInfo()