## ----tfcensus-op--------------------------------------------------------------------------------------------
library(OmnipathR)
library(magrittr)
library(dplyr)

tfcensus <-
    annotations(
        resources = 'TFcensus',
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique

## ----tfcensus-direct----------------------------------------------------------------------------------------
library(purrr)

tfcensus_direct <-
    tfcensus_download() %>%
    pull(`HGNC symbol`) %>%
    discard(is.na) %>%
    unique

## ----tfs-from-network---------------------------------------------------------------------------------------
tfs_from_network <-
    transcriptional(
        # confidence levels;
        # we use only the 3 levels with highest confidence
        dorothea_levels = c('A', 'B', 'C'),
        entity_types = 'protein'
    ) %>%
    pull(source_genesymbol) %>%
    unique

## ----tf-list------------------------------------------------------------------------------------------------
tfs <- union(tfcensus, tfs_from_network)

## ----localizations------------------------------------------------------------------------------------------
localizations <-
    annotations(
        resources = c(
            'Ramilowski_location',
            'UniProt_location',
            'HPA_subcellular'
        ),
        entity_types = 'protein',
        wide = TRUE
    )

## ----loc-coverage-------------------------------------------------------------------------------------------
localizations %>%
map(function(x){x %>% pull(genesymbol) %>% n_distinct})

## ----ligrec-------------------------------------------------------------------------------------------------
ligands <-
    OmnipathR::intercell(
        parent = 'ligand',
        topology = 'sec',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

receptors <-
    OmnipathR::intercell(
        parent = 'receptor',
        topology = 'pmtm',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

transmitters <-
    OmnipathR::intercell(
        causality = 'trans',
        topology = 'sec',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

receivers <-
    OmnipathR::intercell(
        causality = 'rec',
        topology = 'pmtm',
        consensus_percentile = 50,
        loc_consensus_percentile = 30,
        entity_types = 'protein'
    ) %>%
    pull(genesymbol) %>%
    unique %T>%
    length

## ----ppi----------------------------------------------------------------------------------------------------
ppi <-
    omnipath(
        datasets = 'omnipath',
        entity_types = 'protein'
    )

## ----signalink----------------------------------------------------------------------------------------------
signalink_pathways <-
    annotations(
        resources = 'SignaLink_pathway',
        entity_types = 'protein',
        wide = TRUE
    )

signalink_functions <-
    annotations(
        resources = 'SignaLink_function',
        entity_types = 'protein',
        wide = TRUE
    )

## ----lig-of-interest----------------------------------------------------------------------------------------
ligands_of_interest <-
    intersect(
        signalink_pathways %>%
            filter(pathway == 'TGF') %>%
            pull(genesymbol),
        signalink_functions %>%
            filter(`function` == 'Ligand') %>%
            pull(genesymbol)
    ) %T>%
    length

## ----ppi-graph----------------------------------------------------------------------------------------------
ppi_graph <-
    ppi %>%
    interaction_graph

## ----in-network---------------------------------------------------------------------------------------------
library(igraph)

ligands_of_interest__in_network <-
    ligands_of_interest %>%
    keep(. %in% V(ppi_graph)$name)

tfs__in_network <-
    tfs %>%
    keep(. %in% V(ppi_graph)$name)

## ----find-paths, eval = FALSE-------------------------------------------------------------------------------
# paths <-
#     ppi_graph %>%
#     find_all_paths(
#         start = ligands_of_interest__in_network,
#         end = tfs__in_network,
#         maxlen = 3,
#         attr = 'name'
#     )

## ----filter-paths, eval = FALSE-----------------------------------------------------------------------------
# # only selecting Cytoplasm, of course we could
# # consider other intracellular compartments
# in_cytoplasm <-
#     localizations$UniProt_location %>%
#     filter(location == 'Cytoplasm') %>%
#     pull(genesymbol) %>%
#     unique
# 
# in_nucleus <-
#     localizations$UniProt_location %>%
#     filter(location %in% c('Nucleus', 'Nucleolus')) %>%
#     pull(genesymbol) %>%
#     unique
# 
# paths_selected <-
#     paths %>%
#     # removing single node paths
#     discard(
#         function(p){
#             length(p) == 1
#         }
#     ) %>%
#     # receptors are plasma membrane transmembrane
#     # according to our query to OmniPath
#     keep(
#         function(p){
#             p[2] %in% receptors
#         }
#     ) %>%
#     # making sure all further mediators are
#     # in the cytoplasm
#     keep(
#         function(p){
#             all(p %>% tail(-2) %>% is_in(in_cytoplasm))
#         }
#     ) %>%
#     # the last nodes are all TFs, implying that they are in the nucleus
#     # but we can optionally filter foall(p %>% tail(-2)r this too:
#     keep(
#         function(p){
#             last(p) %in% in_nucleus
#         }
#     ) %>%
#     # finally, we can remove paths which contain TFs also as intermediate
#     # nodes as these are redundant
#     discard(
#         function(p){
#             !any(p %>% head(-1) %>% is_in(tfs))
#         }
#     )

## ----path-stats, eval = FALSE-------------------------------------------------------------------------------
# paths_selected %>% length
# 
# paths_selected %>% map_int(length) %>% table

## ----intercell-network--------------------------------------------------------------------------------------
ligand_receptor <-
    intercell_network(
        ligand_receptor = TRUE,
        high_confidence = TRUE,
        entity_types = 'protein'
    ) %>%
    simplify_intercell_network

## ----tf-target-network--------------------------------------------------------------------------------------
tf_target <-
    transcriptional(
        # confidence levels;
        # we use only the 2 levels with highest confidence
        dorothea_levels = c('A', 'B'),
        # I added this only to have less interactions so the
        # examples here run faster; feel free to remove it,
        # then you will have more gene regulatory interactions
        # from a variety of databases
        datasets = 'tf_target',
        entity_types = 'protein',
        # A workaround to mitigate a temporary issue (05/01/2022)
        resources = c('ORegAnno', 'PAZAR')
    )

## ----ppi-2, eval = FALSE------------------------------------------------------------------------------------
# ppi <-
#     omnipath(
#         datasets = 'omnipath',
#         entity_types = 'protein'
#     )

## ----annotate-networks--------------------------------------------------------------------------------------
ppi %<>%
    annotated_network('UniProt_location') %>%
    annotated_network('kinase.com') %>%
    # these columns define a unique interaction; 5 of them would be enough
    # for only the grouping, we include the rest only to keep them after
    # the summarize statement
    group_by(
        source,
        target,
        source_genesymbol,
        target_genesymbol,
        is_directed,
        is_stimulation,
        is_inhibition,
        sources,
        references,
        curation_effort,
        n_references,
        n_resources
    ) %>%
    summarize(
        location_source = list(unique(location_source)),
        location_target = list(unique(location_target)),
        family_source = list(unique(family_source)),
        family_target = list(unique(family_target)),
        subfamily_source = list(unique(subfamily_source)),
        subfamily_target = list(unique(subfamily_target))
    ) %>%
    ungroup

tf_target %<>%
    annotated_network('UniProt_location') %>%
    annotated_network('kinase.com') %>%
    group_by(
        source,
        target,
        source_genesymbol,
        target_genesymbol,
        is_directed,
        is_stimulation,
        is_inhibition,
        sources,
        references,
        curation_effort,
        # Workaround to mitigate a temporary issue with
        # DoRothEA data in OmniPath (05/01/2022)
        # dorothea_level,
        n_references,
        n_resources
    ) %>%
    summarize(
        location_source = list(unique(location_source)),
        location_target = list(unique(location_target)),
        family_source = list(unique(family_source)),
        family_target = list(unique(family_target)),
        subfamily_source = list(unique(subfamily_source)),
        subfamily_target = list(unique(subfamily_target))
    ) %>%
    ungroup

## ----tfs-2--------------------------------------------------------------------------------------------------
tfs <-
    tf_target %>%
    pull(source_genesymbol) %>%
    unique

## ----grow-paths-function------------------------------------------------------------------------------------
library(rlang)
library(tidyselect)

grow_paths <- function(paths, step, interactions, endpoints = NULL){

    right_target_col <- sprintf(
        'target_genesymbol_step%i',
        step
    )

    paths$growing %<>% one_step(step, interactions)

    # finished paths with their last node being an endpoint:
    paths[[sprintf('step%i', step)]] <-
        paths$growing %>%
        {`if`(
            is.null(endpoints),
            .,
            filter(., !!sym(right_target_col) %in% endpoints)
        )}

    # paths to be further extended:
    paths$growing %<>%
        {`if`(
            is.null(endpoints),
            NULL,
            filter(., !(!!sym(right_target_col) %in% endpoints))
        )}

    invisible(paths)

}


one_step <- function(paths, step, interactions){

    left_col <- sprintf('target_genesymbol_step%i', step - 1)
    right_col <- sprintf('source_genesymbol_step%i', step)
    by <- setNames(right_col, left_col)

    paths %>%
    # making sure even at the first step we have the suffix `_step1`
    {`if`(
        'target_genesymbol' %in% colnames(.),
        rename_with(
            .,
            function(x){sprintf('%s_step%i', x, step)}
        ),
        .
    )} %>%
    {`if`(
        step == 0,
        .,
        inner_join(
            .,
            # adding suffix for the next step
            interactions %>%
            rename_with(function(x){sprintf('%s_step%i', x, step)}),
            by = by
        )
    )} %>%
    # removing loops
    filter(
        select(., contains('_genesymbol')) %>%
        pmap_lgl(function(...){!any(duplicated(c(...)))})
    )

}

## ----build-paths, eval=FALSE--------------------------------------------------------------------------------
# library(stringr)
# 
# steps <- 2 # to avoid OOM
# 
# paths <-
#     seq(0, steps) %>%
#     head(-1) %>%
#     reduce(
#         grow_paths,
#         interactions = ppi,
#         endpoints = tfs,
#         .init = list(
#             growing = ligand_receptor
#         )
#     ) %>%
#     within(rm(growing)) %>%
#     map2(
#         names(.) %>% str_extract('\\d+$') %>% as.integer %>% add(1),
#         one_step,
#         interactions = tf_target
#     )

## ----packages, eval = FALSE---------------------------------------------------------------------------------
# library(OmnipathR)
# library(magrittr)
# library(dplyr)
# library(purrr)
# library(igraph)
# library(rlang)
# library(tidyselect)
# library(stringr)
# library(rmarkdown)

## ----install, eval = FALSE----------------------------------------------------------------------------------
# missing_packages <-
#     setdiff(
#         c(
#             'magrittr',
#             'dplyr',
#             'purrr',
#             'igraph',
#             'rlang',
#             'tidyselect',
#             'stringr',
#             'rmarkdown',
#             'devtools'
#         ),
#         installed.packages()
#     )
# 
# if(length(missing_packages)){
#     install.packages(missing_packages)
# }
# 
# if(!'OmnipathR' %in% installed.packages()){
#     library(devtools)
#     devtools::install_github('saezlab/OmnipathR')
# }

## ----render, eval = FALSE-----------------------------------------------------------------------------------
# library(rmarkdown)
# render('paths.Rmd', output_format = 'html_document')

## ----session------------------------------------------------------------------------------------------------
sessionInfo()

