## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "##"
)

## ----install, eval = FALSE----------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE)) {
#     install.packages("BiocManager")
# }
# 
# BiocManager::install("GOaGO")

## ----start, message = FALSE---------------------------------------------------
library(GOaGO)

## ----dataset------------------------------------------------------------------
data("genePairsGM12878")
head(genePairsGM12878)

## ----nrow---------------------------------------------------------------------
nrow(genePairsGM12878)

## ----goago, message = FALSE---------------------------------------------------
library(org.Hs.eg.db)

# set the number of CPU threads to use
library(BiocParallel)
options(MulticoreParam = MulticoreParam(workers = 2))

goago <- GOaGO(genePairsGM12878,
    keyType = "ENTREZID", OrgDb = org.Hs.eg.db, ont = "ALL"
)

## ----result-------------------------------------------------------------------
head(as.data.frame(goago))

## ----dotplot, fig.cap = "Dotplot of 10 most enriched Gene Ontology terms"-----
DotPlot(goago)

## ----ridgeplot, fig.cap = "Ridgeplot of 10 most enriched Gene Ontology terms"----
RidgePlot(goago)

## ----setup-comparison, message = FALSE----------------------------------------
library(clusterProfiler)
library(ggplot2)
library(ggrepel)

## ----ego----------------------------------------------------------------------
genes <- with(
    subset(genePairsGM12878, geneID1 != geneID2),
    unique(c(geneID1, geneID2))
)

ego <- enrichGO(genes,
    keyType = "ENTREZID", OrgDb = org.Hs.eg.db, ont = "ALL"
)

## ----dotplot-ego, fig.cap = "Dotplot of 10 most enriched Gene Ontology terms, using unpaired approach by enrichGO"----
DotPlot(ego)

## ----goago-ego-relaxed--------------------------------------------------------
goago_relaxed <- GOaGO(genePairsGM12878,
    keyType = "ENTREZID", OrgDb = org.Hs.eg.db, ont = "ALL",
    pvalueCutoff = Inf, qvalueCutoff = Inf
)

ego_relaxed <- enrichGO(genes,
    keyType = "ENTREZID", OrgDb = org.Hs.eg.db, ont = "ALL",
    pvalueCutoff = Inf, qvalueCutoff = Inf
)

## ----merge-go-ego-------------------------------------------------------------
df <- merge(as.data.frame(goago_relaxed), as.data.frame(ego_relaxed),
    by = c("ONTOLOGY", "ID", "Description"), suffixes = c(".GOaGO", ".enrichGO")
)

df$signif.GOaGO <- df$ID %in% as.data.frame(goago)$ID
df$signif.enrichGO <- df$ID %in% ego$ID

df$enrichment <- ifelse(df$signif.GOaGO,
    ifelse(df$signif.enrichGO, "goago_and_ego", "goago_only"),
    ifelse(df$signif.enrichGO, "ego_only", "insignificant")
)

## ----comparison, fig.cap = "Gene Ontology term enrichment using unpaired approach by enrichGO (X-axis) and paired approach by GO-a-GO (Y-axis)", fig.wide = TRUE, fig.asp = 0.7----
col_labels <- c(
    goago_and_ego = "GO-a-GO and enrichGO",
    goago_only = "GO-a-GO (paired) only",
    ego_only = "enrichGO (unpaired) only",
    insignificant = "none of the above"
)

col_values <- c(
    goago_and_ego = "#6a3d9a",
    goago_only = "#e31a1c",
    ego_only = "#1f78b4",
    insignificant = "gray80"
)

ggplot(
    df,
    aes(x = log2(FoldEnrichment.enrichGO), y = log2(FoldEnrichment.GOaGO), size = Count.GOaGO, color = enrichment)
) +
    geom_vline(xintercept = 0, lty = 2) +
    geom_hline(yintercept = 0, lty = 2) +
    geom_point(alpha = 0.3) +
    scale_size_area() +
    scale_color_manual("Term enrichment", labels = col_labels, values = col_values) +
    DOSE::theme_dose(12)

## ----comparison-with-labels, fig.cap = "Gene Ontology term enrichment using unpaired approach by enrichGO (X-axis) and paired approach by GO-a-GO (Y-axis), showing only the terms shared by at least 5 gene pairs", fig.wide = TRUE, fig.asp = 0.7----
ggplot(
    subset(df, Count.GOaGO >= 5),
    aes(x = log2(FoldEnrichment.enrichGO), y = log2(FoldEnrichment.GOaGO), size = Count.GOaGO, color = enrichment)
) +
    geom_vline(xintercept = 0, lty = 2) +
    geom_point(alpha = 0.3) +
    geom_text_repel(aes(label = label_wrap_gen(50)(Description)), show.legend = FALSE, size = 3, lineheight = 0.9) +
    scale_size_area() +
    scale_color_manual("Term enrichment", labels = col_labels, values = col_values) +
    DOSE::theme_dose(12)

## ----termGenePairs------------------------------------------------------------
tgp <- termGenePairs(goago, org.Hs.eg.db)
subset(tgp, ID == "GO:0004984") # olfactory receptor activity

## ----num_loops_with_genes-----------------------------------------------------
length(unique(genePairsGM12878$interactionID))

## ----num_unique_gene_pairs----------------------------------------------------
nrow(genePairs(goago))

## ----show---------------------------------------------------------------------
goago

## ----import-bedpe, message = FALSE--------------------------------------------
fpath <- system.file("extdata", "GM12878_loops.bedpe.gz", package = "GOaGO")
pairs <- rtracklayer::import(fpath, genome = "hg19")
pairs

## ----transcripts, message = FALSE---------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
transcripts <- transcripts(TxDb.Hsapiens.UCSC.hg19.knownGene,
    columns = "gene_id", filter = list(cds_strand = c("-", "+"))
)

## ----annotateInteractions-----------------------------------------------------
genePairs <- annotateInteractions(pairs, transcripts, maxDistanceToTSS = 10e3)
genePairs

## ----attr-keyType-------------------------------------------------------------
attr(genePairs, "keyType")

## ----citation-----------------------------------------------------------------
citation("GOaGO")

## ----sessionInfo, echo = FALSE------------------------------------------------
sessionInfo()

