## ----style, echo = FALSE, results = 'asis'------------------------------------
## Call BiocStyle's markdown helper for this document.
## NOTE(review): presumably emits the Bioconductor style markup that the
## chunk's results = 'asis' option passes through verbatim -- confirm.
BiocStyle::markdown()

## ----load_libraries, message=FALSE, warning=FALSE, echo=TRUE------------------
library("dplyr")
library("ggplot2")
library("viridis")
library("tibble")
library("gridExtra")
library("stringr")
library("depmap")
library("ExperimentHub")

## ----load_data, message=FALSE, warning=FALSE, echo=TRUE-----------------------
## Open the ExperimentHub and show the records matching "depmap"
eh <- ExperimentHub()
eh |> query("depmap")

## Fetch each dataset used below by its ExperimentHub accession
rnai <- eh[["EH3080"]]
copyNumber <- eh[["EH3082"]]
TPM <- eh[["EH3084"]]
mutationCalls <- eh[["EH3085"]]
metadata <- eh[["EH3086"]]
## Not fetched in this script; uncomment to load these resources as well
# crispr <- eh[["EH3081"]]
# drug_sensitivity <- eh[["EH3087"]]

## ----soft_tissue_cell_lines, echo=TRUE----------------------------------------
## Ten lowest dependency scores among rows whose cell_line contains
## "SOFT_TISSUE"
rnai |>
    dplyr::filter(stringr::str_detect(cell_line, "SOFT_TISSUE")) |>
    dplyr::select(cell_line, gene_name, dependency) |>
    dplyr::arrange(dependency) |>
    head(n = 10)

## ----message=FALSE, warning=FALSE---------------------------------------------
## Histogram of the RPL14 dependency scores. The dotted red line marks the
## mean dependency over every row of `rnai` (all genes), not only RPL14.
rnai |>
    dplyr::filter(gene_name == "RPL14") |>
    dplyr::select(gene, gene_name, dependency) |>
    ggplot(mapping = aes(x = dependency)) +
    geom_histogram() +
    geom_vline(
        xintercept = mean(rnai$dependency, na.rm = TRUE),
        color = "red",
        linetype = "dotted"
    ) +
    labs(title = "Histogram of dependency scores for gene RPL14")

## ----message=FALSE, warning=FALSE---------------------------------------------
## RPL14 dependency scores annotated with lineage and mutation calls.
## A row is repeated when its (depmap_id, entrez_id) pair matches several
## mutation calls.
meta_rnai <- metadata |>
    dplyr::select(depmap_id, lineage) |>
    ## Restrict rnai to RPL14 *before* joining so the join never has to
    ## materialise the other genes. right_join keeps every RPL14 record,
    ## including those whose depmap_id has no metadata entry (lineage = NA),
    ## which is what full_join followed by the gene filter produced.
    dplyr::right_join(dplyr::filter(rnai, gene_name == "RPL14"),
                      by = "depmap_id") |>
    ## left_join, not full_join: a full join would append every mutation call
    ## of every other gene as rows with NA dependency and NA lineage.
    dplyr::left_join(
        dplyr::select(mutationCalls, depmap_id, entrez_id,
                      is_cosmic_hotspot, var_annotation),
        by = c("depmap_id", "entrez_id")
    )

## Dependency by lineage; mutated cell lines are over-plotted in colour.
## Layers are drawn in order, so later colours win where categories overlap
## (COSMIC hotspots in orange are drawn last).
p1 <- meta_rnai |>
    ggplot(aes(x = dependency, y = lineage)) +
    geom_point(alpha = 0.4, size = 0.5) +
    geom_point(data = subset(meta_rnai, var_annotation == "damaging"),
               color = "red") +
    geom_point(data = subset(meta_rnai,
                             var_annotation == "other non-conserving"),
               color = "blue") +
    geom_point(data = subset(meta_rnai, var_annotation == "other conserving"),
               color = "cyan") +
    geom_point(data = subset(meta_rnai, is_cosmic_hotspot == TRUE),
               color = "orange") +
    ## Dotted line: mean dependency of the rows in meta_rnai
    geom_vline(xintercept = mean(meta_rnai$dependency, na.rm = TRUE),
               linetype = "dotted", color = "red") +
    ggtitle("Scatterplot of dependency scores for gene RPL14 by lineage")

p1

## ----message=FALSE, warning=FALSE---------------------------------------------
## Boxplot of the RPL14 expression values, one box per lineage
metadata |>
    dplyr::select(depmap_id, lineage) |>
    dplyr::full_join(TPM, by = "depmap_id") |>
    dplyr::filter(gene_name == "RPL14") |>
    ggplot(mapping = aes(x = lineage, y = expression, fill = lineage)) +
    geom_boxplot(outlier.alpha = 0.1) +
    labs(title = "Boxplot of expression values for gene RPL14 by lineage") +
    ## Slant the lineage labels and hide the legend
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1)
    )

## ----message=FALSE, warning=FALSE---------------------------------------------
## Expression vs. rnai gene dependency for cell lines whose primary disease
## is "Sarcoma" and whose subtype is "Rhabdomyosarcoma"
sarcoma <- metadata |>
    dplyr::filter(primary_disease == "Sarcoma",
                  subtype_disease == "Rhabdomyosarcoma") |>
    dplyr::select(depmap_id, cell_line, primary_disease, subtype_disease)

## Keep only the columns needed from the dependency and expression tables
rnai_sub <- dplyr::select(rnai, depmap_id, gene, gene_name, dependency)
tpm_sub <- dplyr::select(TPM, depmap_id, gene, gene_name, expression)

## Dependency side: just the join keys and the score; the descriptive
## columns are supplied by the expression side below
sarcoma_dep <- sarcoma |>
    dplyr::left_join(rnai_sub, by = "depmap_id") |>
    dplyr::select(depmap_id, gene, dependency)

## Expression side: keeps cell_line, disease labels and gene_name
sarcoma_exp <- dplyr::left_join(sarcoma, tpm_sub, by = "depmap_id")

## Combine per (cell line, gene) and drop pairs without an expression value
sarcoma_dat_exp <- sarcoma_dep |>
    dplyr::full_join(sarcoma_exp, by = c("depmap_id", "gene")) |>
    dplyr::filter(!is.na(expression))

## Scatterplot with dotted lines at the mean dependency and mean expression
p2 <- sarcoma_dat_exp |>
    ggplot(mapping = aes(x = dependency, y = expression)) +
    geom_point(alpha = 0.4, size = 0.5) +
    geom_vline(
        xintercept = mean(sarcoma_dat_exp$dependency, na.rm = TRUE),
        color = "red",
        linetype = "dotted"
    ) +
    geom_hline(
        yintercept = mean(sarcoma_dat_exp$expression, na.rm = TRUE),
        color = "red",
        linetype = "dotted"
    ) +
    labs(
        title = "Scatterplot of rnai dependency vs expression values for gene"
    )

p2 + theme(axis.text.x = element_text(angle = 45))

## -----------------------------------------------------------------------------
## Ten rows of sarcoma_dat_exp with the lowest dependency scores
sarcoma_dat_exp |>
    dplyr::arrange(dependency) |>
    dplyr::select(cell_line, gene_name, dependency, expression) |>
    head(n = 10)

## ----message=FALSE, warning=FALSE---------------------------------------------
## Boxplot of the RPL14 log copy number, one box per lineage
metadata |>
    dplyr::select(depmap_id, lineage) |>
    dplyr::full_join(copyNumber, by = "depmap_id") |>
    dplyr::filter(gene_name == "RPL14") |>
    ggplot(mapping = aes(x = lineage, y = log_copy_number, fill = lineage)) +
    geom_boxplot(outlier.alpha = 0.1) +
    labs(title = "Boxplot of log copy number for gene RPL14 by lineage") +
    ## Slant the lineage labels and hide the legend
    theme(
        legend.position = "none",
        axis.text.x = element_text(angle = 45, hjust = 1)
    )

## ----echo = FALSE-------------------------------------------------------------
## Print the R version and the attached/loaded packages for this session
sessionInfo()

