## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
warning = FALSE,
message = FALSE
)

## ----eval = FALSE, message = FALSE--------------------------------------------
# 
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# 
# BiocManager::install("RUCova")
# 

## ----eval = FALSE, message = FALSE--------------------------------------------
# 
# remotes::install_github("molsysbio/RUCova@devel", force = TRUE)
# 

## ----eval = TRUE, message = FALSE---------------------------------------------
library(RUCova)
library(ggplot2)
library(tidyr)
library(tibble)
library(dplyr)
library(SingleCellExperiment)
theme_set(theme_classic())

## -----------------------------------------------------------------------------
data <- RUCova::HNSCC_data
data <- as.data.frame(data)
colnames(data) <- make.names(colnames(data)) 
head(data)
rownames(data) <- 1:dim(data)[1]
data <- data |> mutate(cell_id = 1:n())

## -----------------------------------------------------------------------------
sce <- SingleCellExperiment(
  assays = list(counts = t(data[,4:48])),    # Assay data
  rowData = DataFrame(measured_channels = colnames(data[,4:48])),# Metadata for rows
  colData = DataFrame(cell_id = data$cell_id,
                      line = data$line,
                      dose = data$dose), # Metadata for columns
)

### RUCova::sce stores the SingleCellExperiment object above

## -----------------------------------------------------------------------------
m <- c("pH3","IdU","Cyclin_D1","Cyclin_B1", "Ki.67","pRb","pH2A.X","p.p53","p.p38","pChk2","pCDC25c","cCasp3","cPARP","pAkt","pAkt_T308","pMEK1.2","pERK1.2","pS6","p4e.BP1","pSmad1.8","pSmad2.3","pNFkB","IkBa", "CXCL1","Lamin_B1", "pStat1","pStat3", "YAP","NICD")

## -----------------------------------------------------------------------------
# Identify the relevant measured channels (DNA_191Ir and DNA_193Ir)
dna_channels <- c("DNA_191Ir", "DNA_193Ir")
# Calculate and add the `mean_DNA` column to the colData of the SingleCellExperiment
sce <- RUCova::calc_mean_DNA(sce, name_assay = "counts", dna_channels, q = 0.95)


## -----------------------------------------------------------------------------

# Identify the barcoding channels
bc_channels <- c("Pd102Di", "Pd104Di", "Pd105Di", "Pd106Di", "Pd108Di", "Pd110Di", 
                 "Dead_cells_194Pt", "Dead_cells_198Pt")
# Calculate and add the `mean_BC` column to the colData of the SingleCellExperiment
sce <- RUCova::calc_mean_BC(sce, name_assay = "counts", bc_channels, n_bc = 4, q = 0.95)

## -----------------------------------------------------------------------------
x = c("total_ERK", "pan_Akt", "mean_DNA", "mean_BC")

## -----------------------------------------------------------------------------
pca <- t(assay(sce,"counts")) |> 
  as_tibble() |> 
  select(all_of(x)) |> 
  mutate_all(asinh) |> 
  mutate_all(scale) |> 
  prcomp()


## -----------------------------------------------------------------------------
reducedDim(sce, "PCA") <- pca$x

## ----echo=FALSE, out.width = "100%", fig.align = "center",echo=FALSE,fig.cap='RUCova models centering SUCs across samples. A,B) Simple model. C,D) Offset model. E,F) Interaction model. A,C,E) Before RUCova. B,D,F) After RUCova.'----
knitr::include_graphics("Figure_1_methods_acrosssamples.png")

## ----echo=FALSE, out.width = "100%", fig.align = "center",echo=FALSE,fig.cap='RUCova models centering SUCs per sample. A,B) Simple model. C,D) Offset model. E,F) Interaction model. A,C,E) Before RUCova. B,D,F) After RUCova.'----
knitr::include_graphics("Figure_1_methods_persample.png")

## -----------------------------------------------------------------------------
sce_Cal33 <-  sce[,sce$line == "Cal33"]

## ----fig.height=10, fig.width=10----------------------------------------------
matrix_corr <- RUCova::compare_corr(sce_Cal33[c(m,x)]) 
heatmap_corr <- RUCova::heatmap_compare_corr(sce_Cal33[c(m,x)]) #same in lower and upper triangle

## ----fig.height=10, fig.width=10----------------------------------------------

RUCova::heatmap_compare_corr(sce_Cal33[c(m,x)]) #same in lower and upper triangle

## ----echo=TRUE, include = FALSE-----------------------------------------------

sce_Cal33 <-  RUCova::rucova(sce = sce_Cal33, 
                             name_assay_before = "counts",
                             markers = m,
                             SUCs = x, 
                             apply_asinh_SUCs = TRUE, #when taking surrogates we need to asinh-transform them
                             model = "simple",
                             center_SUCs = "per_sample",
                             col_name_sample = "dose",     
                             name_assay_after = "counts_simple_persample")


## ----fig.height=10, fig.width=10----------------------------------------------

heatmap_compare_corr(sce_Cal33, name_assay_before = "counts", name_assay_after = "counts_simple_persample")

## ----fig.height=10, fig.width=10----------------------------------------------

FC_before <-  t(assay(sce_Cal33,"counts")) |>  
  as.tibble() |> 
  cbind(colData(sce_Cal33)) |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  mutate(data = "before RUCova") 

FC_after <- t(assay(sce_Cal33,"counts_simple_persample")) |>  
  as.tibble() |> cbind(colData(sce_Cal33))  |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  ungroup() |> 
  mutate(data = "simple all, per sample")

rbind(FC_before,FC_after) |> 
  ggplot(aes(x = logFC, y = marker, fill = data)) + 
  geom_col(position = "dodge")


## ----echo=FALSE---------------------------------------------------------------

sce_Cal33 <- RUCova::rucova(sce = sce_Cal33, 
                            name_assay_before = "counts",
                            markers = m,
                            SUCs = x, 
                            apply_asinh_SUCs = TRUE, #when taking surrogates we need to asinh-transform them
                            model = "simple",
                            center_SUCs = "across_samples",
                            col_name_sample = "dose",     
                            name_assay_after = "counts_simple_acrosssamples")



## ----fig.height=10, fig.width=10----------------------------------------------

heatmap_compare_corr(sce_Cal33, name_assay_before = "counts", name_assay_after = "counts_simple_acrosssamples")


## ----fig.height=10, fig.width=10----------------------------------------------
FC_before <-  t(assay(sce_Cal33,"counts")) |>  
  as.tibble() |> 
  cbind(colData(sce_Cal33)) |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  mutate(data = "before RUCova") 

FC_after <- t(assay(sce_Cal33,"counts_simple_acrosssamples")) |>  
  as.tibble() |> cbind(colData(sce_Cal33))  |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  ungroup() |> 
  mutate(data = "simple all, per sample")

rbind(FC_before,FC_after) |> 
  ggplot(aes(x = logFC, y = marker, fill = data)) + 
  geom_col(position = "dodge")


## -----------------------------------------------------------------------------
pca_cal33 <- t(assay(sce_Cal33,"counts")) |>
  as.tibble() |> 
  cbind(colData(sce_Cal33)) |> 
  select(x) |>
  mutate_all(asinh) |>
  mutate_all(scale) |>
  prcomp()


## -----------------------------------------------------------------------------
tibble(perc = as.numeric(pca_cal33$sdev^2/sum(pca_cal33$sdev^2))*100,
       PC = 1:length(pca_cal33$sdev)) |>
  ggplot(aes(x = PC, y = perc, label = round(perc,1))) +
  geom_col() +
  geom_label()

## ----fig.width=15, fig.height=5-----------------------------------------------
as.data.frame(pca_cal33$rotation) |>
  rownames_to_column("x") |>
  pivot_longer(names_to = "PC", values_to = "loadings", -x) |>
  ggplot(aes(x = loadings, y = x)) +
  geom_col() +
  facet_wrap(~PC, nrow = 1)

## ----eval = FALSE-------------------------------------------------------------
# pca_cal33$x |> as.data.frame() |> mutate(PC1 = -PC1) #variable not saved as not necessary here

## -----------------------------------------------------------------------------
name_reduced_dim = "PCA"
reducedDim(sce_Cal33, name_reduced_dim) <- pca_cal33$x

## ----echo=FALSE---------------------------------------------------------------

sce_Cal33 <-  RUCova::rucova(sce = sce_Cal33, 
                             name_assay_before = "counts",
                             markers = m,
                             SUCs = "PC1", 
                             name_reduced_dim = "PCA",
                             apply_asinh_SUCs = FALSE, #when taking surrogates we need to asinh-transform them
                             model = "simple",
                             center_SUCs = "across_samples",
                             col_name_sample = "dose",     
                             name_assay_after = "counts_simple_PC1")



## ----fig.height=10, fig.width=10----------------------------------------------

heatmap_compare_corr(sce_Cal33, name_assay_before = "counts", name_assay_after = "counts_simple_PC1", name_reduced_dim = "PCA")


## ----fig.height=10, fig.width=10----------------------------------------------
FC_before <-  t(assay(sce_Cal33,"counts")) |>  
  as.tibble() |> 
  cbind(colData(sce_Cal33)) |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  mutate(data = "before RUCova") 

FC_after <- t(assay(sce_Cal33,"counts_simple_PC1")) |>  
  as.tibble() |> cbind(colData(sce_Cal33))  |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  ungroup() |> 
  mutate(data = "simple all, per sample")

rbind(FC_before,FC_after) |> 
  ggplot(aes(x = logFC, y = marker, fill = data)) + 
  geom_col(position = "dodge")


## ----fig.height=5, fig.width=10-----------------------------------------------

r2_a <- metadata(sce_Cal33)$model_counts_simple_acrosssamples$adjr2 |> mutate(model = "simple all, across samples")
r2_b <- metadata(sce_Cal33)$model_counts_simple_persample$adjr2|> mutate(model = "simple all, per sample")
r2_c <- metadata(sce_Cal33)$model_counts_simple_PC1$adjr2 |> mutate(model = "simple PC1, across sample")

rbind(rbind(r2_a,r2_b),r2_c) |> 
  ggplot(aes(x = adj_r_squared, y = marker, fill = model)) + 
  geom_col(position = "dodge")


## ----fig.height=5, fig.width=10-----------------------------------------------
rbind(rbind(r2_a,r2_b),r2_c) |> 
  ggplot(aes(y = adj_r_squared, x = model, color = model)) + 
  geom_boxplot()


## ----fig.height=5, fig.width=20-----------------------------------------------
slope_a <- metadata(sce_Cal33)$model_counts_simple_acrosssamples$stand_slopes |> mutate(model = "simple all, across samples") 
slope_b <- metadata(sce_Cal33)$model_counts_simple_persample$stand_slopes |> mutate(model = "simple all, per sample")
slope_c <- metadata(sce_Cal33)$model_counts_simple_PC1$stand_slopes |> mutate(model = "simple PC1, across sample")

rbind(slope_a, slope_b, slope_c) |> 
  mutate(coef_key = factor(coef_key, levels = c("mean_DNA", "mean_BC","pan_Akt", "total_ERK", "PC1"))) |> 
  ggplot(aes(x = stand_value, y = marker, fill = model)) + 
  geom_col(position = "dodge") + 
  facet_wrap(~coef_key, nrow = 1)


## -----------------------------------------------------------------------------

sce_Cal33 <- RUCova::rucova(sce = sce_Cal33, 
                            name_assay_before = "counts",
                            markers = m,
                            SUCs = x, 
                            apply_asinh_SUCs = TRUE, 
                            keep_offset = FALSE,
                            model = "offset",
                            center_SUCs = "per_sample",
                            col_name_sample = "dose",     
                            name_assay_after = "counts_offset_persample")

## ----fig.height=5, fig.width=10-----------------------------------------------

FC_before <- t(assay(sce_Cal33,"counts")) |> 
  as.tibble() |>
  cbind(colData(sce_Cal33))  |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  mutate(data = "before RUCova") |> 
  ungroup() 

FC_after <- t(assay(sce_Cal33,"counts_offset_persample")) |> 
  as.tibble() |>
  cbind(colData(sce_Cal33))  |> 
  mutate_at(vars(x,m), asinh) |> 
  pivot_longer(names_to = "marker", values_to = "value", c(x,m)) |> 
  group_by(marker) |> 
  summarise(logFC = mean(value[dose=="10Gy"])-mean(value[dose=="0Gy"])) |> 
  ungroup() |> 
  mutate(data = "offset all, per sample") 


rbind(FC_before, FC_after) |> 
  ggplot(aes(x = logFC, y = marker, fill = data)) + 
  geom_col(position = "dodge")


## ----fig.height=5, fig.width=10-----------------------------------------------
metadata(sce_Cal33)$model_counts_offset_persample$eff_coefficients |> 
  filter(surrogate == FALSE) |> 
  ggplot(aes(x = eff_value, y = marker, fill = sample)) + 
  geom_col(position = "dodge") +
  xlab("eff_value (intercept)")


## -----------------------------------------------------------------------------
sce <- RUCova::rucova(sce = sce, 
                      name_assay_before = "counts",
                      markers = m,
                      SUCs = x, 
                      apply_asinh_SUCs = TRUE, 
                      model = "interaction",
                      center_SUCs = "across_samples",
                      col_name_sample = "line",     
                      name_assay_after = "counts_interaction_persample")



## ----fig.height=10, fig.width=10----------------------------------------------
metadata(sce)$model_counts_interaction_persample$eff_coefficients |> 
  filter(surrogate != FALSE) |> 
  ggplot(aes(x = eff_value, y = marker, fill = sample)) + 
  geom_col(position = "dodge") +
  facet_wrap(~surrogate) + 
  xlab("eff_value (slope)")


## -----------------------------------------------------------------------------
sessionInfo()

