## ----include=FALSE------------------------------------------------------------
knitr::opts_chunk$set(
    collapse=TRUE,
    comment="#>"
)

## ----message=FALSE, warning=FALSE, results=FALSE------------------------------
library(CSOA)
library(ggplot2)
library(henna)
library(patchwork)
library(scRNAseq)
library(scuttle)
library(stringr)
library(Seurat)

sceObj <- BaronPancreasData('human')
sceObj <- logNormCounts(sceObj)

seuratObj <- as.Seurat(sceObj)

acinarMarkers <- c('PRSS1', 'KLK1', 'CTRC', 'PNLIP', 'AKR1C3', 'CTRB1', 
                   'DUOXA2', 'ALDOB', 'REG3A', 'SERPINA3', 'PRSS3', 'REG1B', 
                   'CFB',	'GDF15',	'MUC1','ANPEP', 'ANGPTL4', 'OLFM4', 
                   'GSTA1', 'LGALS2', 'PDZK1IP1', 'RARRES2', 'CXCL17', 
                   'UBD', 'GSTA2', 'LYZ', 'RBPJL', 'PTF1A', 'CELA3A', 
                   'SPINK1', 'ZG16', 'CEL', 'CELA2A', 'CPB1', 'CELA1', 
                   'PNLIPRP1', 'RNASE1', 'AMY2B', 'CPA2','CPA1', 'CELA3B', 
                   'CTRB2', 'PLA2G1B', 'PRSS2', 'CLPS', 'REG1A', 'SYCN')

## -----------------------------------------------------------------------------
seuratObj <- runCSOA(seuratObj, list(CSOA_acinar = acinarMarkers), 
                     pairFileTemplate='pairs')

## -----------------------------------------------------------------------------
acinarPairs <- qGrab('pairsCSOA_acinar.qs2')
head(acinarPairs)

## -----------------------------------------------------------------------------
genes <- overlapGenes(acinarPairs)
nGenes <- length(genes)
nGenes
length(acinarMarkers)
setdiff(acinarMarkers, genes)

## -----------------------------------------------------------------------------
seuratObj <- runCSOA(seuratObj, list(CSOA_acinar2 = genes))

## -----------------------------------------------------------------------------
printStats <- function(seuratObj, col1, col2){
    paste0('Pearson correlation: ', cor(seuratObj[[]][[col1]], 
                                        seuratObj[[]][[col2]]))
    absDiffs <- abs(seuratObj[[]][[col1]] - seuratObj[[]][[col2]])
    absDiffsStats <- sapply(list(mean, median, max, min), 
                            function(fun) fun(absDiffs))
    names(absDiffsStats) <- paste0(c('mean', 'median', 'max', 'min'), 
                                   'AbsDiff')
    absDiffsStats
}

## -----------------------------------------------------------------------------
printStats(seuratObj, 'CSOA_acinar', 'CSOA_acinar2')

## -----------------------------------------------------------------------------
acinarPairsSub <- subset(acinarPairs, revCumsum > 10)
nrow(acinarPairsSub)

## -----------------------------------------------------------------------------
genes90 <- overlapGenes(acinarPairsSub)
genes90
nGenes90 <- length(genes90)
nGenes90

## -----------------------------------------------------------------------------
seuratObj <- runCSOA(seuratObj, list(CSOA_acinar3 = genes90))

printStats(seuratObj, 'CSOA_acinar', 'CSOA_acinar3')

## -----------------------------------------------------------------------------
set.seed(123)

## -----------------------------------------------------------------------------
runCSOAReplace <- function(seuratObj, markers, nReplacedGenes, 
                           pairFileTemplate = 'pairs'){
    genesComplement <- setdiff(rownames(seuratObj), markers)
    geneSet <- list(c(sample(markers, length(markers) - nReplacedGenes), 
                      sample(genesComplement, nReplacedGenes)))
    names(geneSet) <- paste0('CSOA_replace', nReplacedGenes)
    seuratObj <- runCSOA(seuratObj, geneSet, pairFileTemplate=pairFileTemplate)
    return(seuratObj)
}

## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=4, fig.width=5----
nReplacedGenes <- ceiling(nGenes90 / 2)
seuratObj <- runCSOAReplace(seuratObj, genes90, nReplacedGenes)

newCol <- paste0('CSOA_replace', nReplacedGenes)
printStats(seuratObj, 'CSOA_acinar3', newCol)
VlnPlot(seuratObj, newCol, group.by='label') + NoLegend()

## -----------------------------------------------------------------------------
length(setdiff(overlapGenes(qGrab('pairsCSOA_replace12.qs2')), genes90))

## ----out.height='80%', out.width='80%', fig.height=4, fig.width=5-------------
nReplacedGenes <- nGenes90 - 2
seuratObj <- runCSOAReplace(seuratObj, genes90, nReplacedGenes)

newCol <- paste0('CSOA_replace', nReplacedGenes)
printStats(seuratObj, 'CSOA_acinar3', newCol)
dfReplace <- qGrab('pairsCSOA_replace22.qs2')
dfReplace
setdiff(overlapGenes(dfReplace), genes90)
VlnPlot(seuratObj, newCol, group.by='label') + NoLegend()

## ----warning=FALSE------------------------------------------------------------

stellateMarkers <- c('COL6A1', 'RGS5', 'COL1A1', 'MMP11', 'THY1', 'COL6A3',
                     'SFRP2', 'COL1A2', 'TNFAIP6', 'TIMP3', 'SPARC', 'COL3A1',
                     'MGP', 'COL6A2', 'COL4A1', 'FN1', 'SPON2', 'TIMP1',
                     'TGFB1', 'INHBA', 'PDGFRA', 'NDUFA4L2', 'MMP14', 'CTGF',
                     'CYGB', 'KRT10', 'PDGFRB', 'DYNLT1', 'GEM')
  
mixedMarkers <- union(acinarMarkers, stellateMarkers)


## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=4, fig.width=5----
seuratObj <- runCSOA(seuratObj, list(CSOA_mixed = mixedMarkers), 
                     pairFileTemplate='pairs')
VlnPlot(seuratObj, 'CSOA_mixed', group.by='label') + NoLegend()

## ----out.height='80%', out.width='80%', fig.height=5, fig.width=5-------------
dfNetwork <- qGrab('pairsCSOA_mixed.qs2')
overlapNetworkPlot(dfNetwork, rankCol='overlapRank')

## -----------------------------------------------------------------------------
dfNetwork <- connectedComponents(dfNetwork)
dplyr::count(dfNetwork, component)

## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=7, fig.width=6----
components <- c(1, 2)
seuratObj <- scoreModules(seuratObj, dfNetwork, components)
p1 <- VlnPlot(seuratObj, 'CSOA_component1', group.by='label') + NoLegend() +
    theme(axis.text.y = element_blank())
p2 <- VlnPlot(seuratObj, 'CSOA_component2', group.by='label') + NoLegend() +
    theme(axis.text.y = element_blank())
p1 / p2


## ----warning=FALSE------------------------------------------------------------
genes1 <- overlapGenes(subset(dfNetwork, component == 1))
genes1
genes2 <- overlapGenes(subset(dfNetwork, component == 2))
genes2


## ----warning=FALSE, out.height='80%', out.width='80%', fig.height=4, fig.width=5----
ductalMarkers <- c('CFTR', 'SERPINA5', 'SLPI', 'TFF1', 'CFB', 'LGALS4',
                   'CTSH',	'PERP', 'PDLIM3',	'WFDC2', 'SLC3A1', 'AQP1',
                   'ALDH1A3', 'VTCN1',	'KRT19', 'TFF2', 'KRT7', 'CLDN4',
                   'LAMB3', 'TACSTD2', 'CCL2', 'DCDC2','CXCL2', 'CLDN10',
                   'HNF1B', 'KRT20', 'MUC1', 'ONECUT1', 'AMBP', 'HHEX',
                   'ANXA4', 'SPP1', 'PDX1', 'SERPINA3', 'GDF15', 'AKR1C3',
                   'MMP7', 'DEFB1', 'SERPING1', 'TSPAN8', 'CLDN1', 'S100A10',
                   'PIGR')

mixedMarkers <- union(acinarMarkers, ductalMarkers)
seuratObj <- runCSOA(seuratObj, list(CSOA_mixed = mixedMarkers), 
                     pairFileTemplate='pairs')
VlnPlot(seuratObj, 'CSOA_mixed', group.by='label') + NoLegend()
dfNetwork <- qGrab('pairsCSOA_mixed.qs2')
dfNetwork <- connectedComponents(dfNetwork)
dplyr::count(dfNetwork, component)

## -----------------------------------------------------------------------------
seuratObj <- runCSOA(seuratObj, list(CSOA_mixed = mixedMarkers), 
                     jaccardCutoff=1/3, pairFileTemplate='pairs')

## ----out.height='80%', out.width='80%', fig.height=5, fig.width=5-------------
dfNetwork <- qGrab('pairsCSOA_mixed.qs2')
overlapNetworkPlot(dfNetwork, rankCol='overlapRank')

## ----message=FALSE, warning=FALSE, out.height='80%', out.width='80%', fig.height=7, fig.width=6----
dfNetwork <- connectedComponents(dfNetwork)
components <- unique(dfNetwork$component)
seuratObj <- scoreModules(seuratObj, dfNetwork, components)
p1 <- VlnPlot(seuratObj, 'CSOA_component1', group.by='label') + NoLegend() +
    theme(axis.text.y=element_blank())
p2 <- VlnPlot(seuratObj, 'CSOA_component2', group.by='label') + NoLegend() +
    theme(axis.text.y=element_blank())
p1 / p2

## ----out.height='100%', out.width='100%', fig.height=6, fig.width=6-----------
geneRadialPlot(acinarPairs, 
               title='Top 100 overlap genes - Acinar markers')

## ----message=FALSE, out.height='100%', out.width='100%', fig.height=6, fig.width=6----
alphaMarkers <- c('GCG', 'TTR', 'PCSK2', 'FXYD5', 'LDB2', 'MAFB',
                  'CHGA', 'SCGB2A1', 'GLS', 'FAP', 'DPP4', 'GPR119',
                  'PAX6', 'NEUROD1', 'LOXL4', 'PLCE1', 'GC', 'KLHL41',
                  'FEV', 'PTGER3', 'RFX6', 'SMARCA1', 'PGR', 'IRX1',
                  'UCP2', 'RGS4', 'KCNK16', 'GLP1R', 'ARX', 'POU3F4',
                  'RESP18', 'PYY', 'SLC38A5', 'TM4SF4', 'CRYBA2', 'SH3GL2', 
                  'PCSK1', 'PRRG2', 'IRX2', 'ALDH1A1','PEMT', 'SMIM24', 
                  'F10', 'SCGN', 'SLC30A8')

gammaMarkers <- c('PPY', 'ABCC9', 'FGB', 'ZNF503', 'MEIS1', 'LMO3', 'EGR3', 
                  'CHN2', 'PTGFR', 'ENTPD2', 'AQP3', 'THSD7A', 'CARTPT',
                  'ISL1', 'PAX6', 'NEUROD1', 'APOBEC2', 'SEMA3E', 'SLITRK6',
                  'SERTM1', 'PXK', 'PPY2P', 'ETV1', 'ARX', 'CMTM8', 'SCGB2A1', 
                  'FXYD2', 'SCGN')

geneSets <- list(acinarMarkers, alphaMarkers, ductalMarkers, gammaMarkers)
names(geneSets) <- c('CSOA_acinar', 'CSOA_alpha', 'CSOA_ductal', 'CSOA_gamma')
seuratObj <- runCSOA(seuratObj, geneSets, pairFileTemplate='pairs', 
                             keepOverlapOrder=TRUE)

geneRadialPlot(lapply(paste0('pairs', names(geneSets), '.qs2'), qGrab),
               groupLegendTitle='Gene set',
               groupNames=str_remove(names(geneSets), 'CSOA_'),
               cutoff=15,
               title='Top 15 overlap genes',
               extraCircles=2)


## -----------------------------------------------------------------------------
sessionInfo()

