## ----eval = FALSE-------------------------------------------------------------
# ## try http:// if https:// URLs are not supported
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install("scFeatureFilter")

## ----message=FALSE, warning=FALSE, collapse=TRUE------------------------------
library(scFeatureFilter)

library(ggplot2)
library(cowplot) # multipanel figures + nice theme

## ----collapse=TRUE------------------------------------------------------------
# Quick start, step 1: show the example dataset bundled with the package.
scData_hESC

# Quick start, step 2: filter that dataset with one single function call.
scData_hESC |>
    sc_feature_filter()

## ----collapse=TRUE------------------------------------------------------------
# Display the raw example expression table.
scData_hESC

## ----collapse=TRUE------------------------------------------------------------
# Output of calculate_cvs() on the example table.
scData_hESC |>
    calculate_cvs()

## ----collapse=TRUE------------------------------------------------------------
# Mean-variance plot of that output, without colouring by bin.
scData_hESC |>
    calculate_cvs() |>
    plot_mean_variance(colourByBin = FALSE)

## ----collapse=TRUE------------------------------------------------------------
# Define the top genes (window_size = 100), then bin the data
# (window_size = 1000), and show the binned table.
calculate_cvs(scData_hESC) |>
    define_top_genes(window_size = 100) |>
    bin_scdata(window_size = 1000)

## ----collapse=TRUE------------------------------------------------------------
# Same binning pipeline, handed straight to the mean-variance plot. This time
# the plot is coloured by bin and stored in `myPlot` so it can be reused below.
myPlot <- plot_mean_variance(
    calculate_cvs(scData_hESC) |>
        define_top_genes(window_size = 100) |>
        bin_scdata(window_size = 1000),
    colourByBin = TRUE,
    density_color = "blue"
)

myPlot

## ----collapse=TRUE------------------------------------------------------------
# The stored plot accepts extra ggplot2 layers: here, log tick marks on the
# left-hand side only.
myPlot +
    annotation_logticks(sides = "l")

## ----collapse=TRUE------------------------------------------------------------
# Fix the RNG seed before the first randomised step so that the control
# windows requested with `n_random`, and every metric, plot and cutoff derived
# from them below, are identical on every run of this script.
# NOTE(review): assumes correlate_windows() draws its random controls from R's
# global RNG -- confirm against the package source.
set.seed(1234)

# Correlate each bin against the top window, with 3 randomised controls.
corDistrib <- scData_hESC |>
    calculate_cvs() |>
    define_top_genes(window_size = 100) |>
    bin_scdata(window_size = 1000) |>
    correlate_windows(n_random = 3)

## ----collapse=TRUE------------------------------------------------------------
# Turn the correlations into densities (absolute_cc = TRUE) and plot them,
# five facets per row. The x axis gets three explicit breaks so that the labels
# stay short in the small facets.
corDens <- correlations_to_densities(corDistrib, absolute_cc = TRUE)
plot_correlations_distributions(corDens, facet_ncol = 5) +
    scale_x_continuous(breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"))

## ----collapse=TRUE------------------------------------------------------------
# Summarise the correlation distributions with get_mean_median(), print the
# summary, then redraw the density plot with those metrics passed in.
metrics <- get_mean_median(corDistrib)
metrics
plot_correlations_distributions(corDens, metrics = metrics, facet_ncol = 5) +
    scale_x_continuous(breaks = c(0, 0.5, 1), labels = c("0", "0.5", "1"))

## ----collapse=TRUE------------------------------------------------------------
# Metric plot with neither the controls nor the threshold shown.
plot_metric(metrics, show_ctrl = FALSE, show_threshold = FALSE)

## ----collapse=TRUE------------------------------------------------------------
# Same plot, now with the controls shown.
plot_metric(metrics, show_ctrl = TRUE, show_threshold = FALSE)

## ----collapse=TRUE------------------------------------------------------------
# Same plot, with the controls and a threshold of 2 shown.
plot_metric(metrics, show_ctrl = TRUE, show_threshold = TRUE, threshold = 2)

## ----collapse=TRUE------------------------------------------------------------
# Bin cutoff selected from the metrics with the same threshold of 2.
determine_bin_cutoff(metrics, threshold = 2)

## ----collapse=TRUE------------------------------------------------------------
# Step-by-step version of the workflow.
# 1. Define the top genes (window_size = 100) and bin the data
#    (window_size = 1000).
binned_data <- calculate_cvs(scData_hESC) |>
    define_top_genes(window_size = 100) |>
    bin_scdata(window_size = 1000)

# 2. Correlate the windows (3 randomised controls) and summarise the result.
metrics <- binned_data |>
    correlate_windows(n_random = 3) |>
    get_mean_median()

# 3. Filter the binned table at the cutoff that determine_bin_cutoff() picks
#    from the metrics with its default settings.
filtered_data <- binned_data |>
    filter_expression_table(bin_cutoff = determine_bin_cutoff(metrics))

# Compare dimensions before and after filtering, then show the filtered table.
dim(scData_hESC)
dim(filtered_data)
filtered_data

## ----message=FALSE, warning=FALSE, collapse=TRUE------------------------------
library(SingleCellExperiment)
library(scRNAseq) # provides the example datasets

# Load an example dataset; sce_allen is a SingleCellExperiment object.
sce_allen <- ReprocessedAllenData()
sce_allen

# Filter using the "rsem_tpm" assay of the object.
filtered_allen <- sce_allen |>
    sc_feature_filter(sce_assay = "rsem_tpm")

# Check the type of the filtered result.
# NOTE(review): its row names are used for subsetting just below, so the result
# is presumably a matrix here rather than a tibble -- confirm.
filtered_allen |>
    is.matrix()

# Keep only the rows of the original object that are named in the filtered
# result.
sce_filtered_allen <- sce_allen[rownames(filtered_allen), ]
sce_filtered_allen

## ----collapse=TRUE------------------------------------------------------------
# Autocorrelation plot of the top window, computed from the calculate_cvs()
# output.
scData_hESC |>
    calculate_cvs() |>
    plot_top_window_autocor()

## ----collapse=TRUE------------------------------------------------------------
# Run the same pipeline twice, changing only the window size given to
# bin_scdata(): 1000 first, then 500.
metrics_bigBins <- calculate_cvs(scData_hESC) |>
    define_top_genes(window_size = 100) |>
    bin_scdata(window_size = 1000) |>
    correlate_windows(n_random = 3) |>
    get_mean_median()

metrics_smallBins <- calculate_cvs(scData_hESC) |>
    define_top_genes(window_size = 100) |>
    bin_scdata(window_size = 500) |>
    correlate_windows(n_random = 3) |>
    get_mean_median()

# Put the two metric plots into one grid, each titled with its bin size.
plot_grid(
    plot_metric(metrics_bigBins) + labs(title = "1000 genes per bin"),
    plot_metric(metrics_smallBins) + labs(title = "500 genes per bin")
)

