## ----echo = FALSE-------------------------------------------------------------
library(knitr)

## ----eval=FALSE---------------------------------------------------------------
# if(!requireNamespace("sgdGMF", quietly = TRUE))
#     install.packages("sgdGMF")
# 
# BiocManager::install("omicsGMF")

## ----echo = TRUE, warning=FALSE, message=FALSE--------------------------------
library(sgdGMF)
library(omicsGMF)
library(dplyr)
library(scuttle)
set.seed(100)

## -----------------------------------------------------------------------------
sim_intensities <- matrix(rnorm(n = 20*50, mean = 1, sd = 1),
                          ncol = 20)
NAs <- rbinom(n = prod(dim(sim_intensities)), size = 1, prob = 0.3) == 1
sim_intensities[NAs] <- NA

colnames(sim_intensities) <- paste0("S_", c(1:20))
rownames(sim_intensities) <-  paste0("G_", c(1:50))

example_sce <- SingleCellExperiment(
    assays = SimpleList("logintensities" = sim_intensities),
    colData = data.frame("Batch" = rep(c("Batch1", "Batch2"), each = 10)))

X <- model.matrix(~Batch, data = colData(example_sce))

## -----------------------------------------------------------------------------
example_sce <- runCVGMF(
    example_sce, 
    X = X,                          # Covariate matrix
    exprs_values="logintensities",  # Use log-transformed intensities
    family = gaussian(),            # Gaussian model for proteomics data
    ncomponents = c(1:5))           # Test components from 1 to 5

metadata(example_sce)$cv_GMF %>% 
    group_by(ncomp) %>% 
    summarise(mean_dev = mean(dev),
              mean_aic = mean(aic),
              mean_bic = mean(bic),
              mean_mae = mean(mae),
              mean_mse = mean(mse))

plotCV(example_sce, name = "cv_GMF")


## -----------------------------------------------------------------------------
example_sce <- runRankGMF(example_sce, 
                                    X = X, 
                                    exprs_values="logintensities", 
                                    family = gaussian(),
                                    maxcomp = 10)

plotRank(example_sce, maxcomp = 10)

## -----------------------------------------------------------------------------
example_sce <- runGMF(
    example_sce, 
    exprs_values="logintensities", 
    family = gaussian(), 
    ncomponents = 3, # Use optimal dimensionality, here arbitrarily chosen as 3
    name = "GMF")

## ----results = 'hide'---------------------------------------------------------
reducedDimNames(example_sce)
head(reducedDim(example_sce, type = "GMF"))

names(attributes(reducedDim(example_sce, type = "GMF")))
head(attr(reducedDim(example_sce, type = "GMF"), "rotation"))
tail(attr(reducedDim(example_sce, type = "GMF"), "trace"))


## -----------------------------------------------------------------------------
plotReducedDim(example_sce, dimred = "GMF", colour_by = "Batch")

## ----results = 'hide'---------------------------------------------------------
example_sce <- imputeGMF(example_sce, 
                         exprs_values = "logintensities", 
                         reducedDimName = "GMF",
                         name = "logintensities_imputed")


assay(example_sce,'logintensities')[1:5,1:5]
assay(example_sce,'logintensities_imputed')[1:5,1:5]

## -----------------------------------------------------------------------------
sessionInfo()

