## ----v1, include = FALSE------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = TRUE
)

## ----install, eval = FALSE----------------------------------------------------
# # Install package: devtools if not present
# if (!require("devtools", quietly = TRUE))
#   install.packages("devtools")
# 
# devtools::install_github("dblux/HVP")

## ----quickstart---------------------------------------------------------------
library(HVP)

crosstab <- matrix(10, 3, 2)
# Simulate microarray data with 100 features
data <- simulateMicroarray(crosstab, 100)

X <- data$X # matrix with dimensions (nfeature, nsamples)
batch <- data$metadata$batch # vector representing batch
class <- data$metadata$class # vector representing class

res <- HVP(X, batch, class)
res@HVP # proportion of variance attributable to batch effects

## ----sce, message = FALSE-----------------------------------------------------
library(splatter)
library(scater)

params <- newSplatParams()
params <- setParams(
  params,
  nGenes = 100, # 100 features
  batchCells = c(150, 150), # Simulate 300 cells
  group.prob = c(0.5, 0.5),
  batch.facLoc = 0, # location parameter of log-normal distribution
  batch.facScale = 0.1 # scale parameter of log-normal distribution
)
sce_data <- splatSimulate(params, method = "groups")
sce_data <- logNormCounts(sce_data)

## ----HVP sce------------------------------------------------------------------
res_sce <- HVP(sce_data, "Batch", "Group")

## ----real, message = FALSE----------------------------------------------------
library(ExperimentHub)
library(scater)

eh <- ExperimentHub()
data <- query(eh, "muscData")
kang <- data[["EH2259"]] # downloads data set
kang_ctrl <- kang[rowSums(counts(kang)) > 0, kang$stim == 'ctrl']
kang_ctrl <- logNormCounts(kang_ctrl)
kang_ctrl$ind <- as.factor(kang_ctrl$ind)

res <- HVP(kang_ctrl, "ind", "cell")

## ----seurat, message = FALSE--------------------------------------------------
if (requireNamespace("Seurat", quietly = TRUE)) {
  library(Seurat)
  seurat_data <- as.Seurat(sce_data)
  res_seurat <- HVP(seurat_data, "Batch", "Group")
} else {
  message("Please install the 'Seurat' package to run this example.")
}

## ----perm, message = FALSE----------------------------------------------------
res_permtest <- HVP(X, batch, class, nperm = 1000)
res_permtest@p.value

## ----histogram, message = FALSE-----------------------------------------------
library(ggplot2)

plot(res_permtest)

## ----simulate-----------------------------------------------------------------
crosstab <- matrix(10, 3, 2)
crosstab

data <- simulateMicroarray(crosstab, 100) # 100 genes
names(data)
dim(data$X)
head(data$metadata)

X <- data$X
batch <- data$metadata$batch
class <- data$metadata$class

## ----session------------------------------------------------------------------
sessionInfo()

