## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  out.width = "100%"
)

## ----eval=FALSE---------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("CCAFE")

## ----eval=FALSE---------------------------------------------------------------
# if(!require(devtools, quietly = TRUE)) {
#   install.packages("devtools")
# }
# 
# devtools::install_github("https://github.com/wolffha/CCAFE")

## ----echo=TRUE,message=FALSE--------------------------------------------------
library(CCAFE)
library(tidyverse)

## ----CaseControl_AF example---------------------------------------------------
# load the data
data("sampleDat")
sampleDat <- as.data.frame(sampleDat)

results_af <- CaseControl_AF(data = sampleDat,
                             N_case = 16550,
                             N_control = 403923,
                             OR_colname = "OR",
                             AF_total_colname = "true_maf_pop")

head(results_af)

## ----eval=TRUE----------------------------------------------------------------
# plot the results
# first need minor allele frequency to compare to true_maf_case
results_af$MAF_case <- sapply(results_af$AF_case, function(x)
  ifelse(x > 0.5, 1-x, x))
results_af <- as.data.frame(results_af)

ggplot(results_af, aes(x = true_maf_case, y = MAF_case)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
  theme_bw() +
  ggtitle("Example CaseControl_AF()")

## ----CaseControl_SE example - no correction-----------------------------------
# load the data
data("sampleDat")

# always ensure data is a dataframe before inputting to CCAFE methods
sampleDat <- as.data.frame(sampleDat)

# First run without correction
results_se_noCorr <- CaseControl_SE(data = sampleDat,
                                    N_case = 16550,
                                    N_control = 403923,
                                    OR_colname = "OR",
                                    SE_colname = "SE",
                                    chromosome_colname = "CHR",
                                    position_colname = "POS",
                                    do_correction = FALSE,
                                    sex_chromosomes = FALSE)
head(results_se_noCorr)

## ----eval=TRUE----------------------------------------------------------------
# plot the results
ggplot(results_se_noCorr, aes(x = true_maf_case, y = MAF_case)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
  theme_bw() +
  coord_cartesian(ylim = c(0,.5)) +
  ggtitle("Example CaseControl_SE() No Correction")

## ----CaseControl_SE example - correction--------------------------------------
# load the data
data("sampleDat")
# always ensure data is a dataframe before inputting to CCAFE methods
sampleDat <- as.data.frame(sampleDat)

corr_data <- data.frame(CHR = sampleDat$CHR,
                        POS = sampleDat$POS,
                        proxy_MAF = sampleDat$gnomad_maf)

# now run with correction
results_se_corr <- CaseControl_SE(data = sampleDat,
                                  N_case = 16550,
                                  N_control = 403923,
                                  OR_colname = "OR",
                                  SE_colname = "SE",
                                  chromosome_colname = "CHR",
                                  position_colname = "POS",
                                  do_correction = TRUE,
                                  correction_data = corr_data,
                                  sex_chromosomes = FALSE)

head(results_se_corr)

## ----eval=TRUE----------------------------------------------------------------
# plot the results
ggplot(results_se_corr, aes(x = true_maf_case, y = MAF_case_adj)) +
  geom_point() +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
  theme_bw() +
  ggtitle("Example CaseControl_SE() With Correction")

## ----eval=FALSE---------------------------------------------------------------
# BiocManager::install("VariantAnnotation")

## -----------------------------------------------------------------------------
suppressWarnings(suppressPackageStartupMessages({
  library(VariantAnnotation)
  library(tidyverse)
}))

## -----------------------------------------------------------------------------
data("vcf_sample")

## -----------------------------------------------------------------------------
samples(header(vcf_sample))

## -----------------------------------------------------------------------------
vcf_sample

## -----------------------------------------------------------------------------
# first we will get the info from GRanges object (position, RSID)
meta <- as.data.frame(ranges(vcf_sample))
meta <- meta[,c(1, 4)]
colnames(meta) <- c("Position", "RSID")
# get the chromosome (as a)
meta$Chromosome <- as.vector(seqnames(rowRanges(vcf_sample)))

# now we can also get the meta data (REF, ALT) from the GRanges object
meta <- cbind(meta, mcols(vcf_sample)[,c(2,3)])
rownames(meta) <- seq(1, nrow(meta))

# now we will get the info from the geno object
geno_dat <- data.frame(
  beta = unlist(geno(vcf_sample)$ES),
  SE = unlist(geno(vcf_sample)$SE),
  AF = unlist(geno(vcf_sample)$AF)
)

df_data <- cbind(meta, geno_dat)
head(df_data)

## -----------------------------------------------------------------------------
df_data$OR <- exp(df_data$beta)

## -----------------------------------------------------------------------------
df_data <- CaseControl_AF(data = df_data,
                              N_case = 48286,
                              N_control = 250671,
                              OR_colname = "OR",
                              AF_total_colname = "AF")
head(df_data)

## -----------------------------------------------------------------------------
plotdata_AF <- df_data %>% dplyr::select(AF, AF_case, AF_control)
plotdata_AF$AF_total <- (plotdata_AF$AF_case*48286 + plotdata_AF$AF_control*250671)/(298857)
colnames(plotdata_AF)[1] <- "AF(known)"
plotdata_AF_long <- pivot_longer(plotdata_AF, cols = colnames(plotdata_AF))

## -----------------------------------------------------------------------------
ggplot(plotdata_AF_long, aes(x = value, fill = name)) +
  geom_boxplot(alpha = 0.5, outliers = FALSE) +
  facet_wrap(~name, nrow = 4) +
  theme_bw()

## ----CCAFE_convertVCF Demo----------------------------------------------------
df_data_2 <- CCAFE_convertVCF(vcf_sample)

## -----------------------------------------------------------------------------
df_data_2 <- CaseControl_AF(data = df_data_2,
                              N_case = 48286,
                              N_control = 250671,
                              OR_colname = "OR",
                              AF_total_colname = "AF")
head(df_data_2)

## -----------------------------------------------------------------------------
sessionInfo()

