## ----include = FALSE----------------------------------------------------------
library(immReferent)
library(BiocStyle)

# Probe each reference database once so the example chunks can be gated on the
# outcome. A probe that raises an error, or returns anything other than a
# single TRUE, leaves the corresponding flag FALSE (a "try-error" object is
# not logical, so isTRUE() rejects it).
imgt_ok  <- isTRUE(try(is_imgt_available(),  silent = TRUE))
ogrdb_ok <- isTRUE(try(is_ogrdb_available(), silent = TRUE))

# Defaults applied to every chunk in the document: the error, message,
# warning and tidy chunk options are all switched off.
knitr::opts_chunk$set(
  error = FALSE, message = FALSE,
  warning = FALSE, tidy = FALSE
)

# Fix the RNG seed so that any randomised step gives the same result per run.
set.seed(42)

## ----eval = FALSE-------------------------------------------------------------
# devtools::install_github("BorchLab/immReferent")

## ----eval = FALSE-------------------------------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("immReferent")

## ----setup--------------------------------------------------------------------
library(immReferent)

## ----get_hla, eval = imgt_ok--------------------------------------------------
# Fetch every available HLA protein sequence.
# On the first run the file is downloaded into the cache.
hla_prot <- getIMGT(gene = "HLA", type = "PROT")

# Show the object, then report its size and the name of its first entry
print(hla_prot)
cat("Number of sequences:", length(hla_prot), "\n")
cat("First sequence name:", names(hla_prot)[1], "\n")

## ----get_ighv, eval = imgt_ok-------------------------------------------------
# Human IGHV sequences, nucleotide level
ighv_nuc <- getIMGT(species = "human", gene = "IGHV", type = "NUC")

# Show what was returned
print(ighv_nuc)

## ----get_trb, eval = imgt_ok--------------------------------------------------
# Requesting the whole mouse TRB locus pulls all of its gene groups
# (V, D, J, and C) in one call
trb_mouse <- getIMGT(species = "mouse", gene = "TRB", type = "NUC")

# The result holds the TRBV, TRBD, TRBJ, and TRBC sequences together
print(trb_mouse)

## ----ogrdb_igh_fasta, eval=ogrdb_ok-------------------------------------------
# OGRDB: human IGH nucleotide sequences, requested as gapped FASTA
igh_ogrdb <- getOGRDB(species = "human",
                      locus = "IGH",
                      type = "NUC",
                      format = "FASTA_GAPPED")
igh_ogrdb

## ----ogrdb_igk_airr, eval=ogrdb_ok--------------------------------------------
# OGRDB: human IGK sequences requested in AIRR JSON format
# (the JSON is parsed to a DNAStringSet)
igk_airr <- getOGRDB(species = "human",
                     locus = "IGK",
                     type = "NUC",
                     format = "AIRR")
igk_airr

## ----list_imgt, eval=imgt_ok--------------------------------------------------
# Full paths of the cached files, first for IMGT ...
listIMGT()
# ... then for OGRDB
listOGRDB()

## ----load_imgt, eval=imgt_ok--------------------------------------------------
# getIMGT(): read from the cache when the entry exists, download otherwise
ighv_nuc <- getIMGT(species = "human", gene = "IGHV", type = "NUC")

# loadIMGT(): read from the cache; fails when the entry is not found and
# the session is offline
ighv_nuc_from_cache <- loadIMGT(
  species = "human",
  gene    = "IGHV",
  type    = "NUC"
)

## ----eval=ogrdb_ok------------------------------------------------------------
# getOGRDB(): read from the cache when the entry exists, download otherwise
igh_nuc <- getOGRDB(
  species = "human",
  locus   = "IGH",
  type    = "NUC",
  format  = "FASTA_GAPPED"
)

# loadOGRDB(): read from the cache; fails when the entry is not found and
# the session is offline
igh_from_cache <- loadOGRDB(
  species = "human",
  locus   = "IGH",
  type    = "NUC",
  format  = "FASTA_GAPPED"
)

## ----refresh_imgt, eval=imgt_ok && ogrdb_ok-----------------------------------
# refreshIMGT(): download the human IGHV sequences again, regardless of cache
ighv_nuc_fresh <- refreshIMGT(species = "human", gene = "IGHV", type = "NUC")

# refreshOGRDB(): same forced re-download for human IGK (gapped FASTA)
igk_fresh <- refreshOGRDB(
  species = "human",
  locus   = "IGK",
  type    = "NUC",
  format  = "FASTA_GAPPED"
)

## ----export_mixcr, eval = imgt_ok---------------------------------------------
# Download human IGH sequences (also reused by the other export chunks)
igh_seqs <- getIMGT(species = "human",
                    gene = "IGH",
                    type = "NUC",
                    suppressMessages = TRUE)

# Export to MiXCR format.
# Write into a dedicated, uniquely named subdirectory instead of the root of
# the session temp directory, so the exported files cannot overwrite (or be
# overwritten by) other files placed in tempdir() during the same session.
mixcr_dir <- tempfile("mixcr_")
dir.create(mixcr_dir)
mixcr_files <- exportMiXCR(igh_seqs,
                           mixcr_dir,
                           chain = "IGH")

# View created files
print(mixcr_files)

# View first few lines of V gene file.
# `n = 4` stops reading after the lines that are shown rather than loading
# the whole file and discarding the rest.
if (!is.null(mixcr_files$v_genes)) {
  cat(readLines(mixcr_files$v_genes, n = 4), sep = "\n")
}

## ----export_trust4, eval = imgt_ok--------------------------------------------
# Export to TRUST4 format (includes constant regions by default)
trust4_file <- tempfile(fileext = ".fa")
exportTRUST4(igh_seqs, trust4_file)

# View header format.
# Only the six displayed lines are read; the rest of the file is not loaded.
cat(readLines(trust4_file, n = 6), sep = "\n")

## ----export_cellranger, eval = imgt_ok----------------------------------------
# Export V genes for Cell Ranger
cellranger_file <- tempfile(fileext = ".fa")
exportCellRanger(igh_seqs, cellranger_file)

# View header format.
# Only the four displayed lines are read; the rest of the file is not loaded.
cat(readLines(cellranger_file, n = 4), sep = "\n")

## ----export_igblast, eval = imgt_ok-------------------------------------------
# Export to IgBLAST format.
# Write into a dedicated, uniquely named subdirectory instead of the root of
# the session temp directory, so these files stay separate from anything else
# written to tempdir() during the same session.
igblast_dir <- tempfile("igblast_")
dir.create(igblast_dir)
igblast_files <- exportIgBLAST(igh_seqs, igblast_dir,
                                organism = "human",
                                receptor_type = "ig")

# View created files
print(igblast_files)

# View header format.
# `n = 4` stops reading after the lines that are shown rather than loading
# the whole file and discarding the rest.
if (!is.null(igblast_files$v_genes)) {
  cat(readLines(igblast_files$v_genes, n = 4), sep = "\n")
}

## -----------------------------------------------------------------------------
# Print the R version, platform, and loaded packages used for this run
sessionInfo()

