## ----style, echo = FALSE, results = 'asis'----------------
BiocStyle::markdown()
options(width = 60, max.print = 1000)
knitr::opts_chunk$set(
    eval = as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache = as.logical(Sys.getenv("KNITR_CACHE", "TRUE")), 
    tidy.opts = list(width.cutoff = 60), tidy = TRUE)

## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE----
suppressPackageStartupMessages({
    library(systemPipeR)
})

## ----generate_workenvir, eval=FALSE-----------------------
# library(systemPipeRdata)
# genWorkenvir(workflow = "chipseq", mydirname = "chipseq")
# setwd("chipseq")

## ----load_targets_file, eval=TRUE-------------------------
targetspath <- system.file("extdata", "targetsPE_chip.txt", package = "systemPipeR")
targets <- read.delim(targetspath, comment.char = "#")
targets[1:4,-c(5,6)]

## ----project_chipseq, eval=FALSE--------------------------
# library(systemPipeR)
# sal <- SPRproject()
# sal <- importWF(sal, file_path = "systemPipeChIPseq.Rmd", verbose = FALSE)
# sal

## ----run_chipseq, eval=FALSE------------------------------
# sal <- runWF(sal)

## ----plot_chipseq, eval=FALSE-----------------------------
# plotWF(sal)

## ----chipseq-toplogy, eval=TRUE, warning= FALSE, echo=FALSE, out.width="100%", fig.align = "center", fig.cap= "Toplogy graph of ChIP-Seq workflow.", warning=FALSE----
knitr::include_graphics("results/plotwf_chipseq.png")

## ----report_chipseq, eval=FALSE---------------------------
# # Scientific report
# sal <- renderReport(sal)
# rmarkdown::render("systemPipeChIPseq.Rmd", clean = TRUE, output_format = "BiocStyle::html_document")
# 
# # Technical (log) report
# sal <- renderLogs(sal)

## ----status_chipseq, eval=FALSE---------------------------
# statusWF(sal)

## ----load_SPR, message=FALSE, eval=FALSE, spr=TRUE--------
# cat(crayon::blue$bold("To use this workflow, following R packages are expected:\n"))
# cat(c("'ggbio", "ChIPseeker", "GenomicFeatures", "GenomicRanges", "Biostrings",
#        "seqLogo", "BCRANK", "readr'\n"), sep = "', '")
# targetspath <- system.file("extdata", "targetsPE_chip.txt", package = "systemPipeR")
# ###pre-end
# appendStep(sal) <- LineWise(code = {
#                             library(systemPipeR)
#                             }, step_name = "load_SPR")

## ----fastq_report, eval=FALSE, message=FALSE, spr=TRUE----
# appendStep(sal) <- LineWise(
#     code = {
#         targets <- read.delim(targetspath, comment.char = "#")
#         updateColumn(sal, step = "load_SPR", position = "targetsWF") <- targets
#         fq_files <- getColumn(sal, "load_SPR", "targetsWF", column = 1)
#         fqlist <- seeFastq(fastq = fq_files, batchsize = 10000, klength = 8)
#         png("./results/fastqReport.png", height = 162, width = 288 * length(fqlist))
#         seeFastqPlot(fqlist)
#         dev.off()
#     },
#     step_name = "fastq_report",
#     dependency = "load_SPR"
# )

## ----preprocessing, message=FALSE, eval=FALSE, spr=TRUE----
# appendStep(sal) <- SYSargsList(
#     step_name = "preprocessing",
#     targets = targetspath, dir = TRUE,
#     wf_file = "preprocessReads/preprocessReads-pe.cwl",
#     input_file = "preprocessReads/preprocessReads-pe.yml",
#     dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("fastq_report")
# )

## ----custom_preprocessing_function, eval=FALSE------------
# appendStep(sal) <- LineWise(
#     code = {
#         filterFct <- function(fq, cutoff = 20, Nexceptions = 0) {
#             qcount <- rowSums(as(quality(fq), "matrix") <= cutoff, na.rm = TRUE)
#             # Retains reads where Phred scores are >= cutoff with N exceptions
#             fq[qcount <= Nexceptions]
#         }
#         save(list = ls(), file = "param/customFCT.RData")
#     },
#     step_name = "custom_preprocessing_function",
#     dependency = "preprocessing"
# )

## ----editing_preprocessing, message=FALSE, eval=FALSE-----
# yamlinput(sal, "preprocessing")$Fct
# yamlinput(sal, "preprocessing", "Fct") <- "'filterFct(fq, cutoff=20, Nexceptions=0)'"
# yamlinput(sal, "preprocessing")$Fct ## check the new function
# cmdlist(sal, "preprocessing", targets = 1) ## check if the command line was updated with success

## ----bowtie2_index, eval=FALSE, spr=TRUE------------------
# appendStep(sal) <- SYSargsList(
#     step_name = "bowtie2_index",
#     dir = FALSE, targets = NULL,
#     wf_file = "bowtie2/bowtie2-index.cwl",
#     input_file = "bowtie2/bowtie2-index.yml",
#     dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#     inputvars = NULL,
#     dependency = c("preprocessing")
# )

## ----bowtie2_alignment, eval=FALSE, spr=TRUE--------------
# appendStep(sal) <- SYSargsList(
#     step_name = "bowtie2_alignment",
#     dir = TRUE,
#     targets = targetspath,
#     wf_file = "workflow-bowtie2/workflow_bowtie2-pe.cwl",
#     input_file = "workflow-bowtie2/workflow_bowtie2-pe.yml",
#     dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("bowtie2_index")
# )

## ----bowtie2_alignment_check, eval=FALSE------------------
# cmdlist(sal, step="bowtie2_alignment", targets=1)

## ----align_stats, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- LineWise(
#     code = {
#         fqpaths <- getColumn(sal, step = "bowtie2_alignment", "targetsWF", column = "FileName1")
#         bampaths <- getColumn(sal, step = "bowtie2_alignment", "outfiles", column = "samtools_sort_bam")
#         read_statsDF <- alignStats(args = bampaths, fqpaths = fqpaths, pairEnd = TRUE)
#         write.table(read_statsDF, "results/alignStats.xls", row.names=FALSE, quote=FALSE, sep="\t")
#         },
#     step_name = "align_stats",
#     dependency = "bowtie2_alignment")

## ----bam_IGV, eval=FALSE, spr=TRUE------------------------
# appendStep(sal) <- LineWise(
#     code = {
#         bampaths <- getColumn(sal, step = "bowtie2_alignment", "outfiles",
#                               column = "samtools_sort_bam")
#         symLink2bam(
#             sysargs = bampaths, htmldir = c("~/.html/", "somedir/"),
#             urlbase = "http://cluster.hpcc.ucr.edu/~tgirke/",
#             urlfile = "./results/IGVurl.txt")
#     },
#     step_name = "bam_IGV",
#     dependency = "bowtie2_alignment",
#     run_step = "optional"
# )

## ----rle_object, eval=FALSE-------------------------------
# bampaths <- getColumn(sal, step = "bowtie2_alignment", "outfiles", column = "samtools_sort_bam")
# aligns <- readGAlignments(bampaths[1])
# cov <- coverage(aligns)
# cov

## ----resize_align, eval=FALSE-----------------------------
# trim(resize(as(aligns, "GRanges"), width = 200))

## ----rle_slice, eval=FALSE--------------------------------
# islands <- slice(cov, lower = 15)
# islands[[1]]

## ----plot_coverage, eval=FALSE----------------------------
# library(ggbio)
# myloc <- c("Chr1", 1, 100000)
# ga <- readGAlignments(bampaths[1], use.names=TRUE,
#                       param=ScanBamParam(which=GRanges(myloc[1],
#                         IRanges(as.numeric(myloc[2]), as.numeric(myloc[3])))))
# autoplot(ga, aes(color = strand, fill = strand), facets = strand ~ seqnames, stat = "coverage")

## ----merge_bams, eval=FALSE, spr=TRUE---------------------
# appendStep(sal) <- LineWise(
#     code = {
#         bampaths <- getColumn(sal, step = "bowtie2_alignment", "outfiles", column = "samtools_sort_bam")
#         merge_bams <- mergeBamByFactor(args=bampaths, targetsDF = targetsWF(sal)[["bowtie2_alignment"]], out_dir = file.path("results", "merge_bam") ,overwrite=TRUE)
#         updateColumn(sal, step = "merge_bams", position = "targetsWF") <- merge_bams
#     },
#     step_name = "merge_bams",
#     dependency = "bowtie2_alignment"
# )

## ----call_peaks_macs_noref, eval=FALSE, spr=TRUE----------
# cat("Running preprocessing for call_peaks_macs_noref\n")
# # Previous Linewise step is not run at workflow building time,
# # but we need the output as input for this sysArgs step. So
# # we use some preprocess code to predict the output paths
# # to update the output targets of merge_bams, and then
# # them into this next step during workflow building phase.
# mergebam_out_dir = file.path("results", "merge_bam") # make sure this is the same output directory used in merge_bams
# targets_merge_bam <- targetsWF(sal)$bowtie2_alignment
# targets_merge_bam <- targets_merge_bam[, -which(colnames(targets_merge_bam) %in% c("FileName1", "FileName2", "FileName"))]
# targets_merge_bam <- targets_merge_bam[!duplicated(targets_merge_bam$Factor), ]
# targets_merge_bam <- cbind(FileName = file.path(mergebam_out_dir, paste0(targets_merge_bam$Factor, "_merged.bam")), targets_merge_bam)
# updateColumn(sal, step = "merge_bams", position = "targetsWF") <- targets_merge_bam
# # write it out as backup, so you do not need to use preprocess code above again
# writeTargets(sal, step = "merge_bams", file = "targets_merge_bams.txt", overwrite = TRUE)
# 
# ###pre-end
# appendStep(sal) <- SYSargsList(
#     step_name = "call_peaks_macs_noref",
#     targets = "targets_merge_bams.txt",
#     wf_file = "MACS2/macs2-noinput.cwl",
#     input_file = "MACS2/macs2-noinput.yml",
#     dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#     inputvars = c(
#         FileName = "_FASTQ_PATH1_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("merge_bams")
# )

## ----call_peaks_macs_withref, eval=FALSE, spr=TRUE--------
# cat("Running preprocessing for call_peaks_macs_withref\n")
# # To generate the reference targets file for the next step, use `writeTargetsRef`,
# # this file needs to be present at workflow building time
# # Use following preprocess code to do so:
# writeTargetsRef(infile = "targets_merge_bams.txt", outfile = "targets_bam_ref.txt", silent = FALSE, overwrite = TRUE)
# 
# ###pre-end
# appendStep(sal) <- SYSargsList(
#     step_name = "call_peaks_macs_withref",
#     targets = "targets_bam_ref.txt",
#     wf_file = "MACS2/macs2-input.cwl",
#     input_file = "MACS2/macs2-input.yml",
#     dir_path = system.file("extdata/cwl", package = "systemPipeR"),
#     inputvars = c(
#         FileName1 = "_FASTQ_PATH1_",
#         FileName2 = "_FASTQ_PATH2_",
#         SampleName = "_SampleName_"
#     ),
#     dependency = c("merge_bams")
# )

## ----consensus_peaks, eval=FALSE, spr=TRUE----------------
# appendStep(sal) <- LineWise(
#     code = {
#         peaks_files <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles", column = "peaks_xls")
#         peak_M1A <- peaks_files["M1A"]
#         peak_M1A <- as(read.delim(peak_M1A, comment="#")[,1:3], "GRanges")
#         peak_A1A <- peaks_files["A1A"]
#         peak_A1A <- as(read.delim(peak_A1A, comment="#")[,1:3], "GRanges")
#         (myol1 <- subsetByOverlaps(peak_M1A, peak_A1A, minoverlap=1))
#             # Returns any overlap
#         myol2 <- olRanges(query=peak_M1A, subject=peak_A1A, output="gr")
#             # Returns any overlap with OL length information
#         myol2[values(myol2)["OLpercQ"][,1]>=50]
#             # Returns only query peaks with a minimum overlap of 50%
#     },
#     step_name = "consensus_peaks",
#     dependency = "call_peaks_macs_noref"
# )

## ----annotation_ChIPseeker, eval=FALSE, spr=TRUE----------
# appendStep(sal) <- LineWise(
#     code = {
#         library(ChIPseeker); library(GenomicFeatures)
#         peaks_files <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles", column = "peaks_xls")
#         txdb <- suppressWarnings(makeTxDbFromGFF(file="data/tair10.gff", format="gff", dataSource="TAIR",
#                                 organism="Arabidopsis thaliana"))
#         for(i in seq(along=peaks_files)) {
#             peakAnno <- annotatePeak(peaks_files[i], TxDb=txdb, verbose=FALSE)
#             df <- as.data.frame(peakAnno)
#             outpaths <- paste0("./results/", names(peaks_files), "_ChIPseeker_annotated.xls")
#             names(outpaths) <- names(peaks_files)
#             write.table(df, outpaths[i], quote=FALSE, row.names=FALSE, sep="\t")
#         }
#         updateColumn(sal, step = "annotation_ChIPseeker", position = "outfiles") <- data.frame(outpaths)
#     },
#     step_name = "annotation_ChIPseeker",
#     dependency = "call_peaks_macs_noref"
# )

## ----ChIPseeker_plots, eval=FALSE, spr=TRUE---------------
# appendStep(sal) <- LineWise(
#     code = {
#         peaks_files <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles", column = "peaks_xls")
#         peak <- readPeakFile(peaks_files[1])
#         png("results/peakscoverage.png")
#         covplot(peak, weightCol="X.log10.pvalue.")
#         dev.off()
#         png("results/peaksHeatmap.png")
#         peakHeatmap(peaks_files[1], TxDb=txdb, upstream=1000, downstream=1000,
#                     color="red")
#         dev.off()
#         png("results/peaksProfile.png")
#         plotAvgProf2(peaks_files[1], TxDb=txdb, upstream=1000, downstream=1000,
#                      xlab="Genomic Region (5'->3')", ylab = "Read Count Frequency",
#                      conf=0.05)
#         dev.off()
#     },
#     step_name = "ChIPseeker_plots",
#     dependency = "annotation_ChIPseeker"
# )

## ----annotation_ChIPpeakAnno, eval=FALSE, spr=TRUE--------
# appendStep(sal) <- LineWise(
#     code = {
#         library(ChIPpeakAnno); library(GenomicFeatures)
#         peaks_files <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles", column = "peaks_xls")
#         txdb <- suppressWarnings(makeTxDbFromGFF(file="data/tair10.gff", format="gff", dataSource="TAIR",
#                                 organism="Arabidopsis thaliana"))
#         ge <- genes(txdb, columns=c("tx_name", "gene_id", "tx_type"))
#         for(i in seq(along=peaks_files)) {
#             peaksGR <- as(read.delim(peaks_files[i], comment="#"), "GRanges")
#             annotatedPeak <- annotatePeakInBatch(peaksGR, AnnotationData=genes(txdb))
#             df <- data.frame(as.data.frame(annotatedPeak),
#                              as.data.frame(values(ge[values(annotatedPeak)$feature,])))
#             df$tx_name <- as.character(lapply(df$tx_name, function(x) paste(unlist(x), sep='', collapse=', ')))
#             df$tx_type <- as.character(lapply(df$tx_type, function(x) paste(unlist(x), sep='', collapse=', ')))
#             outpaths <- paste0("./results/", names(peaks_files), "_ChIPpeakAnno_annotated.xls")
#             names(outpaths) <- names(peaks_files)
#             write.table(df, outpaths[i], quote=FALSE, row.names=FALSE, sep="\t")
#             }
#     },
#     step_name = "annotation_ChIPpeakAnno",
#     dependency = "call_peaks_macs_noref",
#     run_step = "optional"
# )

## ----count_peak_ranges, eval=FALSE, spr=TRUE--------------
# appendStep(sal) <- LineWise(
#     code = {
#         library(GenomicRanges)
#         bam_files <- getColumn(sal, step = "bowtie2_alignment", "outfiles", column = "samtools_sort_bam")
#         args <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles", column = "peaks_xls")
#         outfiles <- paste0("./results/", names(args), "_countDF.xls")
#         bfl <- BamFileList(bam_files, yieldSize=50000, index=character())
#         countDFnames <- countRangeset(bfl, args, outfiles, mode="Union", ignore.strand=TRUE)
#         updateColumn(sal, step = "count_peak_ranges", position = "outfiles") <- data.frame(countDFnames)
#     },
#     step_name = "count_peak_ranges",
#     dependency = "call_peaks_macs_noref",
# )

## ----diff_bind_analysis, eval=FALSE, spr=TRUE-------------
# appendStep(sal) <- LineWise(
#     code = {
#         countDF_files <- getColumn(sal, step = "count_peak_ranges", "outfiles")
#         outfiles <- paste0("./results/", names(countDF_files), "_peaks_edgeR.xls")
#         names(outfiles) <- names(countDF_files)
#         cmp <- readComp(file =stepsWF(sal)[["bowtie2_alignment"]],
#                         format="matrix")
#         dbrlist <- runDiff(args=countDF_files, outfiles = outfiles, diffFct=run_edgeR,
#                            targets=targetsWF(sal)[["bowtie2_alignment"]], cmp=cmp[[1]],
#                            independent=TRUE, dbrfilter=c(Fold=2, FDR=1))
#     },
#     step_name = "diff_bind_analysis",
#     dependency = "count_peak_ranges",
# )

## ----go_enrich, eval=FALSE, spr=TRUE----------------------
# appendStep(sal) <- LineWise(
#     code = {
#         annofiles <- getColumn(sal, step = "annotation_ChIPseeker", "outfiles")
#         gene_ids <- sapply(annofiles,
#                            function(x) unique(as.character
#                                               (read.delim(x)[,"geneId"])), simplify=FALSE)
#         load("data/GO/catdb.RData")
#         BatchResult <- GOCluster_Report(catdb=catdb, setlist=gene_ids, method="all",
#                                         id_type="gene", CLSZ=2, cutoff=0.9,
#                                         gocats=c("MF", "BP", "CC"), recordSpecGO=NULL)
#         write.table(BatchResult, "results/GOBatchAll.xls", quote=FALSE, row.names=FALSE, sep="\t")
#     },
#     step_name = "go_enrich",
#     dependency = "annotation_ChIPseeker",
# )

## ----parse_peak_sequences, eval=FALSE, spr=TRUE-----------
# appendStep(sal) <- LineWise(
#     code = {
#         library(Biostrings); library(seqLogo); library(BCRANK)
#         rangefiles <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles")
#         for(i in seq(along=rangefiles)) {
#             df <- read.delim(rangefiles[i], comment="#")
#             peaks <- as(df, "GRanges")
#             names(peaks) <- paste0(as.character(seqnames(peaks)), "_", start(peaks),
#                                    "-", end(peaks))
#             peaks <- peaks[order(values(peaks)$X.log10.pvalue., decreasing=TRUE)]
#             pseq <- getSeq(FaFile("./data/tair10.fasta"), peaks)
#             names(pseq) <- names(peaks)
#             writeXStringSet(pseq, paste0(rangefiles[i], ".fasta"))
#             }
#         },
#     step_name = "parse_peak_sequences",
#     dependency = "call_peaks_macs_noref",
# )

## ----bcrank_enrich, eval=FALSE, spr=TRUE------------------
# appendStep(sal) <- LineWise(
#     code = {
#         library(Biostrings); library(seqLogo); library(BCRANK)
#         rangefiles <- getColumn(sal, step = "call_peaks_macs_noref", "outfiles")
#         set.seed(0)
#         BCRANKout <- bcrank(paste0(rangefiles[1], ".fasta"), restarts=25,
#                             use.P1=TRUE, use.P2=TRUE)
#         toptable(BCRANKout)
#         topMotif <- toptable(BCRANKout, 1)
#         weightMatrix <- pwm(topMotif, normalize = FALSE)
#         weightMatrixNormalized <- pwm(topMotif, normalize = TRUE)
#         png("results/seqlogo.png")
#         seqLogo(weightMatrixNormalized)
#         dev.off()
#         },
#     step_name = "bcrank_enrich",
#     dependency = "call_peaks_macs_noref",
# )

## ----sessionInfo, eval=FALSE, spr=TRUE--------------------
# appendStep(sal) <- LineWise(
#     code = {
#         sessionInfo()
#         },
#     step_name = "sessionInfo",
#     dependency = "bcrank_enrich"
# )

## ----runWF, eval=FALSE------------------------------------
# sal <- runWF(sal)

## ----runWF_cluster, eval=FALSE----------------------------
# # wall time in mins, memory in MB
# resources <- list(conffile=".batchtools.conf.R",
#                   template="batchtools.slurm.tmpl",
#                   Njobs=18,
#                   walltime=120,
#                   ntasks=1,
#                   ncpus=4,
#                   memory=1024,
#                   partition = "short"
#                   )
# sal <- addResources(sal, c("bowtie2_alignment"), resources = resources)
# sal <- runWF(sal)

## ----plotWF, eval=FALSE-----------------------------------
# plotWF(sal, rstudio = TRUE)

## ----statusWF, eval=FALSE---------------------------------
# sal
# statusWF(sal)

## ----logsWF, eval=FALSE-----------------------------------
# sal <- renderLogs(sal)

## ----list_tools-------------------------------------------
if(file.exists(file.path(".SPRproject", "SYSargsList.yml"))) {
    local({
        sal <- systemPipeR::SPRproject(resume = TRUE)
        systemPipeR::listCmdTools(sal)
        systemPipeR::listCmdModules(sal)
    })
} else {
    cat(crayon::blue$bold("Tools and modules required by this workflow are:\n"))
    cat(c("BLAST 2.14.0+"), sep = "\n")
}

## ----report_session_info, eval=TRUE-----------------------
sessionInfo()

