## ----eval=TRUE, warnings = FALSE, echo=TRUE,message=FALSE---------------------

library(ChIPanalyser)

#Load data
data(ChIPanalyserData)


# Loading DNASequenceSet from BSgenome object
# We recommend using the latest version of the genome 
# Please ensure that all your data is aligned to the same version of the genome

library(BSgenome.Dmelanogaster.UCSC.dm6)

DNASequenceSet <- getSeq(BSgenome.Dmelanogaster.UCSC.dm6)



#Loading Position Frequency Matrix

PFM <- file.path(system.file("extdata",package="ChIPanalyser"),"BEAF-32.pfm")

#Checking if correctly loaded
ls()


## ----eval=TRUE, warnings = FALSE----------------------------------------------
chip <- processingChIP(profile = chip,
                      loci = top,
                      cores = 1)
chip

## ----eval =TRUE---------------------------------------------------------------
# PFMs are automatically converted to PWM when build genomicProfiles
GP <- genomicProfiles(PFM = PFM, PFMFormat = "JASPAR")
GP

## ----eval=FALSE---------------------------------------------------------------
# GP <- genomicProfiles(PWM=PositionWeightMatrix)

## ----eval=TRUE,warnings=FALSE-------------------------------------------------
## surpress dependency warnings
optimal <- suppressWarnings(computeOptimal(genomicProfiles = GP,
                        DNASequenceSet = DNASequenceSet,
                        ChIPScore = chip,
                        chromatinState = Access))

## ----eval=TRUE, echo=TRUE-----------------------------------------------------
 ## Lambda Values
 seq(0.25,5,by=0.25)

 ## Bound Molecule Values
 c(1, 10, 20, 50, 100,
    200, 500,1000,2000, 5000,10000,20000,50000, 100000,
    200000, 500000, 1000000)

## ----eval =T------------------------------------------------------------------
optimalParam <- optimal$Optimal
optimalParam$OptimalParameters

## ----eval=TRUE, warnings = FALSE, fig.width=10, fig.height=8------------------
# Plotting Optimal heat maps
par(oma=c(0,0,3,0))
layout(matrix(1:8,ncol=4, byrow=T),width=c(6,1.5,6,1.5),height=c(1,1))
plotOptimalHeatMaps(optimalParam,layout=FALSE)


## ----eval=TRUE----------------------------------------------------------------
optimalParam <- searchSites(optimal,lambdaPWM = 1.25,BoundMolecules = 10000)

## ----eval=TRUE,fig.width=15, fig.height=8-------------------------------------
plotOccupancyProfile(predictedProfile = optimalParam$ChIPProfiles,
                     ChIPScore = chip,
                     chromatinState = Access,
                     occupancy = optimalParam$Occupancy,
                     goodnessOfFit = optimalParam$goodnessOfFit,
                     geneRef = geneRef)

## ----eval =TRUE---------------------------------------------------------------
## Suggested Parameters to start with.
parameterOptions()

## Changing parameters
PO <- parameterOptions(noiseFilter="sigmoid",chipSd=150,chipMean=150,lociWidth=30000)

## ----eval=FALSE---------------------------------------------------------------
# ## Top 50 loci based on ChIP score
# processingChIP(profile = "/path/to/ChIP",
#                loci = NULL,
#                reduce = 50,
#                parameterOptions = PO)
# 
# ## Top 50 loci ALSO containing peaks
# processingChIP(profile = "/path/to/ChIP",
#                loci = NULL,
#                reduce = 50,
#                peaks = "/path/to/peaks",
#                parameterOptions=PO)
# 
# ## Top 50 loci containing BOTH peaks and Accessible DNA
# processingChIP(profile = "/path/to/ChIP",
#                loci = NULL,
#                reduce = 50,
#                peaks = "/path/to/peaks",
#                chromatinState = "/path/to/chromatinState",
#                parameterOptions = PO)
# 

## ----eval=TRUE----------------------------------------------------------------
str(genomicProfiles())

GP <- genomicProfiles(PFM=PFM, PFMFormat="JASPAR", BPFrequency=DNASequenceSet)
GP

## ----eval=FALSE---------------------------------------------------------------
# ## Parsing pre computed parameters (processingChIP function)
# GP <- genomicProfiles(PFM = PFM,
#                      PFMFormat = "JASPAR",
#                      BPFrequency = DNASequenceSet,
#                      ChIPScore = ChIPScore)
# 
# ## Parsing pre assigned function (parameterOptions)
# parameterOptions <- parameterOptions(lambdaPWM = c(1,2,3),
#                                      boundMolecules = c(5,50,500))
# GP <- genomicProfiles(PFM = PFM,
#                      PFMFormat = "JASPAR",
#                      BPFrequency = DNASequenceSet,
#                      parameterOptions = parameterOptions)
# 
# ## Direct parameter assignement
# 
# GP <- genomicProfiles(PFM = PFM,
#                      PFMFormat = "JASPAR",
#                      BPFrequency = DNASequenceSet,
#                      lambdaPWM = c(1,2,3),
#                      boundMolecules = c(4,500,8000))

## ----eval=FALSE---------------------------------------------------------------
# ## Setting custom parameters
# OP <- parameterOptions(lambdaPWM = seq(1,10,by=0.5),
#                       boundMolecules = seq(1,100000, length.out=20))
# 
# ## Computing ONLY Optimal Parameters and MSE as goodness Of Fit metric
# optimal <- computeOptimal(genomicProfiles = GP,
#                          DNASequenceSet = DNASequenceSet,
#                          ChIPScore = chip,
#                          chromatinState = Access,
#                          parameterOptions = OP,
#                          optimalMethod = "MSE",
#                          returnAll = FALSE)
# 
# ### Computing ONLY Optimal Parameters and using Rank slection method
# optimal <- computeOptimal(genomicProfiles = GP,
#                          DNASequenceSet = DNASequenceSet,
#                          ChIPScore = chip,
#                          chromatinState = Access,
#                          parameterOptions = OP,
#                          optimalMethod = "all",
#                          rank=TRUE)
# 

## ----eval=FALSE---------------------------------------------------------------
# ## Extracted Optimal Parameters
# optimalParam <- optimal$Optimal
# 
# ## Plotting heat maps
# plotOptimalHeatMaps(optimalParam,overlay=TRUE)

## ----eval=TRUE,warnings=FALSE-------------------------------------------------

## Creating genomic Profiles object with PFMs and associated parameters
GP <- genomicProfiles(PFM = PFM,
                      PFMFormat = "JASPAR",
                      BPFrequency = DNASequenceSet,
                      lambdaPWM = 1, 
                      boundMolecules = 58794)

## Computing Genome Wide Score required
GW <- computeGenomeWideScores(genomicProfiles = GP,
                              DNASequenceSet = DNASequenceSet,
                              chromatinState = Access)
GW
## Computing PWM score above threshold
pwm <- computePWMScore(genomicProfiles = GW,
                       DNASequenceSet = DNASequenceSet,
                       loci = chip,
                       chromatinState = Access)
pwm
## Computing Occupancy of sites above threshold

occup <- computeOccupancy(genomicProfiles = pwm)
occup
## Compute ChIP seq like profiles

pred <- computeChIPProfile(genomicProfiles = occup,
                          loci = chip)
pred
## Compute goodness Of Fit of model
accu <- profileAccuracyEstimate(genomicProfiles = pred,
                                ChIPScore = chip)
accu


## ----eval=TRUE,fig.width=12, fig.height=4.5-----------------------------------

plotOccupancyProfile(predictedProfile=pred,
                     ChIPScore=chip,
                     chromatinState=Access,
                     occupancy=occup,
                     goodnessOfFit=accu,
                     geneRef=geneRef)

## ----eval=TRUE,echo=TRUE------------------------------------------------------
parameterOptions()

## ----eval=T, echo=T-----------------------------------------------------------
str(genomicProfiles())

## ----eval=F, echo=T-----------------------------------------------------------
#     ## Accessors and Setters for parameterOptions and genomicProfiles
#     avrageExpPWMScore(obj)
#     backgroundSignal(obj)
#     backgroundSignal(obj)<-value
#     boundMolecules(obj)
#     boundMolecules(obj)<-value
#     BPFrequency(obj)
#     BPFrequency(obj)<-value
#     chipMean(obj)
#     chipMean(obj)<-value
#     chipSd(obj)
#     chipSd(obj)<-value
#     chipSmooth(obj)
#     chipSmooth(obj)<-value
#     DNASequenceLength(obj)
#     drop(obj)
#     lambdaPWM(obj)
#     lambdaPWM(obj)<-value
#     lociWidth(obj)
#     lociWidth(obj)<-value
#     maxPWMScore(obj)
#     maxSignal(obj)
#     maxSignal(obj)<-value
#     minPWMScore(obj)
#     naturalLog(obj)
#     naturalLog(obj)<-value
#     noiseFilter(obj)
#     noiseFilter(obj)<-value
#     noOfSites(obj)
#     noOfSites(obj)<-value
#     PFMFormat(obj)
#     PFMFormat(obj)<-value
#     ploidy(obj)
#     ploidy(obj)<-value
#     PositionFrequencyMatrix(obj)
#     PositionFrequencyMatrix(obj)<-value
#     PositionWeightMatrix(obj)
#     PositionWeightMatrix(obj)<-value
#     profiles(obj)
#     PWMpseudocount(obj)
#     PWMpseudocount(obj)<-value
#     PWMThreshold(obj)
#     PWMThreshold(obj)<-value
#     removeBackground(obj)
#     removeBackground(obj)<-value
#     stepSize(obj)
#     stepSize(obj)<-value
#     strandRule(obj)
#     strandRule(obj)<-value
#     whichstrand(obj)
#     whichstrand(obj)<-value
#     ## ChIPScore slots accessors
#     loci(obj)
#     scores(obj)

## ----eval=T-------------------------------------------------------------------
sessionInfo()

