### R code from vignette source 'ArtOfAlignmentInR.Rnw'

###################################################
### code chunk number 1: ArtOfAlignmentInR.Rnw:51-53
###################################################
options(continue=" ")
options(width=80)


###################################################
### code chunk number 2: expr0
###################################################
library(DECIPHER)
N0 <- ceiling(2^seq(1, 17))
N1 <- ceiling(2^seq(1, 12))
N2 <- ceiling(2^seq(1, 13))
N3 <- ceiling(2^seq(1, 16))
timings0 <- setNames(rep(0, length(N0)), N0)
timings1 <- setNames(rep(0, length(N1)), N1)
timings2 <- setNames(rep(0, length(N2)), N2)
timings3 <- setNames(rep(0, length(N3)), N3)
for (i in seq_len(max(length(N0), length(N1), length(N2), length(N3)))) {
	N <- 2^i
	string1 <- DNAStringSet(paste(sample(DNA_ALPHABET[1:4], N, replace = TRUE), collapse = ""))
	string2 <- replaceAt(string1,
		at=IRanges(sample(N, ceiling(N/5)), width=1),
		sample(c(DNA_ALPHABET[1:4], ""), ceiling(N/5), replace = TRUE))
	for (j in 0:3) {
		# align the sequences using two methods
		if (j == 0 && i <= length(N0)) {
			timings0[i] <- system.time(AlignPairs(string1, string2, verbose=FALSE))[["user.self"]]
		} else if (j == 1 && i <= length(N1)) {
			timings1[i] <- system.time(AlignProfiles(string1, string2, restrict=c(-1e10, 1e10, 1e10), anchor=NA))[["user.self"]]
		} else if (j == 2 && i <= length(N2)) {
			timings2[i] <- system.time(AlignProfiles(string1, string2, anchor=NA))[["user.self"]]
		} else if (j == 3 && i <= length(N3)) {
			timings3[i] <- system.time(AlignProfiles(string1, string2))[["user.self"]]
		}
	}
}

c0 <- lm(timings0 ~ N0)
c1 <- lm(timings1 ~ N1 + I(N1^2))
c2 <- lm(timings2 ~ N2)
c3 <- lm(timings3 ~ N3)

N <- seq(1, max(N0, N1, N2, N3), length.out=1000) # prediction range
plot(N0, timings0,
	xlab = "Sequence length (nucleotides)",
	ylab = "Elapsed Time (sec.)",
	main = "",
	ylim=c(range(timings0,
		timings1,
		timings2,
		timings3)),
	xlim=c(0, N[length(N)]))
points(N, predict(c0,
		data.frame(N0 = N)),
	type="l", lty=3)
points(N1, timings1,
	col="blue", pch=0)
points(N, predict(c1,
		data.frame(N1 = N)),
	type="l", lty=3, col="blue")
points(N2, timings2,
	col="red", pch=5)
points(N, predict(c2,
		data.frame(N2 = N)),
	type="l", lty=3, col="red")
N <- seq(1, max(N3), length.out=1000) # prediction range
points(N3, timings3,
	col="green", pch=2)
points(N, predict(c3,
		data.frame(N3 = N)),
	type="l", lty=3, col="green")
legend("topright",
	c("AlignProfiles (unrestricted, unanchored)",
		"AlignProfiles (restricted, unanchored)",
		"AlignProfiles (restricted, anchored)",
		"AlignPairs (adaptive banding)"),
	pch=c(0, 5, 2, 1), lty=3,
	col=c("blue", "red", "green", "black"), bg="white")


###################################################
### code chunk number 3: expr1
###################################################
library(DECIPHER)

# specify the path to your sequence file:
fas <- "<<path to FASTA file>>"
# OR find the example sequence file used in this tutorial:
fas <- system.file("extdata", "50S_ribosomal_protein_L2.fas", package="DECIPHER")

dna <- readDNAStringSet(fas)
dna # the unaligned sequences


###################################################
### code chunk number 4: expr2 (eval = FALSE)
###################################################
## AA <- AlignTranslation(dna, type="AAStringSet") # align the translation
## BrowseSeqs(AA, highlight=1) # view the alignment
## 
## DNA <- AlignSeqs(dna) # align the sequences directly without translation
## DNA <- AlignTranslation(dna) # align the translation then reverse translate
## 
## # write the aligned sequences to a FASTA file
## writeXStringSet(DNA, file="<<path to output file>>")


###################################################
### code chunk number 5: expr3 (eval = FALSE)
###################################################
## # using a mixture of standard and non-standard genetic codes
## gC1 <- getGeneticCode(id_or_name2="1", full.search=FALSE, as.data.frame=FALSE)
## # Mollicutes use an alternative genetic code
## gC2 <- getGeneticCode(id_or_name2="4", full.search=FALSE, as.data.frame=FALSE)
## w <- grep("Mycoplasma|Ureaplasma", names(dna))
## gC <- vector("list", length(dna))
## gC[-w] <- list(gC1)
## gC[w] <- list(gC2)
## AA <- AlignTranslation(dna, geneticCode=gC, type="AAStringSet")


###################################################
### code chunk number 6: expr4 (eval = FALSE)
###################################################
## u_dna <- unique(dna) # the unique input sequences
## index <- match(dna, u_dna) # de-replication index
## 
## U_DNA <- AlignSeqs(u_dna) # align the sequences directly without translation
## DNA <- U_DNA[index]
## names(DNA) <- names(dna) # the re-replicated alignment


###################################################
### code chunk number 7: expr5 (eval = FALSE)
###################################################
## # database containing 16S ribosomal RNA sequences
## fas <- system.file("extdata", "Bacteria_175seqs.fas", package="DECIPHER")
## dna <- readDNAStringSet(fas)
## rna <- RemoveGaps(RNAStringSet(dna))
## # or if starting with DNA sequences, convert to RNA with:
## # rna <- RNAStringSet(dna)
## # or import RNA sequences directly using:
## # rna <- readRNAStringSet("<<path to FASTA file>>")
## 
## alignedRNA <- AlignSeqs(rna) # align with RNA secondary structure


###################################################
### code chunk number 8: expr6 (eval = FALSE)
###################################################
## half <- floor(length(dna)/2)
## dna1 <- dna[1:half] # first half
## dna2 <- dna[(half + 1):length(dna)] # second half
## 
## AA1 <- AlignTranslation(dna1, type="AAStringSet")
## AA2 <- AlignTranslation(dna2, type="AAStringSet")
## AA <- AlignProfiles(AA1, AA2) # align two alignments


###################################################
### code chunk number 9: expr7 (eval = FALSE)
###################################################
## # Align DNA sequences stored in separate tables:
## dbConn <- dbConnect(dbDriver("SQLite"), ":memory:")
## Seqs2DB(AA1, "DNAStringSet", dbConn, "AA1", tblName="AA1")
## Seqs2DB(AA2, "DNAStringSet", dbConn, "AA2", tblName="AA2")
## AlignDB(dbConn, tblName=c("AA1", "AA2"), add2tbl="AA",
##         type="AAStringSet")
## AA <- SearchDB(dbConn, tblName="AA", type="AAStringSet")
## BrowseDB(dbConn, tblName="AA")
## dbDisconnect(dbConn)


###################################################
### code chunk number 10: expr8 (eval = FALSE)
###################################################
## # form a chained guide tree
## gT <- lapply(order(width(dna), decreasing=TRUE),
## 	function(x) {
## 		attr(x, "height") <- 0
## 		attr(x, "label") <- names(dna)[x]
## 		attr(x, "members") <- 1L
## 		attr(x, "leaf") <- TRUE
## 		x
## 	})
## attr(gT, "height") <- 0.5
## attr(gT, "members") <- length(dna)
## class(gT) <- "dendrogram"
## 
## # use the guide tree as input for alignment
## DNA <- AlignTranslation(dna,
## 	guideTree=gT,
## 	iterations=0,
## 	refinements=0)


###################################################
### code chunk number 11: expr9 (eval = FALSE)
###################################################
## BrowseSeqs(DNA, highlight=0)


###################################################
### code chunk number 12: expr10 (eval = FALSE)
###################################################
## DNA_adjusted <- AdjustAlignment(DNA)


###################################################
### code chunk number 13: expr11 (eval = FALSE)
###################################################
## DNA_staggered <- StaggerAlignment(DNA)


###################################################
### code chunk number 14: expr12 (eval = FALSE)
###################################################
## db <- system.file("extdata", "Influenza.sqlite", package="DECIPHER")
## synteny <- FindSynteny(db, verbose=FALSE)
## synteny # an object of class `Synteny`
## InfluenzaA <- AlignSynteny(synteny, db, verbose=FALSE)
## unlist(InfluenzaA[[1]])


###################################################
### code chunk number 15: expr13 (eval = FALSE)
###################################################
## pairs(synteny, boxBlocks=TRUE) # scatterplot matrix


###################################################
### code chunk number 16: sessinfo
###################################################
toLatex(sessionInfo(), locale=FALSE)
