### R code from vignette source 'ArtOfAlignmentInR.Rnw' ################################################### ### code chunk number 1: ArtOfAlignmentInR.Rnw:51-53 ################################################### options(continue=" ") options(width=80) ################################################### ### code chunk number 2: expr0 ################################################### library(DECIPHER) N0 <- ceiling(2^seq(1, 17)) N1 <- ceiling(2^seq(1, 12)) N2 <- ceiling(2^seq(1, 13)) N3 <- ceiling(2^seq(1, 16)) timings0 <- setNames(rep(0, length(N0)), N0) timings1 <- setNames(rep(0, length(N1)), N1) timings2 <- setNames(rep(0, length(N2)), N2) timings3 <- setNames(rep(0, length(N3)), N3) for (i in seq_len(max(length(N0), length(N1), length(N2), length(N3)))) { N <- 2^i string1 <- DNAStringSet(paste(sample(DNA_ALPHABET[1:4], N, replace = TRUE), collapse = "")) string2 <- replaceAt(string1, at=IRanges(sample(N, ceiling(N/5)), width=1), sample(c(DNA_ALPHABET[1:4], ""), ceiling(N/5), replace = TRUE)) for (j in 0:3) { # align the sequences using two methods if (j == 0 && i <= length(N0)) { timings0[i] <- system.time(AlignPairs(string1, string2, verbose=FALSE))[["user.self"]] } else if (j == 1 && i <= length(N1)) { timings1[i] <- system.time(AlignProfiles(string1, string2, restrict=c(-1e10, 1e10, 1e10), anchor=NA))[["user.self"]] } else if (j == 2 && i <= length(N2)) { timings2[i] <- system.time(AlignProfiles(string1, string2, anchor=NA))[["user.self"]] } else if (j == 3 && i <= length(N3)) { timings3[i] <- system.time(AlignProfiles(string1, string2))[["user.self"]] } } } c0 <- lm(timings0 ~ N0) c1 <- lm(timings1 ~ N1 + I(N1^2)) c2 <- lm(timings2 ~ N2) c3 <- lm(timings3 ~ N3) N <- seq(1, max(N0, N1, N2, N3), length.out=1000) # prediction range plot(N0, timings0, xlab = "Sequence length (nucleotides)", ylab = "Elapsed Time (sec.)", main = "", ylim=c(range(timings0, timings1, timings2, timings3)), xlim=c(0, N[length(N)])) points(N, predict(c0, data.frame(N0 = N)), type="l", lty=3) points(N1, timings1, col="blue", pch=0) points(N, predict(c1, data.frame(N1 = N)), type="l", lty=3, col="blue") points(N2, timings2, col="red", pch=5) points(N, predict(c2, data.frame(N2 = N)), type="l", lty=3, col="red") N <- seq(1, max(N3), length.out=1000) # prediction range points(N3, timings3, col="green", pch=2) points(N, predict(c3, data.frame(N3 = N)), type="l", lty=3, col="green") legend("topright", c("AlignProfiles (unrestricted, unanchored)", "AlignProfiles (restricted, unanchored)", "AlignProfiles (restricted, anchored)", "AlignPairs (adaptive banding)"), pch=c(0, 5, 2, 1), lty=3, col=c("blue", "red", "green", "black"), bg="white") ################################################### ### code chunk number 3: expr1 ################################################### library(DECIPHER) # specify the path to your sequence file: fas <- "<>" # OR find the example sequence file used in this tutorial: fas <- system.file("extdata", "50S_ribosomal_protein_L2.fas", package="DECIPHER") dna <- readDNAStringSet(fas) dna # the unaligned sequences ################################################### ### code chunk number 4: expr2 (eval = FALSE) ################################################### ## AA <- AlignTranslation(dna, type="AAStringSet") # align the translation ## BrowseSeqs(AA, highlight=1) # view the alignment ## ## DNA <- AlignSeqs(dna) # align the sequences directly without translation ## DNA <- AlignTranslation(dna) # align the translation then reverse translate ## ## # write the aligned sequences to a FASTA file ## writeXStringSet(DNA, file="<>") ################################################### ### code chunk number 5: expr3 (eval = FALSE) ################################################### ## # using a mixture of standard and non-standard genetic codes ## gC1 <- getGeneticCode(id_or_name2="1", full.search=FALSE, as.data.frame=FALSE) ## # Mollicutes use an alternative genetic code ## gC2 <- getGeneticCode(id_or_name2="4", full.search=FALSE, as.data.frame=FALSE) ## w <- grep("Mycoplasma|Ureaplasma", names(dna)) ## gC <- vector("list", length(dna)) ## gC[-w] <- list(gC1) ## gC[w] <- list(gC2) ## AA <- AlignTranslation(dna, geneticCode=gC, type="AAStringSet") ################################################### ### code chunk number 6: expr4 (eval = FALSE) ################################################### ## u_dna <- unique(dna) # the unique input sequences ## index <- match(dna, u_dna) # de-replication index ## ## U_DNA <- AlignSeqs(u_dna) # align the sequences directly without translation ## DNA <- U_DNA[index] ## names(DNA) <- names(dna) # the re-replicated alignment ################################################### ### code chunk number 7: expr5 (eval = FALSE) ################################################### ## # database containing 16S ribosomal RNA sequences ## fas <- system.file("extdata", "Bacteria_175seqs.fas", package="DECIPHER") ## dna <- readDNAStringSet(fas) ## rna <- RemoveGaps(RNAStringSet(dna)) ## # or if starting with DNA sequences, convert to RNA with: ## # rna <- RNAStringSet(dna) ## # or import RNA sequences directly using: ## # rna <- readRNAStringSet("<>") ## ## alignedRNA <- AlignSeqs(rna) # align with RNA secondary structure ################################################### ### code chunk number 8: expr6 (eval = FALSE) ################################################### ## half <- floor(length(dna)/2) ## dna1 <- dna[1:half] # first half ## dna2 <- dna[(half + 1):length(dna)] # second half ## ## AA1 <- AlignTranslation(dna1, type="AAStringSet") ## AA2 <- AlignTranslation(dna2, type="AAStringSet") ## AA <- AlignProfiles(AA1, AA2) # align two alignments ################################################### ### code chunk number 9: expr7 (eval = FALSE) ################################################### ## # Align DNA sequences stored in separate tables: ## dbConn <- dbConnect(dbDriver("SQLite"), ":memory:") ## Seqs2DB(AA1, "DNAStringSet", dbConn, "AA1", tblName="AA1") ## Seqs2DB(AA2, "DNAStringSet", dbConn, "AA2", tblName="AA2") ## AlignDB(dbConn, tblName=c("AA1", "AA2"), add2tbl="AA", ## type="AAStringSet") ## AA <- SearchDB(dbConn, tblName="AA", type="AAStringSet") ## BrowseDB(dbConn, tblName="AA") ## dbDisconnect(dbConn) ################################################### ### code chunk number 10: expr8 (eval = FALSE) ################################################### ## # form a chained guide tree ## gT <- lapply(order(width(dna), decreasing=TRUE), ## function(x) { ## attr(x, "height") <- 0 ## attr(x, "label") <- names(dna)[x] ## attr(x, "members") <- 1L ## attr(x, "leaf") <- TRUE ## x ## }) ## attr(gT, "height") <- 0.5 ## attr(gT, "members") <- length(dna) ## class(gT) <- "dendrogram" ## ## # use the guide tree as input for alignment ## DNA <- AlignTranslation(dna, ## guideTree=gT, ## iterations=0, ## refinements=0) ################################################### ### code chunk number 11: expr9 (eval = FALSE) ################################################### ## BrowseSeqs(DNA, highlight=0) ################################################### ### code chunk number 12: expr10 (eval = FALSE) ################################################### ## DNA_adjusted <- AdjustAlignment(DNA) ################################################### ### code chunk number 13: expr11 (eval = FALSE) ################################################### ## DNA_staggered <- StaggerAlignment(DNA) ################################################### ### code chunk number 14: expr12 (eval = FALSE) ################################################### ## db <- system.file("extdata", "Influenza.sqlite", package="DECIPHER") ## synteny <- FindSynteny(db, verbose=FALSE) ## synteny # an object of class `Synteny` ## InfluenzaA <- AlignSynteny(synteny, db, verbose=FALSE) ## unlist(InfluenzaA[[1]]) ################################################### ### code chunk number 15: expr13 (eval = FALSE) ################################################### ## pairs(synteny, boxBlocks=TRUE) # scatterplot matrix ################################################### ### code chunk number 16: sessinfo ################################################### toLatex(sessionInfo(), locale=FALSE)