## download code
## Chunk 1
# A character vector with three elements; nchar() counts the characters of
# each element separately.
mychar <- c("as", "soon", "as possible")
mychar
nchar(mychar)
## Chunk 2
# A character vector with no elements at all.
x <- character(0)
length(x)
nchar(x)
# The empty string: a single element that holds zero characters.
y <- ""
length(y)
nchar(y)
## Chunk 3
# Positional extraction with substr() and substring(); the last two calls
# pass a length-5 vector as the end position.
substr(x, start = 2, stop = 4)
substr(x, start = 2, stop = rep(4, 5))
substring(x, first = 2, last = rep(4, 5))
## Chunk 4
# NOTE(review): randDNA() and GENETIC_CODE are not defined in this file;
# presumably they come from attached packages -- confirm before running.
# Cut the 102-character result of randDNA() into 34 consecutive triples and
# use each triple as an index into GENETIC_CODE.
rD <- randDNA(102)
rDtriples <- substring(
  rD,
  first = seq(1, 102, by = 3),
  last = seq(3, 102, by = 3)
)
paste(GENETIC_CODE[rDtriples])
## (solution chunk)
## Chunk 6
# Replacement forms: assigning into substr()/substring() overwrites the
# selected character positions of each element of x.
substring(x, first = 2, last = 4) <- "abc"
x
x <- c("howdy", "dudey friend")
substr(x, start = 2, stop = 4) <- "def"
x
# One replacement value per element, starting at position 2.
substring(x, first = 2) <- c("..", "+++")
## Chunk 7
# paste() recycles shorter arguments and joins element-wise using sep
# (a single space by default).
paste(1:3, "+", 4:5)
paste(1:3, 1:3, 4:6, sep = "+")
## Chunk 8
# collapse glues the pasted elements into one string.
paste(1:4, collapse = "=")
## Chunk 9
# split is recycled along x; an empty split string breaks the element
# into single characters.
strsplit(x = c("ab", "cde", "XYZ"), split = c("Y", ""))
# c("Y", NULL) is simply "Y", so every element is split on "Y".
strsplit(x = c("ab", "cde", "XYZ"), split = c("Y", NULL))
## Chunk 10
# Read the COPYING file from the R home directory into one newline-joined
# string, then re-wrap it with width 30 and a "myidea: " prefix per line.
x <- paste(
  readLines(file.path(R.home(), "COPYING")),
  collapse = "\n"
)
strwrap(x, width = 30, prefix = "myidea: ")[1:10]
writeLines(strwrap(x, width = 30, prefix = "myidea: ")[1:5])
## Chunk 11
# Convert DNA sequence strings to RNA notation.
#
# inputStr: character vector of sequences, in any letter case.
# Returns a character vector of the same length in which every element has
# been upper-cased and every "T" replaced by "U".
# Stops with "need character input" when inputStr is not a character vector.
dna2rna <- function(inputStr) {
  if (is.character(inputStr)) {
    return(chartr("T", "U", toupper(inputStr)))
  }
  stop("need character input")
}
# Build a two-element input from randDNA() and convert it.
# NOTE(review): randDNA() is not defined in this file -- confirm its source.
x <- c(randDNA(15), randDNA(12))
x
dna2rna(inputStr = x)
## Chunk 12
# Complement a DNA sequence.
#
# x: character vector of sequences.
# Returns x with A <-> T and C <-> G swapped in every element; all other
# characters (including lower-case letters) are left untouched.
compSeq <- function(x) {
  chartr(old = "ACTG", new = "TGAC", x = x)
}
# Complement the sequences currently held in x.
compSeq(x = x)
## (solution chunk)
## (solution chunk)
## (solution chunk)
## Chunk 16
# Fix the RNG seed so the draw is reproducible, sample five of the first ten
# lower-case letters, then sort them and compare each one with "m".
set.seed(123)
x <- sample(letters[1:10], size = 5)
x
sort(x)
x < "m"
## Chunk 17
# Exact matching: match() gives the position, %in% gives membership.
exT <- c("Intron", "Exon", "Example", "Chromosome")
match(x = "Exon", table = exT)
"Example" %in% exT
## Chunk 18
# Partial matching: "E" is a prefix of two entries, so pmatch() cannot
# resolve it; "Exo" and "I" each identify a single entry.
pmatch(x = "E", table = exT)
pmatch(x = "Exo", table = exT)
pmatch(x = "I", table = exT)
charmatch(x = "I", table = exT)
## Chunk 19
# With several inputs pmatch() uses each table entry at most once unless
# duplicates.ok = TRUE.
pmatch(x = c("I", "Int"), table = exT)
pmatch(x = c("I", "Int"), table = exT, duplicates.ok = TRUE)
charmatch(x = c("I", "Int"), table = exT)
## Chunk 20
# Table with two identical entries; charmatch() returns 0 to flag an
# ambiguous match.
pmatch(x = c("ab"), table = c("ab", "ab"))
charmatch(x = c("ab"), table = c("ab", "ab"))
## Chunk 21
# Two ways to place a single quote inside a string literal.
'I\'m a string'
"I'm a string"
## Chunk 22
# A literal backslash has to be escaped in source code.
s <- "I'm a backslash: \\"
s
## Chunk 23
# cat() writes the characters themselves rather than the escaped form.
cat(s)
## Chunk 24
noquote(s)
## Chunk 25
# The escaped backslash and "\n" each count as one character.
nchar(s)
strsplit(s, split = NULL)[[1]]
nchar("\n")
charToRaw("\n")
## Chunk 26
# A Windows-style path written with escaped backslashes.
fn <- "c:\\My Documents\\foo.bar"
fn
## Chunk 27
# Character-for-character translation of backslash to forward slash.
old <- "\\"
new <- "/"
chartr(old = old, new = new, x = fn)
## Chunk 28
# The same replacement with gsub(): as a regular expression the backslash
# must be escaped twice; with fixed = TRUE it is matched literally.
gsub(pattern = "\\\\", replacement = new, x = fn)
gsub(pattern = "\\", replacement = new, x = fn, fixed = TRUE)
## Chunk 29
# Turn source text into an unevaluated expression, evaluate it, and convert
# it back to text.
v1 <- parse(text = "mean(1:10)")
v1
eval(v1)
# deparse() applied to the expression vector, then to the call it contains.
deparse(v1)
deparse(v1[[1]])
## Chunk 30
##  Sys.getlocale()
## (solution chunk)
## Chunk 32
# "\\<" and "\\>" match the empty string at the start and end of a word.
gregexpr(pattern = "\\<", text = "my first anchor")
gregexpr(pattern = "\\>", text = "my first anchor")
## Chunk 33
# Word-edge ("\\b"), word-end, word-start, line-start ("^") and line-end
# ("$") anchors applied to the same input.
gregexpr(pattern = "\\b", text = "once upon a time")
gregexpr(pattern = "\\>", text = "once upon a time")
gregexpr(pattern = "\\<", text = "once upon a time")
gregexpr(pattern = "^", text = "once upon a time")
gregexpr(pattern = "$", text = "once upon a time")
## Chunk 34
# The negated class "[^r]" has to consume a character after the "r"; the
# negative lookahead "(?!r)" only asserts that no "r" follows.
regexpr(pattern = "r[^r]", text = "asffrb", perl = TRUE)
regexpr(pattern = "r[^r]", text = "asffr", perl = TRUE)
regexpr(pattern = "r(?!r)", text = "asffrb", perl = TRUE)
regexpr(pattern = "r(?!r)", text = "asffr", perl = TRUE)
## Chunk 35
# Back-reference: an upper-case letter followed by the same letter again.
gregexpr(pattern = "([A-Z])\\1", text = "ABBBZZ")
## Chunk 36
# Lazy (minimal) quantifier "{2,4}?": run the same pattern through the
# default regex engine and through the Perl-compatible one.
# perl is spelled TRUE rather than T because T is an ordinary variable that
# can be reassigned, which would silently change the engine used.
regexpr("AB{2,4}?", "ABBBBB")
regexpr("AB{2,4}?", "ABBBBB", perl = TRUE)
## Chunk 37
# Alternation where one alternative is a prefix of the other, run through
# the default and the Perl-compatible engine for comparison.
regexpr(pattern = "foo|foobar", text = "myfoobar")
regexpr(pattern = "foo|foobar", text = "myfoobar", perl = TRUE)
## Chunk 38
# The pattern occurs at positions 1 and 7 of testS, and the two occurrences
# overlap.
# NOTE(review): gregexpr2() is not a base R function -- presumably provided
# by an attached package; confirm.
testS <- "ACTACCACTACCACT"
gregexpr(pattern = "ACTACCACT", text = testS)
gregexpr2("ACTACCACT", testS)
## Chunk 39
# dd/dd/dddd pattern: it only checks for digits, so an impossible date such
# as 21/41/1977 matches too.
regexpr(
  pattern = "\\d\\d\\/\\d\\d\\/\\d\\d\\d\\d",
  text = "today is 12/01/1977",
  perl = TRUE
)
regexpr(
  pattern = "\\d\\d\\/\\d\\d\\/\\d\\d\\d\\d",
  text = "today is 21/41/1977",
  perl = TRUE
)
## (solution chunk)
## Chunk 41
# Strip blanks (the [[:blank:]] class: spaces and tabs) from the ends of
# each string.
#
# x:     character vector to clean.
# lead:  if TRUE, remove blanks at the start of each element.
# trail: if TRUE, remove blanks at the end of each element.
# Returns the cleaned vector; with both flags FALSE, x is returned as is.
strwhite <- function(x, lead = TRUE, trail = TRUE) {
  stripped <- if (lead) sub("^[[:blank:]]*", "", x, perl = TRUE) else x
  if (!trail) {
    return(stripped)
  }
  sub("[[:blank:]]*$", "", stripped, perl = TRUE)
}
## Chunk 42
# Translate a PROSITE-style motif into a regular expression: delete the "-"
# separators and the "." terminator, then map x/X to "." and "(" / ")" to
# "{" / "}".
prositeM <- "[RK]-x(2,3)-[DE]-x(2,3)-Y."
regexM <- chartr(
  old = "xX()",
  new = "..{}",
  x = gsub(pattern = "-|\\.", replacement = "", x = prositeM)
)
## Chunk 43
# testP contains the motif; testN is the same text with "XX" inserted.
testP <- "ACRDRACDTUYACRD"
testN <- "ACRDRAXXCDTUYACRD"
regexpr(pattern = regexM, text = testP)
regexpr(pattern = regexM, text = testN)
## Chunk 44
library("Biobase")
# Three strings passed to lcPrefix()/lcSuffix(); all start with "not " and
# end in "w".
str1 <- c("not now", "not as hard as wow", "not something new")
lcPrefix(str1)
lcSuffix(str1)
## Chunk 45
library("Rlibstree")
s1 <- "biology"
getLongestSubstring(s1)
## Chunk 46
# NOTE(review): RNAString() and cDNA() are not provided by any library()
# call visible above -- presumably Biostrings; confirm it is attached.
st1 <- RNAString("UCUCCCAACCCUUGUACCAGUG")
cD <- cDNA(st1)
## Chunk 47
library("matchprobes")
seq <- c(
  "CGACTGAGACCAAGACCTACAACAG",
  "CCCGCATCATCTTTCCTGTGCTCTT"
)
# start and stop are both 13, i.e. a single position is selected.
complementSeq(seq, start = 13, stop = 13)
## (solution chunk)
## Chunk 49
## library("BSgenome.Hsapiens.UCSC.hg18")
## Hsapiens
## Chunk 50
# NOTE(review): Hsapiens is used from here on, but the library() call that
# would provide it is commented out in Chunk 49 -- confirm it is loaded.
chr22NoN <- mask(Hsapiens$chr22, "N")
alphabetFrequency(Hsapiens$chr22, freq = TRUE)["N"]
## Chunk 51
# Search the masked chromosome for the TATA pattern.
TATA <- "TATAAAA"
mT <- matchPattern(TATA, chr22NoN)
countPattern(TATA, chr22NoN)
## Chunk 52
# Same search with max.mismatch = 1, then inspect the first three hits.
mmT <- matchPattern(TATA, chr22NoN, max.mismatch = 1)
length(mmT)
mismatch(TATA, mmT[1:3])
## Chunk 53
library("hgu95av2probe")
# Use the probe sequences as a dictionary of patterns.
dict <- hgu95av2probe$sequence
length(dict)
unique(nchar(dict))
dict[1:5]
pdict <- PDict(dict)

# Match the whole dictionary against chr22 and tabulate the per-probe
# hit counts.
vindex <- matchPDict(pdict, Hsapiens$chr22)
length(vindex)
count_index <- countIndex(vindex)
sum(count_index)
table(count_index)
## Chunk 54
# Probe(s) with the highest hit count, followed by a direct count for one
# specific sequence.
dict[count_index == max(count_index)]
countPattern("CTGTAATCCCAGCACTTTGGGAGGC", Hsapiens$chr22)
## Chunk 55
# Palindrome search on the masked chromosome with min.armlength = 40 and
# max.looplength = 20, followed by summaries of the hits.
chr22_pals <- findPalindromes(
  chr22NoN,
  min.armlength = 40,
  max.looplength = 20
)
nchar(chr22_pals)
palindromeArmLength(chr22_pals)
palindromeLeftArm(chr22_pals)
# NOTE(review): `base` presumably partial-matches a longer argument name of
# alphabetFrequency() -- confirm against the installed Biostrings version.
ans <- alphabetFrequency(chr22_pals, base = TRUE)
head(ans, n = 15)
## (solution chunk)
## (solution chunk)
## Chunk 58
# Search chr22 for a left/right pattern pair; 500 is passed as the third
# argument of matchLRPatterns().
Lpattern <- "CTCCGAG"
Rpattern <- "GTTCACA"
LRans <- matchLRPatterns(Lpattern, Rpattern, 500, Hsapiens$chr22)
length(LRans)
## Chunk 59
# Align two amino-acid strings with the "BLOSUM62" matrix and gappen = 3.
aa1 <- AAString("HXBLVYMGCHFDCXVBEHIKQZ")
aa2 <- AAString("QRNYMYCFQCISGNEYKQN")
needwunsQS(aa1, aa2, "BLOSUM62", gappen = 3)
## See how the gap penalty influences the alignment
needwunsQS(aa1, aa2, "BLOSUM62", gappen = 8)
## Chunk 60
# Read the two example FASTA files from the "extdata" directory of the
# Biostrings package. The files are addressed by their full installed path
# instead of via setwd(), so the working directory is never changed and
# cannot be left pointing at the package directory if a read fails.
Sc <- readFASTA(system.file("extdata", "Sc.fa", package = "Biostrings"))[[1]]$seq
Sp <- readFASTA(system.file("extdata", "Sp.fa", package = "Biostrings"))[[1]]$seq
# Integer scoring matrix: 0 on the diagonal, -5 everywhere else, with rows
# and columns named after the first four entries of DNA_ALPHABET.
mat <- matrix(-5L, nrow = 4, ncol = 4)
diag(mat) <- 0L
rownames(mat) <- colnames(mat) <- DNA_ALPHABET[1:4]
dnaAlign1 <- needwunsQS(Sc, Sp, mat, gappen = 1)
nchar(dnaAlign1)
## Chunk 61
# Same alignment with a larger gap penalty, for comparison.
dnaAlign2 <- needwunsQS(Sc, Sp, mat, gappen = 6)
nchar(dnaAlign2)
## (solution chunk)
## Chunk 63
library("Rlibstree")
# Build a suffix tree over both sequences and query it for their longest
# common substring.
# NOTE(review): Sc, Sp and dnaAlign1 must already exist from Chunk 60.
tree <- SuffixTree(c(Sc, Sp))
MUM <- getLongestCommonSubstring(tree)
nchar(MUM)
## Chunk 64
# First 20 columns of the result of consmat() on the first alignment.
consmat(dnaAlign1)[, 1:20]