##############################################################################
## Exercise 1:
##############

## These exercises investigated local resources on the AMI. The only R
## code involved was to check the number of workers.

library(BiocParallel)
bpworkers()

##############################################################################
## Exercise 2:
##############

library(RAmazonS3)

## List up to 500 keys of the "1000genomes" bucket; no credentials are
## supplied (`auth = NA`).
res <- listBucket("1000genomes", auth = NA, maxKeys = 500)

## Create url
bucketname <- "1000genomes"
key <- "data/HG00096/alignment/HG00096.chrom20.ILLUMINA.bwa.GBR.low_coverage.20120522.bam"
url <- paste0("http://s3.amazonaws.com/", bucketname, "/", key)

## Download HG00096 chromosome 20
t <- tempfile() ## alternatively specify a path to the local EC2 storage
## A BAM file is binary: write it in binary mode so it is not corrupted on
## platforms (notably Windows) where the default text mode alters bytes.
download.file(url, t, mode = "wb")

##############################################################################
## Exercise 3:
##############

library(BiocParallel)
library(Rsamtools)
library(cn.mops)
library(DNAcopy) ## for segmentation algo

## Four BAM files and the sample group of each, in the same order.
bfl <- BamFileList(c("exp_srx036695.bam", "exp_srx036696.bam",
                     "exp_srx036692.bam", "exp_srx036697.bam"))
group <- c("tumor", "tumor", "normal", "normal")

## Sequence information taken from the first file.
si <- seqinfo(bfl[[1]])

##############################################################################
## Exercise 4:
##############

## Tile chr4 into ranges of width 1e4 and restrict BAM scanning to them.
chr4 <- si["chr4"]
tiles <- tileGenome(chr4, tilewidth = 1e4)
sbp <- ScanBamParam(which = unlist(tiles, use.names = FALSE))

## Multi-core workers share memory. They can all
## see `sbp` so there is no need to pass it to
## `FUN` (and make a copy for each worker).
library(GenomicAlignments)

mcparam <- MulticoreParam()

## Coverage of a single BAM file.
## `file`: one element of `bfl`.
## `sbp` is not an argument; it is looked up in the enclosing (global)
## environment.
FUN1 <- function(file) {
  coverage(file, param = sbp)
}
res1 <- bplapply(bfl, FUN1, BPPARAM = mcparam)

## The snow cluster workers do not share memory and `sbp`
## must be explicitly passed to `FUN`:
sparam <- SnowParam(workers = bpworkers())

## Coverage of a single BAM file, with the scan parameters passed in.
## `file`:  one element of `bfl`.
## `param`: the ScanBamParam to apply (forwarded by bplapply()).
FUN2 <- function(file, param) {
  library(GenomicAlignments) ## for coverage method
  coverage(file, param = param)
}
res2 <- bplapply(bfl, FUN2, BPPARAM = sparam, param = sbp)

##############################################################################
## Exercise 5A:
##############

## Copy-number analysis for one chromosome.
## `chrom`: reference sequence name to count reads on.
## `files`: paths of the BAM files.
## `WL`:    window length handed to getReadCountsFromBAM().
## `group`: "tumor"/"normal" label per file, used to pick the case and
##          control columns of the read counts.
## `...`:   accepted but not used.
FUN3 <- function(chrom, files, WL, group, ...) {
  library(cn.mops)
  ## `parallel = length(files)` -- presumably one process per file;
  ## confirm against the cn.mops documentation.
  counts <- getReadCountsFromBAM(files, WL = WL, mode = "unpaired",
                                 refSeqName = chrom,
                                 parallel = length(files))
  referencecn.mops(cases = counts[, group == "tumor"],
                   controls = counts[, group == "normal"])
}

chrom <- c("chr1", "chr4", "chr17")
sparam <- SnowParam(3)
res3 <- bplapply(chrom, FUN3, BPPARAM = sparam,
                 files = path(bfl), WL = 18000, group = group)

##############################################################################
## Exercise 5B:
##############

## Same analysis as FUN3, but the per-file read counting is dispatched with
## bplapply() instead of the `parallel` argument of getReadCountsFromBAM().
## Arguments are as for FUN3.
FUN3_mod <- function(chrom, files, WL, group, ...) {
  ## Attach everything this function needs so it also works on workers
  ## that do not inherit the master's search path (e.g. snow workers).
  library(BiocParallel) ## for bplapply and MulticoreParam
  library(cn.mops)
  lst <- bplapply(files, getReadCountsFromBAM,
                  BPPARAM = MulticoreParam(length(files)),
                  WL = WL, mode = "unpaired", refSeqName = chrom)
  ## combine results: keep the ranges of the first result and append the
  ## metadata columns of the remaining ones
  counts <- lst[[1]]
  mcols(counts) <- DataFrame(mcols(counts), lapply(lst[-1], mcols))
  referencecn.mops(cases = counts[, group == "tumor"],
                   controls = counts[, group == "normal"])
}

## No BPPARAM is given here, so bplapply() uses its default back-end.
res3_mod <- bplapply(chrom, FUN3_mod,
                     files = path(bfl), WL = 18000, group = group)