## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   library(ORFik)                        # This package
#   # Here the default values are shown:
#   where_to_save_config <- config_file() # Bioc Cache directory default
# 
#   parent_folder <- "~/bio_data/" # Change if you want it somewhere else
#   fastq.dir <- file.path(parent_folder, "raw_data")
#   bam.dir <- file.path(parent_folder, "processed_data")
#   reference.dir <- file.path(parent_folder, "references")
#   exp.dir <- file.path(base.dir, "ORFik_experiments/")
#   config.save(where_to_save_config,
#               fastq.dir, bam.dir, reference.dir, reference.dir)

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   config()

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   conf <- config.exper(experiment = "CHALMERS_Yeast", # short experiment info: here I used author + species
#                      assembly = "Yeast_SacCer3", # Reference folder
#                      type = c("RNA-seq")) # fastq and bam type
#   conf

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# 
# info <- download.SRA.metadata("SRP012047", outdir = conf["fastq RNA-seq"])
# # Let's take 2 first runs in this experiment:
# info <- info[1:2,]

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# # Subset
# # 18 MB, ~ 40 sec download time ->
# download.SRA(info, conf["fastq RNA-seq"], subset = 50000)
# 
# # Or you could do the full libraries
# # 1.6 GB, ~ 100 sec download time
# # (Full size files have faster MB/s download speed than subsets) ->
# # download.SRA(info, conf["fastq RNA-seq"])

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# organism <- info$ScientificName[1]
# is_paired_end <- all(info$LibraryLayout == "PAIRED")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   annotation <- getGenomeAndAnnotation(
#                       organism = organism,
#                       output.dir = conf["ref"],
#                       assembly_type = "toplevel" # Usually leave this to "auto"
#                       )

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   annotation <- getGenomeAndAnnotation(
#                       organism = organism,
#                       genome = TRUE, GTF = TRUE,
#                       phix = TRUE, ncRNA = TRUE, tRNA = TRUE, rRNA = TRUE,
#                       output.dir = conf["ref"],
#                       assembly_type = "toplevel" # Usually leave this to "auto"
#                       )

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   gtf <- "/path/to/local/annotation.gtf"
#   genome <- "/path/to/local/genome.fasta"
#   makeTxdbFromGenome(gtf, genome, organism = "Saccharomyces cerevisiae")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   txdb <- "/path/to/local/annotation.gtf.db"
#   cage <- "/path/to/CAGE.ofst" # Can be bed, wig etc (ofst is fastest)
#   reassigned_txdb <- assignTSSByCage(txdb, cage)
#   AnnotationDbi::saveDb("/path/to/local/annotation_cage.db")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   txdb <- "/path/to/local/annotation.gtf.db"
#   reassigned_txdb <- assignTSSByCage(txdb, cage = NULL, pseudoLength = 100)
#   AnnotationDbi::saveDb(reassigned_txdb,
#                         "/path/to/local/annotation_pseudo_leaders.db")
#   leaders <- loadRegion(reassigned_txdb, "leaders")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# index <- STAR.index(annotation)

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# alignment <-
#   STAR.align.folder(conf["fastq RNA-seq"], conf["bam RNA-seq"], index,
#                     paired.end = is_paired_end,
#                     steps = "tr-ge", # (trim needed: adapters found, then genome)
#                     adapter.sequence = "auto",
#                     max.cpus = 30, trim.front = 3, min.length = 20)

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# dir.create("~/UMIandTrimmed")
# system(paste(install.fastp(),
#              "-i", "~/sample1.fastq.gz",
#              "-o", "~/UMIandTrimmed/sample1.UMIandTrimmed.fastq.gz"
#              "--adapter_sequence=AGATCGGAAGAGC",
#              "--umi",
#              "--umi_loc", "read1",
#              "--umi_len", 12))
# # Read fastp documentation for info about umi arguments

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# alignment <-
#   STAR.align.folder(conf["fastq RNA-seq"], conf["bam RNA-seq"], index,
#                     paired.end = is_paired_end,
#                     steps = "tr", # (Only trim)
#                     adapter.sequence = "auto",
#                     max.cpus = 30, trim.front = 3, min.length = 20)
# trimmed_files <- file.path(conf["bam RNA-seq"], "trim")
# # Check that trimmed reads are average < 50 bases, else collapsing makes little sense for speedup.
# trim_table <- ORFik:::trimming.table(trimmed_files)
# stopifnot(all(trim_table$trim_mean_length < 50))
# trimmed_fastq <- list.files(path = trimmed_files, pattern = "fastq", full.names = TRUE)
# ORFik::collapse.fastq(files = trimmed_fastq, outdir = file.path(conf["bam Ribo-seq"], "collapsed") # Collapse all files in trim folder
# # Then align using collapsed reads
# alignment <-
#   STAR.align.folder(trimmed_files, conf["bam RNA-seq"], index,
#                     paired.end = is_paired_end,
#                     steps = "ge", # (Only genome alignment from collapsed)
#                     max.cpus = 30, min.length = 20)

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# index <- STAR.index(annotation, max.ram = 20, SAsparse = 2)

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# STAR.align.folder(conf["fastq RNA-seq"], conf["bam RNA-seq"], index,
#                     max.cpus = 4) # Reduce cores to 4 usually works for most systems

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# STAR.align.folder(conf["fastq RNA-seq"], conf["bam RNA-seq"], index,
#                     max.cpus = 4, steps = "tr-ge", resume = "ge") # Resume ge using completed tr

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# txdb_file <- paste0(annotation["gtf"], ".db") # Get txdb file, not raw gtf
# fa <- annotation["genome"]
# create.experiment(exper = "yeast_exp_RNA",
#                   dir = paste0(conf["bam RNA-seq"], "/aligned/"),
#                   txdb = txdb_file,
#                   fa = fa,
#                   organism = organism,
#                   viewTemplate = FALSE,
#                   pairedEndBam = is_paired_end # True/False per bam file
#                   )

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
# df <- read.experiment("yeast_exp_RNA")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   QCreport(df)

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   remove.experiments(df) # Remove loaded libraries
#   convertLibs(df, type = "ofst")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   remove.experiments(df)
#   convertLibs(df, type = "wig")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   remove.experiments(df)
#   outputLibs(df, type = "ofst")

## ----eval = FALSE, echo = TRUE, message = FALSE-------------------------------
#   mrna <- countTable(df, region = "mrna", type = "fpkm")
#   cds <- countTable(df, region = "cds", type = "fpkm")
#   ratio <- cds / mrna