## ----echo=FALSE, warning=FALSE, message=FALSE, out.width="100%"---------------
knitr::include_graphics(system.file("scATAC-seq_workflow.png", package = "scPipe"))

## ----message=FALSE------------------------------------------------------------
library(scPipe)
data.folder <- system.file("extdata", package = "scPipe", mustWork = TRUE)


## -----------------------------------------------------------------------------
output_folder <- paste0(tempdir(), "/scPipeATACVignette")

## -----------------------------------------------------------------------------
# data fastq files
r1      <- file.path(data.folder, "small_chr21_R1.fastq.gz") 
r2      <- file.path(data.folder, "small_chr21_R3.fastq.gz") 

# barcodes in fastq format
barcode_fastq      <- file.path(data.folder, "small_chr21_R2.fastq.gz") 

# barcodes in .csv format
barcode_1000       <- file.path(data.folder, "chr21_modified_barcode_1000.csv")


## -----------------------------------------------------------------------------
sc_atac_trim_barcode (r1            = r1, 
                      r2            = r2, 
                      bc_file       = barcode_fastq, 
                      rmN           = FALSE,
                      rmlow         = FALSE,
                      output_folder = output_folder)

## ----eval=FALSE---------------------------------------------------------------
# sc_atac_trim_barcode (r1            = r1,
#                       r2            = r2,
#                       bc_file       = barcode_1000,
#                       id1_st        = -1,
#                       id1_len       = -1,
#                       id2_st        = -1,
#                       id2_len       = -10,
#                       output_folder = output_folder,
#                       rmN           = TRUE)

## -----------------------------------------------------------------------------
demux_r1        <- file.path(output_folder, "demux_completematch_small_chr21_R1.fastq.gz")
demux_r2        <- file.path(output_folder, "demux_completematch_small_chr21_R3.fastq.gz")
reference = file.path(data.folder, "small_chr21.fa")


aligned_bam <- sc_aligning(ref = reference, 
                R1 = demux_r1, 
                R2 = demux_r2, 
                nthreads  = 12,
                output_folder = output_folder)

## -----------------------------------------------------------------------------

sorted_tagged_bam <- sc_atac_bam_tagging (inbam = aligned_bam, 
                       output_folder =  output_folder, 
                       bam_tags      = list(bc="CB", mb="OX"), 
                       nthreads      =  12)


## ----eval=FALSE---------------------------------------------------------------
# 
# removed <- sc_atac_remove_duplicates(sorted_tagged_bam,
#                           output_folder = output_folder)
# if (!isFALSE(removed))
#   sorted_tagged_bam <- removed
# 

## -----------------------------------------------------------------------------
sc_atac_create_fragments(inbam = sorted_tagged_bam,
                         output_folder = output_folder)


## ----eval=FALSE---------------------------------------------------------------
# # CHECK IF THE .narrowPeak file is small enough to include in the package,
# # otherwise, we need to make this basilisk call work??
# sc_atac_peak_calling(inbam = sorted_tagged_bam,
#                      ref = reference,
#                      genome_size = NULL,
#                      output_folder = output_folder)
# 

## ----eval=FALSE---------------------------------------------------------------
# features          <- file.path(output_folder, "NA_peaks.narrowPeak")
# 
# sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"),
#                           feature_input = features,
#                           bam_tags      = list(bc="CB", mb="OX"),
#                           feature_type  = "peak",
#                           organism      = "hg38",
#                           yieldsize     = 1000000,
#                           exclude_regions = TRUE,
#                           output_folder = output_folder,
#                           fix_chr       = "none"
#                           )

## ----eval=FALSE---------------------------------------------------------------
# reference       <- file.path(data.folder, "small_chr21.fa")
# sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"),
#                           feature_input = reference,
#                           bam_tags      = list(bc="CB", mb="OX"),
#                           feature_type  = "genome_bin",
#                           organism      = "hg38",
#                           cell_calling  = FALSE,
#                           yieldsize     = 1000000,
#                           exclude_regions = TRUE,
#                           output_folder = output_folder,
#                           fix_chr       = "none"
#                           )
# 

## ----eval=FALSE---------------------------------------------------------------
# features          <- file.path(output_folder, "NA_peaks.narrowPeak")

## ----eval=TRUE, echo=FALSE----------------------------------------------------
features          <- file.path(data.folder, "NA_peaks.narrowPeak")

## -----------------------------------------------------------------------------
sc_atac_feature_counting (fragment_file = file.path(output_folder, "fragments.bed"),
                          feature_input = features, 
                          bam_tags      = list(bc="CB", mb="OX"), 
                          feature_type  = "peak",
                          organism      = "hg38",
                          cell_calling  = "filter",
                          min_uniq_frags = 0,
                          min_frac_peak = 0,
                          min_frac_promoter = 0,
                          yieldsize     = 1000000,
                          exclude_regions = TRUE,
                          output_folder = output_folder,
                          fix_chr       = "none"
                          )

## -----------------------------------------------------------------------------
feature_matrix <- readRDS(file.path(output_folder, "unfiltered_feature_matrix.rds"))
dplyr::glimpse(feature_matrix)

sparseM <- readRDS(file.path(output_folder, "sparse_matrix.rds"))
dplyr::glimpse(sparseM)

## -----------------------------------------------------------------------------
sce <- sc_atac_create_sce(input_folder = output_folder,
                   organism     = "hg38",
                   feature_type = "peak",
                   pheno_data   = NULL,
                   report       = FALSE)

## ----echo=FALSE, warning=FALSE, message=FALSE, out.width="100%"---------------
knitr::include_graphics(system.file("report_example.png", package = "scPipe"))

## ----eval = FALSE-------------------------------------------------------------
# data.folder <- system.file("extdata", package = "scPipe", mustWork = TRUE)
# output_folder <- tempdir()
# sce <- sc_atac_pipeline(r1 = file.path(data.folder, "small_chr21_R1.fastq.gz"),
#                         r2 = file.path(data.folder, "small_chr21_R3.fastq.gz"),
#                         barcode_fastq = file.path(data.folder, "small_chr21_R2.fastq.gz"),
#                         organism = "hg38",
#                         reference = file.path(data.folder, "small_chr21.fa"),
#                         feature_type = "peak",
#                         remove_duplicates = FALSE,
#                         min_uniq_frags = 0, # set to 0 as the sample dataset only has a small number of reads
#                         min_frac_peak = 0,
#                         min_frac_promoter = 0,
#                         output_folder = output_folder)

## ----echo=FALSE, results='hide'-----------------------------------------------
# cleanup tempfiles
unlink(output_folder, recursive=TRUE)

## -----------------------------------------------------------------------------
sessionInfo()