## ---- include = FALSE--------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----eval=FALSE--------------------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # BiocManager::install("NxtIRFcore") ## ----------------------------------------------------------------------------- library(NxtIRFcore) ## ----------------------------------------------------------------------------- # Provides the path to the example genome: chrZ_genome() # Provides the path to the example gene annotation: chrZ_gtf() ## ---- results='hide', message = FALSE, warning = FALSE------------------------ ref_path = file.path(tempdir(), "Reference") BuildReference( reference_path = ref_path, fasta = chrZ_genome(), gtf = chrZ_gtf() ) ## ----------------------------------------------------------------------------- bams = NxtIRF_example_bams() bams ## ---- results='hide', message = FALSE, warning = FALSE------------------------ irf_path = file.path(tempdir(), "IRFinder_output") IRFinder( bamfiles = bams$path, sample_names = bams$sample, reference_path = ref_path, output_path = irf_path ) ## ----------------------------------------------------------------------------- expr = Find_IRFinder_Output(irf_path) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ nxtirf_path = file.path(tempdir(), "NxtIRF_output") CollateData( Experiment = expr, reference_path = ref_path, output_path = nxtirf_path ) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ se = MakeSE(nxtirf_path) ## ----------------------------------------------------------------------------- colData(se)$condition = rep(c("A", "B"), each = 3) colData(se)$batch = rep(c("K", "L", "M"), 2) ## ---- results='hide', message = FALSE, warning = FALSE------------------------ # Requires limma to be installed: require("limma") res_limma = limma_ASE( se = se, test_factor = "condition", test_nom = "B", test_denom = "A", ) # Requires DESeq2 to be installed: require("DESeq2") res_deseq = DESeq_ASE( se = se, test_factor = "condition", test_nom = "B", test_denom = "A", ) # Requires DoubleExpSeq to be installed: require("DoubleExpSeq") res_DES = DoubleExpSeq_ASE( se = se, test_factor = "condition", test_nom = "B", test_denom = "A", ) ## ----------------------------------------------------------------------------- res_limma.filtered = subset(res_limma, abs(AvgPSI_A - AvgPSI_B) > 0.05) ## ---- fig.width = 8, fig.height = 6------------------------------------------- p = Plot_Coverage( se = se, Event = res_limma.filtered$EventName[1], tracks = colnames(se)[c(1,2,4,5)], ) as_egg_ggplot(p) ## ----eval = FALSE------------------------------------------------------------- # # Running this will display an interactive plot # p$final_plot ## ---- fig.width = 8, fig.height = 6------------------------------------------- p = Plot_Coverage( se = se, Event = res_limma.filtered$EventName[1], tracks = c("A", "B"), condition = "condition", stack_tracks = TRUE, t_test = TRUE, ) as_egg_ggplot(p) ## ----include = FALSE---------------------------------------------------------- # Reset to prevent interfering with next section colData(se)$condition = NULL colData(se)$batch = NULL ## ---- eval = FALSE------------------------------------------------------------ # library(NxtIRFcore) ## ---- eval = FALSE------------------------------------------------------------ # ref_path = file.path(tempdir(), "Reference") # # GetReferenceResource( # reference_path = ref_path, # fasta = chrZ_genome(), # gtf = chrZ_gtf() # ) # # BuildReference(reference_path = ref_path) # # # or equivalently, in a one-step process: # # BuildReference( # reference_path = ref_path, # fasta = chrZ_genome(), # gtf = chrZ_gtf() # ) # ## ---- eval = FALSE------------------------------------------------------------ # bam_path = file.path(tempdir(), "bams") # # # Copies NxtIRF example BAM files to `bam_path` subdirectory; returns BAM paths # example_bams(path = bam_path) ## ---- eval = FALSE----------------------------------------------------------- # # as BAM file names denote their sample names # bams = Find_Bams(bam_path, level = 0) # # # In the case where BAM files are labelled using sample names as parent # # directory names (which oftens happens with the STAR aligner), use level = 1 ## ---- eval = FALSE------------------------------------------------------------ # irf_path = file.path(tempdir(), "IRFinder_output") # IRFinder( # bamfiles = bams$path, # sample_names = bams$sample, # reference_path = ref_path, # output_path = irf_path, # n_threads = 2, # overwrite = FALSE, # run_featureCounts = FALSE # ) ## ---- eval = FALSE------------------------------------------------------------ # # Re-run IRFinder without overwrite, and run featureCounts # require(Rsubread) # # IRFinder( # bamfiles = bams$path, # sample_names = bams$sample, # reference_path = ref_path, # output_path = irf_path, # n_threads = 2, # overwrite = FALSE, # run_featureCounts = TRUE # ) # # # Load gene counts # gene_counts <- readRDS(file.path(irf_path, "main.FC.Rds")) # # # Access gene counts: # gene_counts$counts ## ---- eval = FALSE------------------------------------------------------------ # expr <- Find_IRFinder_Output(irf_path) ## ---- eval = FALSE------------------------------------------------------------ # nxtirf_path = file.path(tempdir(), "NxtIRF_output") # # CollateData( # Experiment = expr, # reference_path = ref_path, # output_path = nxtirf_path, # IRMode = "SpliceOverMax", # n_threads = 1 # ) ## ---- eval = FALSE------------------------------------------------------------ # se = MakeSE(nxtirf_path, RemoveOverlapping = TRUE) ## ---- eval = FALSE------------------------------------------------------------ # subset_samples = colnames(se)[1:4] # df = data.frame(sample = subset_samples) # se_small = MakeSE(nxtirf_path, colData = df, RemoveOverlapping = TRUE) ## ---- results = FALSE, message = FALSE, warning = FALSE----------------------- expressed_events <- apply_filters(se, get_default_filters()) se.filtered = se[expressed_events,] ## ----------------------------------------------------------------------------- get_default_filters() ## ----------------------------------------------------------------------------- f1 = NxtFilter(filterClass = "Data", filterType = "Depth", minimum = 20) f1 ## ----eval=FALSE--------------------------------------------------------------- # ?NxtFilter ## ----------------------------------------------------------------------------- colData(se.filtered) ## ----------------------------------------------------------------------------- colData(se.filtered)$condition = rep(c("A", "B"), each = 3) colData(se.filtered)$batch = rep(c("K", "L", "M"), 2) # To display what colData is: as.data.frame(colData(se.filtered)) ## ----------------------------------------------------------------------------- require("limma") # Compare by condition-B vs condition-A res_limma_condition <- limma_ASE( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", ) # Compare by condition-B vs condition-A, batch-corrected by "batch" res_limma_treatment_batchcorrected <- limma_ASE( se = se.filtered, test_factor = "condition", test_nom = "B", test_denom = "A", batch1 = "batch" ) ## ---- fig.width = 7, fig.height = 5------------------------------------------- library(ggplot2) ggplot(res_limma_condition, aes(x = 100 * AvgPSI_B, y = 100 * AvgPSI_A)) + geom_point() + xlim(0, 100) + ylim(0, 100) + labs(title = "PSI values across conditions", x = "PSI of condition B", y = "PSI of condition A") ## ---- fig.width = 7, fig.height = 5------------------------------------------- ggplot(subset(res_limma_condition, EventType == "IR"), aes(x = 100 * AvgPSI_B, y = 100 * AvgPSI_A)) + geom_point() + xlim(0, 100) + ylim(0, 100) + labs(title = "PIR values across conditions (IR Only)", x = "PIR of condition B", y = "PIR of condition A") ## ---- fig.width = 7, fig.height = 5------------------------------------------- ggplot(res_limma_condition, aes(x = logFC, y = -log10(adj.P.Val))) + geom_point() + labs(title = "Differential analysis - B vs A", x = "Log2-fold change", y = "BH-adjusted P values (-log10)") ## ----------------------------------------------------------------------------- mat = make_matrix( se.filtered, event_list = res_limma_condition$EventName[1:10], method = "PSI" ) ## ---- fig.width = 8, fig.height = 6------------------------------------------- library(pheatmap) pheatmap(mat, annotation_col = as.data.frame(colData(se.filtered))) ## ---- fig.width = 8, fig.height = 6------------------------------------------- res = Plot_Coverage( se = se.filtered, Gene = "TP53", tracks = colnames(se.filtered)[1] ) as_egg_ggplot(res) ## ----eval = FALSE------------------------------------------------------------- # # Running this will display an interactive plot # res$final_plot ## ---- fig.width = 8, fig.height = 6------------------------------------------- res = Plot_Coverage( se = se.filtered, Gene = "TP53", tracks = colnames(se.filtered)[1], condense_tracks = TRUE ) as_egg_ggplot(res) ## ---- fig.width = 8, fig.height = 6------------------------------------------- res = Plot_Coverage( se = se.filtered, Gene = "TP53", tracks = colnames(se.filtered)[1], selected_transcripts = c("TP53-201", "TP53-204") ) as_egg_ggplot(res) ## ----------------------------------------------------------------------------- colData(se.filtered)$NSUN5_IR = c(rep("High", 2), rep("Low", 4)) ## ----------------------------------------------------------------------------- require("limma") res_limma_NSUN5 <- limma_ASE( se = se.filtered, test_factor = "NSUN5_IR", test_nom = "High", test_denom = "Low", ) head(res_limma_NSUN5$EventName) ## ---- fig.width = 8, fig.height = 6------------------------------------------- res = Plot_Coverage( se = se.filtered, Event = res_limma_NSUN5$EventName[1], condition = "NSUN5_IR", tracks = c("High", "Low"), ) as_egg_ggplot(res) ## ---- fig.width = 8, fig.height = 6------------------------------------------- res = Plot_Coverage( se = se.filtered, Event = "NSUN5/ENST00000252594_Intron2/clean", condition = "NSUN5_IR", tracks = c("High", "Low"), stack_tracks = TRUE, t_test = TRUE ) as_egg_ggplot(res) ## ----eval = FALSE------------------------------------------------------------- # library(NxtIRFcore) ## ----eval = FALSE------------------------------------------------------------- # ref_path = "./Reference" ## ----eval=FALSE--------------------------------------------------------------- # BuildReference( # reference_path = ref_path, # fasta = "genome.fa", gtf = "transcripts.gtf", # genome_type = "hg38" # ) ## ----eval=FALSE--------------------------------------------------------------- # FTP = "ftp://ftp.ensembl.org/pub/release-94/" # # BuildReference( # reference_path = ref_path, # fasta = paste0(FTP, "fasta/homo_sapiens/dna/", # "Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"), # gtf = paste0(FTP, "gtf/homo_sapiens/", # "Homo_sapiens.GRCh38.94.chr.gtf.gz"), # genome_type = "hg38" # ) ## ----------------------------------------------------------------------------- require(AnnotationHub) ah = AnnotationHub() query(ah, "Ensembl") ## ----------------------------------------------------------------------------- query(ah, c("Homo Sapiens", "release-94")) ## ----eval=FALSE--------------------------------------------------------------- # BuildReference( # reference_path = ref_path, # fasta = "AH65745", # gtf = "AH64631", # genome_type = "hg38" # ) ## ----eval=FALSE--------------------------------------------------------------- # BuildReference( # reference_path = ref_path, # fasta = "genome.fa", gtf = "transcripts.gtf", # genome_type = "" # ) ## ----eval=FALSE--------------------------------------------------------------- # BuildReference_Full( # reference_path = ref_path, # fasta = "genome.fa", gtf = "transcripts.gtf", # genome_type = "", # use_STAR_mappability = TRUE, # n_threads = 4 # ) ## ----eval = FALSE------------------------------------------------------------- # # (1a) Creates genome resource files # # ref_path = file.path(tempdir(), "Reference") # # GetReferenceResource( # reference_path = ref_path, # fasta = chrZ_genome(), # gtf = chrZ_gtf() # ) # # # (1b) Systematically generate reads based on the NxtIRF example genome: # # Mappability_GenReads( # reference_path = ref_path # ) # # # (2) Align the generated reads using Rsubread: # # # (2a) Build the Rsubread genome index: # # setwd(ref_path) # Rsubread::buildindex(basename = "./reference_index", # reference = chrZ_genome()) # # # (2b) Align the synthetic reads using Rsubread::subjunc() # # Rsubread::subjunc( # index = "./reference_index", # readfile1 = file.path(ref_path, "Mappability", "Reads.fa"), # output_file = file.path(ref_path, "Mappability", "AlignedReads.bam"), # useAnnotation = TRUE, # annot.ext = chrZ_gtf(), # isGTF = TRUE # ) # # # (3) Analyse the aligned reads in the BAM file for low-mappability regions: # # Mappability_CalculateExclusions( # reference_path = ref_path, # aligned_bam = file.path(ref_path, "Mappability", "AlignedReads.bam") # ) # # # (4) Build the NxtIRF reference using the calculated Mappability Exclusions # # BuildReference(ref_path) # ## ----------------------------------------------------------------------------- STAR_version() ## ----eval = FALSE------------------------------------------------------------- # # ref_path = "./Reference" # # # Ensure genome resources are prepared from genome FASTA and GTF file: # # if(!file.exists(file.path(ref_path, "resources"))) { # GetReferenceResource( # reference_path = ref_path, # fasta = "genome.fa", # gtf = "transcripts.gtf" # ) # } # # # Generate a STAR genome reference: # STAR_BuildRef( # reference_path = ref_path, # n_threads = 8 # ) # ## ----eval = FALSE------------------------------------------------------------- # STAR_align_fastq( # STAR_ref_path = file.path(ref_path, "STAR"), # BAM_output_path = "./bams/sample1", # fastq_1 = "sample1_1.fastq", fastq_2 = "sample1_2.fastq", # n_threads = 8 # ) ## ----eval = FALSE------------------------------------------------------------- # Experiment = data.frame( # sample = c("sample_A", "sample_B"), # forward = file.path("raw_data", c("sample_A", "sample_B"), # c("sample_A_1.fastq", "sample_B_1.fastq")), # reverse = file.path("raw_data", c("sample_A", "sample_B"), # c("sample_A_2.fastq", "sample_B_2.fastq")) # ) # # STAR_align_experiment( # Experiment = Experiment, # STAR_ref_path = file.path("Reference_FTP", "STAR"), # BAM_output_path = "./bams", # n_threads = 8, # two_pass = FALSE # ) ## ----eval=FALSE--------------------------------------------------------------- # bams = Find_Bams("./bams", level = 1) ## ----eval=FALSE--------------------------------------------------------------- # # assume NxtIRF reference has been generated in `ref_path` # # IRFinder( # bamfiles = bams$path, # sample_names = bams$sample, # reference_path = ref_path, # output_path = "./IRFinder_output", # n_threads = 4, # Use_OpenMP = TRUE # ) ## ----eval=FALSE--------------------------------------------------------------- # # BAM2COV( # bamfiles = bams$path, # sample_names = bams$sample, # output_path = "./IRFinder_output", # n_threads = 4, # Use_OpenMP = TRUE # ) ## ----eval=FALSE--------------------------------------------------------------- # expr = Find_IRFinder_Output("./IRFinder_output") ## ----eval = FALSE------------------------------------------------------------- # CollateData( # Experiment = expr, # reference_path = ref_path, # output_path = "./NxtIRF_output" # ) ## ----eval = FALSE------------------------------------------------------------- # se = MakeSE("./NxtIRF_output") ## ----------------------------------------------------------------------------- sessionInfo()