library(scater)
library(ggplot2)
library(BiocParallel)
library(blase)
library(limma)
library(ggVennDiagram)RNGversion("3.5.0")
#> Warning in RNGkind("Mersenne-Twister", "Inversion", "Rounding"): non-uniform
#> 'Rounding' sampler used
SEED <- 7
set.seed(SEED)N_CORES <- 2
bpparam <- MulticoreParam(N_CORES)This article will show how BLASE can be used for excluding the effect of developmental differences when analysing bulk RNA-seq data. We make use of scRNA-seq Dogga et al. 2024 and bulk RNA-seq data from Zhang et al. 2021. Code for generating the objects used here is available in the BLASE reproducibility repository.
First we’ll load in the data we’re using, pre-prepared from the BLASE reproducibility repository.
data(zhang_2021_heat_shock_bulk, package = "blase")
data(MCA_PF_SCE, package = "blase")We can examine the true lifecycle stages, and also the calculated pseudotime trajectory (Slingshot).
plotUMAP(MCA_PF_SCE, colour_by = "STAGE_HR")#| fig.alt: >
#|   UMAP of Dogga et al. single-cell RNA-seq reference coloured by pseudotime,
#|   starting with Rings, and ending with Schizonts.
plotUMAP(MCA_PF_SCE, colour_by = "slingPseudotime_1")
## Prepare BLASE
Now we’ll prepare BLASE for use.
First, we create the object, giving it the number of bins we want to use, and how to calculate them.
blase_data <- as.BlaseData(
    MCA_PF_SCE,
    pseudotime_slot = "slingPseudotime_1",
    n_bins = 8,
    split_by = "pseudotime_range"
)
# Add these bins to the sc metadata
MCA_PF_SCE <- assign_pseudotime_bins(MCA_PF_SCE,
    pseudotime_slot = "slingPseudotime_1",
    n_bins = 8,
    split_by = "pseudotime_range"
)Now we will select the genes we want to use, using BLASE’s gene peakedness metric.
gene_peakedness_info <- calculate_gene_peakedness(
    MCA_PF_SCE,
    window_pct = 5,
    knots = 18,
    BPPARAM = bpparam
)
genes_to_use <- gene_peakedness_spread_selection(
    MCA_PF_SCE,
    gene_peakedness_info,
    genes_per_bin = 30,
    n_gene_bins = 30
)
head(gene_peakedness_info)
#>             gene peak_pseudotime mean_in_window mean_out_window     ratio
#> 28 PF3D7-1401100       3.8311312       5.793085        1.528934  3.788969
#> 44 PF3D7-1401200       6.0203490       4.791942        1.921439  2.493934
#> 29 PF3D7-1401400       3.9679573     696.174509       39.689901 17.540344
#> 84 PF3D7-1401600      11.4933936      93.076303        7.631160 12.196875
#> 1  PF3D7-1401900       0.1368261      43.270750        3.321464 13.027613
#> 80 PF3D7-1402200      10.9460892       3.754015        1.554102  2.415553
#>    window_start window_end deviance_explained
#> 28    3.4890659  4.1731965          0.1881312
#> 44    5.6782838  6.3624143          0.2035355
#> 29    3.6258920  4.3100226          0.2513795
#> 84   11.1513283 11.8354589          0.6891177
#> 1    -0.2052392  0.4788914          0.3846938
#> 80   10.6040239 11.2881544          0.1768597Here, we add them to the BLASE object for mapping with.
genes(blase_data) <- genes_to_useNow we can perform the actual mapping step, and review the results.
mapping_results <- map_all_best_bins(
    blase_data,
    zhang_2021_heat_shock_bulk,
    BPPARAM = bpparam
)plot_mapping_result_heatmap(mapping_results)We calculate DE genes using Limma (ref) as we have access to only normalised counts.
metadata <- data.frame(row.names = seq_len(12))
metadata$strain <- c(
    rep("NF54", 4),
    rep("PB4", 4),
    rep("PB31", 4)
)
metadata$growth_conditions <- rep(c("Normal", "Normal", "HS", "HS"), 3)
metadata$group <- paste0(metadata$strain, "_", metadata$growth_conditions)
rownames(metadata) <- c(
    "NF54_37_1",
    "NF54_37_2",
    "NF54_41_1",
    "NF54_41_2",
    "PB4_37_1",
    "PB4_37_2",
    "PB4_41_1",
    "PB4_41_2",
    "PB31_37_1",
    "PB31_37_2",
    "PB31_41_1",
    "PB31_41_2"
)
design <- model.matrix(~ 0 + group, metadata)
colnames(design) <- gsub("group", "", colnames(design))
contr.matrix <- makeContrasts(
    WT_normal_v_HS = NF54_Normal - NF54_HS,
    PB31_normal_v_HS = PB31_Normal - PB31_HS,
    normal_NF54_v_PB31 = NF54_Normal - PB31_Normal,
    levels = colnames(design)
)vfit <- lmFit(zhang_2021_heat_shock_bulk, design)
vfit <- contrasts.fit(vfit, contrasts = contr.matrix)
efit <- eBayes(vfit)
summary(decideTests(efit))
#>        WT_normal_v_HS PB31_normal_v_HS normal_NF54_v_PB31
#> Down              118              118                179
#> NotSig            822              616                767
#> Up                 50              256                 44
PB31_normal_v_HS_BULK_DE <- topTable(efit, n = Inf, coef = 2)
PB31_normal_v_HS_BULK_DE <- PB31_normal_v_HS_BULK_DE[PB31_normal_v_HS_BULK_DE$adj.P.Val < 0.05 & PB31_normal_v_HS_BULK_DE$logFC > 0, ]
PB31_normal_v_HS_BULK_DE <- PB31_normal_v_HS_BULK_DE[order(-PB31_normal_v_HS_BULK_DE$logFC), ]First, let’s pseudobulk based on the pseudotime bins
pseudobulked_MCA_PF_SCE <- data.frame(row.names = rownames(MCA_PF_SCE))
for (r in mapping_results) {
    pseudobulked_MCA_PF_SCE[bulk_name(r)] <- rowSums(counts(MCA_PF_SCE[, MCA_PF_SCE$pseudotime_bin == best_bin(r)]))
}
pseudobulked_MCA_PF_SCE <- as.matrix(pseudobulked_MCA_PF_SCE)And now calculate DE genes for these
## Normalise, not needed for true bulks
par(mfrow = c(1, 2))
v <- voom(pseudobulked_MCA_PF_SCE, design, plot = FALSE)
vfit_sc <- lmFit(v, design)
vfit_sc <- contrasts.fit(vfit_sc, contrasts = contr.matrix)
efit_sc <- eBayes(vfit_sc)
summary(decideTests(efit_sc))
#>        WT_normal_v_HS PB31_normal_v_HS normal_NF54_v_PB31
#> Down                0              257                127
#> NotSig           1746             1370               1343
#> Up                  0              119                276PB31_normal_v_HS_SC_DE <- topTable(efit_sc, n = Inf, coef = 2)
PB31_normal_v_HS_SC_DE <- PB31_normal_v_HS_SC_DE[PB31_normal_v_HS_SC_DE$adj.P.Val < 0.05 & PB31_normal_v_HS_SC_DE$logFC > 0, ]
PB31_normal_v_HS_SC_DE <- PB31_normal_v_HS_SC_DE[order(-PB31_normal_v_HS_SC_DE$logFC), ]How many of these genes overlap? We can look at the intersection using a Venn Diagram:
ggVennDiagram(list(
    Phenotype = rownames(PB31_normal_v_HS_BULK_DE),
    Development = rownames(PB31_normal_v_HS_SC_DE)
)) + ggtitle("PB31 Normal vs Heat Shock")Great, there are genes which we can correct. We can now remove these from the original DE genes list.
NB: This is a primitive approach, and there are likely many other and better ways to calculate which genes should be kept or removed.
PB31_normal_v_HS_corrected_DE <- rownames(PB31_normal_v_HS_BULK_DE[!(rownames(PB31_normal_v_HS_BULK_DE) %in% rownames(PB31_normal_v_HS_SC_DE)), ])
head(PB31_normal_v_HS_corrected_DE)
#> [1] "PF3D7-0725400" "PF3D7-0905400" "PF3D7-1010300" "PF3D7-0731500"
#> [5] "PF3D7-0913800" "PF3D7-1017500"Further downstream analysis can now be performed, as desired by the researcher, for example Gene Ontology term enrichment.
sessionInfo()
#> R version 4.5.1 Patched (2025-08-23 r88802)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#> 
#> Matrix products: default
#> BLAS:   /home/biocbuild/bbs-3.22-bioc/R/lib/libRblas.so 
#> LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
#> 
#> Random number generation:
#>  RNG:     Mersenne-Twister 
#>  Normal:  Inversion 
#>  Sample:  Rounding 
#>  
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_GB              LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: America/New_York
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] ggVennDiagram_1.5.4         limma_3.65.5               
#>  [3] patchwork_1.3.2             blase_0.99.5               
#>  [5] BiocParallel_1.43.4         scater_1.37.0              
#>  [7] ggplot2_4.0.0               scuttle_1.19.0             
#>  [9] SingleCellExperiment_1.31.1 SummarizedExperiment_1.39.2
#> [11] Biobase_2.69.1              GenomicRanges_1.61.5       
#> [13] Seqinfo_0.99.2              IRanges_2.43.5             
#> [15] S4Vectors_0.47.4            BiocGenerics_0.55.3        
#> [17] generics_0.1.4              MatrixGenerics_1.21.0      
#> [19] matrixStats_1.5.0           BiocStyle_2.37.1           
#> 
#> loaded via a namespace (and not attached):
#>   [1] RcppAnnoy_0.0.22         splines_4.5.1            later_1.4.4             
#>   [4] tibble_3.3.0             polyclip_1.10-7          fastDummies_1.7.5       
#>   [7] lifecycle_1.0.4          globals_0.18.0           lattice_0.22-7          
#>  [10] MASS_7.3-65              magrittr_2.0.4           plotly_4.11.0           
#>  [13] sass_0.4.10              rmarkdown_2.30           jquerylib_0.1.4         
#>  [16] yaml_2.3.10              httpuv_1.6.16            Seurat_5.3.0            
#>  [19] sctransform_0.4.2        spam_2.11-1              sp_2.2-0                
#>  [22] spatstat.sparse_3.1-0    reticulate_1.43.0        cowplot_1.2.0           
#>  [25] pbapply_1.7-4            RColorBrewer_1.1-3       abind_1.4-8             
#>  [28] Rtsne_0.17               purrr_1.1.0              ggrepel_0.9.6           
#>  [31] irlba_2.3.5.1            listenv_0.9.1            spatstat.utils_3.2-0    
#>  [34] goftest_1.2-3            RSpectra_0.16-2          spatstat.random_3.4-2   
#>  [37] fitdistrplus_1.2-4       parallelly_1.45.1        codetools_0.2-20        
#>  [40] DelayedArray_0.35.3      tidyselect_1.2.1         farver_2.1.2            
#>  [43] ScaledMatrix_1.17.0      viridis_0.6.5            spatstat.explore_3.5-3  
#>  [46] jsonlite_2.0.0           BiocNeighbors_2.3.1      progressr_0.17.0        
#>  [49] ggridges_0.5.7           survival_3.8-3           tools_4.5.1             
#>  [52] ica_1.0-3                Rcpp_1.1.0               glue_1.8.0              
#>  [55] gridExtra_2.3            SparseArray_1.9.1        xfun_0.53               
#>  [58] mgcv_1.9-3               dplyr_1.1.4              withr_3.0.2             
#>  [61] BiocManager_1.30.26      fastmap_1.2.0            boot_1.3-32             
#>  [64] digest_0.6.37            rsvd_1.0.5               R6_2.6.1                
#>  [67] mime_0.13                scattermore_1.2          tensor_1.5.1            
#>  [70] dichromat_2.0-0.1        spatstat.data_3.1-8      tidyr_1.3.1             
#>  [73] data.table_1.17.8        httr_1.4.7               htmlwidgets_1.6.4       
#>  [76] S4Arrays_1.9.1           uwot_0.2.3               pkgconfig_2.0.3         
#>  [79] gtable_0.3.6             lmtest_0.9-40            S7_0.2.0                
#>  [82] XVector_0.49.1           htmltools_0.5.8.1        dotCall64_1.2           
#>  [85] bookdown_0.45            SeuratObject_5.2.0       scales_1.4.0            
#>  [88] png_0.1-8                spatstat.univar_3.1-4    knitr_1.50              
#>  [91] reshape2_1.4.4           nlme_3.1-168             cachem_1.1.0            
#>  [94] zoo_1.8-14               stringr_1.5.2            KernSmooth_2.23-26      
#>  [97] parallel_4.5.1           miniUI_0.1.2             vipor_0.4.7             
#> [100] pillar_1.11.1            grid_4.5.1               vctrs_0.6.5             
#> [103] RANN_2.6.2               promises_1.3.3           BiocSingular_1.25.0     
#> [106] beachmat_2.25.5          xtable_1.8-4             cluster_2.1.8.1         
#> [109] beeswarm_0.4.0           evaluate_1.0.5           tinytex_0.57            
#> [112] magick_2.9.0             cli_3.6.5                compiler_4.5.1          
#> [115] rlang_1.1.6              crayon_1.5.3             future.apply_1.20.0     
#> [118] labeling_0.4.3           plyr_1.8.9               ggbeeswarm_0.7.2        
#> [121] stringi_1.8.7            viridisLite_0.4.2        deldir_2.0-4            
#> [124] lazyeval_0.2.2           spatstat.geom_3.6-0      Matrix_1.7-4            
#> [127] RcppHNSW_0.6.0           sparseMatrixStats_1.21.0 future_1.67.0           
#> [130] statmod_1.5.1            shiny_1.11.1             ROCR_1.0-11             
#> [133] igraph_2.2.0             bslib_0.9.0