## ----setup, echo=FALSE, messages=FALSE, warnings=FALSE------------------- suppressPackageStartupMessages({ library(signatureSearchData) }) # knitr::opts_knit$set(root.dir = "~/insync/project/longevityTools_eDRUG/") ## ----install, eval=FALSE------------------------------------------------- # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # BiocManager::install("signatureSearchData") ## ----load, eval=FALSE---------------------------------------------------- # library(signatureSearchData) ## ----eh_explore_ssd1, eval=TRUE, warning=FALSE, message=FALSE------------ library(ExperimentHub) eh <- ExperimentHub() ssd <- query(eh, c("signatureSearchData")) ssd ## ----eh_explore_ssd2, eval=TRUE, warning=FALSE, message=FALSE------------ ssd$title ## ----eh_explore_ssd3, eval=TRUE------------------------------------------ as.list(ssd)[10] ## ----lincs, eval=FALSE--------------------------------------------------- # library(ExperimentHub); library(rhdf5) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "lincs")) # lincs_path <- eh[['EH3226']] # rhdf5::h5ls(lincs_path) ## ----filter_meta42, eval=FALSE------------------------------------------- # meta42 <- readr::read_tsv("./data/GSE92742_Broad_LINCS_sig_info.txt") # dose <- meta42$pert_idose[7] # ## filter rows by 'pert_type' as compound, 10uM concentration, and 24h treatment time # meta42_filter <- sig_filter(meta42, pert_type="trt_cp", dose=dose, time="24 h") # 45956 X 14 ## ----extract_modz, eval=FALSE-------------------------------------------- # library(signatureSearch) # gctx <- "./data/GSE92742_Broad_LINCS_Level5_COMPZ.MODZ_n473647x12328.gctx" # gctx2h5(gctx, cid=meta42_filter$sig_id, new_cid=meta42_filter$pert_cell_factor, # h5file="./data/lincs.h5", by_ncol=5000, overwrite=TRUE) # se <- readHDF5chunk(h5file="./data/lincs.h5", colindex=1:5000) ## ----lincs_expr, eval=FALSE---------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "lincs_expr")) # lincs_expr_path <- eh[['EH3227']] ## ----filter_expr, eval=FALSE--------------------------------------------- # inst42 <- readr::read_tsv("./data/GSE92742_Broad_LINCS_inst_info.txt") # inst42_filter <- inst_filter(inst42, pert_type="trt_cp", dose=10, dose_unit="um", # time=24, time_unit="h") # 169,795 X 13 ## ----extract_expr, eval=FALSE-------------------------------------------- # # It takes some time # meanExpr2h5(gctx="./data/GSE92742_Broad_LINCS_Level3_INF_mlr12k_n1319138x12328.gctx", # inst=inst42_filter, h5file="./data/lincs_expr.h5") # 12328 X 38824 ## ----cmap_rank, eval=FALSE----------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "cmap_rank")) # cmap_rank_path <- eh[["EH3225"]] # se <- readHDF5chunk(h5file=cmap_rank_path, colindex=1:3587) ## ----filter_rankm, eval=FALSE-------------------------------------------- # path <- system.file("extdata", "cmap_instances_02.txt", package="signatureSearchData") # cmap_inst <- read.delim(path, check.names=FALSE) # inst_id <- cmap_inst$instance_id[!duplicated(paste(cmap_inst$cmap_name, cmap_inst$cell2, sep="_"))] # rankM <- read.delim("./data/rankMatrix.txt", check.names=FALSE, row.names=1) # 22283 X 6100 # rankM_sub <- rankM[, as.character(inst_id)] # colnames(rankM_sub) <- unique(paste(cmap_inst$cmap_name, cmap_inst$cell2, "trt_cp", sep="__")) ## ----affyid_annot, eval=FALSE, message=FALSE----------------------------- # library(hgu133a.db) # myAnnot <- data.frame(ACCNUM=sapply(contents(hgu133aACCNUM), paste, collapse=", "), # SYMBOL=sapply(contents(hgu133aSYMBOL), paste, collapse=", "), # UNIGENE=sapply(contents(hgu133aUNIGENE), paste, collapse=", "), # ENTREZID=sapply(contents(hgu133aENTREZID), paste, collapse=", "), # ENSEMBL=sapply(contents(hgu133aENSEMBL), paste, collapse=", "), # DESC=sapply(contents(hgu133aGENENAME), paste, collapse=", ")) # saveRDS(myAnnot, "./data/myAnnot.rds") ## ----mr_prob, eval=FALSE------------------------------------------------- # rankM_sub_gene <- probe2gene(rankM_sub, myAnnot) ## ----cmap_rank2h5, eval=FALSE-------------------------------------------- # matrix2h5(rankM_sub_gene, "./data/cmap_rank.h5", overwrite=TRUE) # 12403 X 3587 # rhdf5::h5ls("./data/cmap_rank.h5") # ## Read in cmap_rank.h5 as SummarizedExperiment object by chunks # cmap_rank_se <- readHDF5chunk("./data/cmap_rank.h5", colindex=1:5) ## ----cmap_expr, eval=FALSE----------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "cmap_expr")) # cmap_expr_path <- eh[["EH3224"]] ## ----cmap, eval=FALSE---------------------------------------------------- # library(ExperimentHub) # eh <- ExperimentHub() # query(eh, c("signatureSearchData", "cmap")) # cmap_expr_path <- eh[["EH3224"]] ## ----cel_file_list, eval=FALSE------------------------------------------- # path <- system.file("extdata", "cmap_instances_02.txt", package="signatureSearchData") # cmap_inst <- read.delim(path, check.names=FALSE) # comp_list <- sampleList(cmap_inst, myby="CMP_CELL") ## ----deg_limma, eval=FALSE----------------------------------------------- # mas5df <- readRDS("./data/mas5df.rds") # degList <- runLimma(df=log2(mas5df), comp_list, fdr=0.10, foldchange=1, verbose=TRUE) # saveRDS(degList, "./results/degList.rds") # saves entire degList ## ----se, eval=FALSE------------------------------------------------------ # degList <- readRDS("./results/degList.rds") # logMA <- degList$logFC # ## match colnames of logMA to '(drug)__(cell)__(factor)' format # colnames(logMA) <- gsub("(^.*)_(.*$)", "\\1__\\2__trt_cp", colnames(logMA)) # matrix2h5(logMA, "./data/cmap.h5", overwrite=TRUE) # 12403 X 3478 # ## Read in cmap.h5 by chunks # cmap_se <- readHDF5chunk("./data/cmap.h5", colindex=1:5) ## ----work_envir, eval=FALSE---------------------------------------------- # dir.create("data"); dir.create("data/CEL"); dir.create("results") ## ----download_cmap, eval=FALSE------------------------------------------- # getCmapCEL(rerun=FALSE) ## ----get_cel_type, eval=FALSE-------------------------------------------- # library(affxparser) # celfiles <- list.files("./data/CEL", pattern=".CEL$") # chiptype <- sapply(celfiles, function(x) affxparser::readCelHeader(paste0("data/CEL/", x))$chiptype) # saveRDS(chiptype, "./data/chiptype.rds") ## ----normalize_chips, eval=FALSE----------------------------------------- # chiptype <- readRDS("./data/chiptype.rds") # chiptype_list <- split(names(chiptype), as.character(chiptype)) # normalizeCel(chiptype_list, batchsize=200, rerun=FALSE) ## ----comb_chip_type_data, eval=FALSE------------------------------------- # chiptype_dir <- unique(chiptype) # combineResults(chiptype_dir, rerun=FALSE) # mas5df <- combineNormRes(chiptype_dir, norm_method="MAS5") ## ----prof2gene, eval=FALSE----------------------------------------------- # myAnnot <- readRDS("./data/myAnnot.rds") # mas5df <- probe2gene(mas5df, myAnnot) # saveRDS(mas5df,"./data/mas5df.rds") ## ----rma2cmap_expr, eval=FALSE------------------------------------------- # mas5df <- readRDS("./data/mas5df.rds") # dim: 12403 x 7056 # path <- system.file("extdata", "cmap_instances_02.txt", package="signatureSearchData") # cmap_inst <- read.delim(path, check.names=FALSE) # cmap_drug_cell_expr <- meanExpr(mas5df, cmap_inst) # dim: 12403 X 3587 # saveRDS(cmap_drug_cell_expr, "./data/cmap_drug_cell_expr.rds") ## ----gen_cmap_expr, eval=FALSE------------------------------------------- # cmap_drug_cell_expr <- readRDS("./data/cmap_drug_cell_expr.rds") # ## match colnames to '(drug)__(cell)__(factor)' format # colnames(cmap_drug_cell_expr) <- gsub("(^.*)_(.*$)", "\\1__\\2__trt_cp", # colnames(cmap_drug_cell_expr)) # matrix2h5(cmap_drug_cell_expr, "./data/cmap_expr.h5", overwrite=TRUE) # ## Read in cmap_expr.h5 by chunks # cmap_expr_se <- readHDF5chunk("./data/cmap_expr.h5", colindex=1:5) ## ----sessionInfo--------------------------------------------------------- sessionInfo()