###################################################
### chunk number 1: orgDemo
###################################################
## Load the package
library("org.Mm.eg.db")

## Look at what we just loaded.
## The package environment is addressed by name rather than by search-path
## position, so the listing does not depend on what else is attached.
ls("package:org.Mm.eg.db")

## Data for the org packages comes from the latest UCSC data,
## which is from NCBI (UCSC calls it mm9, NCBI Build 37.1).
## Have a peek:
as.list(org.Mm.egCHRLOC)[1:4]

## Notice: for each Entrez Gene ID, there is a start location for the
## UCSC genome
##   negative values are the minus strand
##   positive values are the positive strand
## For the stop locations use:
as.list(org.Mm.egCHRLOCEND)[1:4]

## Or you can use get, mget etc. with the Entrez Gene ID
EGs <- c("18392", "18414", "56513")
mget(EGs, org.Mm.egCHRLOC, ifnotfound = NA)
mget(EGs, org.Mm.egCHRLOCEND, ifnotfound = NA)

## You can also retrieve ENSEMBL IDs using this package
mget(EGs, org.Mm.egENSEMBL, ifnotfound = NA)


###################################################
### chunk number 2: biomaRtDemo
###################################################
## Getting the data from biomaRt:
library("biomaRt")

## Choose a database.
## head() shows at most five rows and does not pad with NA rows when
## fewer marts are available.
head(listMarts(), 5)

## Get the current ensembl database.
ensembl <- useMart("ensembl")

## List the datasets therein
head(listDatasets(ensembl), 10)

## Then set up so that you use that for this session
## (we will choose the mouse one from NCBI build 37.1):
## NOTE(review): the assembly actually served depends on the Ensembl release
## behind the mart at run time -- confirm it is the build intended here.
ensembl <- useDataset("mmusculus_gene_ensembl", mart = ensembl)

## List attributes
attributes <- listAttributes(ensembl)
head(attributes, 10)

## And filters
filters <- listFilters(ensembl)
head(filters, 10)

## Some Entrez Gene IDs
EGs <- c("18392", "18414", "56513")

## First, a simple example to just get some gene names:
## NOTE(review): attribute and filter names ("external_gene_id",
## "entrezgene") can differ between Ensembl releases -- check them against
## the `attributes` and `filters` tables listed above.
getBM(
  attributes = "external_gene_id",
  filters = "entrezgene",
  values = EGs,
  mart = ensembl
)


###################################################
### chunk number 3: biomartDemoContinued
###################################################
## Transcript starts and ends:
getBM(
  attributes = c("entrezgene", "transcript_start", "transcript_end"),
  filters = "entrezgene",
  values = EGs,
  mart = ensembl
)


###################################################
### chunk number 4: biomartDemoContinued2
###################################################
## Additionally, you can get exon boundaries.
## But first you have to find out what the attributes are called...
## NOTE(review): attributeSummary() and the `category`/`group` arguments of
## listAttributes() may not exist in every biomaRt version -- confirm they
## are available in the installed release.
attributeSummary(ensembl)

## Let's zoom in on these exon/Structure attributes
listAttributes(ensembl, category = "Structures", group = "EXON:")


###################################################
### chunk number 5: biomartDemoContinued3 eval=FALSE
###################################################
## ## Find the exon starts and stops for "56513"
## getBM(attributes = c("ensembl_exon_id", "exon_chrom_start", "exon_chrom_end"),
##       filters = "entrezgene",
##       values = "56513",
##       mart = ensembl)


###################################################
### chunk number 6: GO and BiomaRt Example
###################################################
## We can also search based on GO terms
library(GO.db)

## The GO identifier is named once so the term lookup and the gene lookup
## cannot drift apart.
go_id <- "GO:0016564"
GOTERM[[go_id]]

## Here is what we have for EGs affiliated with that term.
## org.Mm.egGO2EG comes from org.Mm.eg.db, which must already be attached.
GOEGs <- unique(org.Mm.egGO2EG[[go_id]])
GOEGs

## Then we can retrieve these from biomaRt like this
## (`ensembl` is the mart object configured earlier in this script):
geneLocs <- getBM(
  attributes = c(
    "ensembl_gene_id",
    "transcript_start",
    "transcript_end",
    "chromosome_name"
  ),
  filters = "entrezgene",
  values = GOEGs,
  mart = ensembl
)


###################################################
### chunk number 7: SessionInfo
###################################################
sessionInfo()