## ----style, eval=TRUE, echo=FALSE, results='asis'-------------------------- BiocStyle::latex() ## ----include=FALSE--------------------------------------------------------- library(knitr) opts_chunk$set(tidy=FALSE) ## ----Homo.sapiens---------------------------------------------------------- library(RSQLite) library(Homo.sapiens) columns(Homo.sapiens) ## ----Homo.sapiens2--------------------------------------------------------- keytypes(Homo.sapiens) ## ----Homo.sapiens3--------------------------------------------------------- k <- head(keys(Homo.sapiens,keytype="ENTREZID")) k ## ----Homo.sapiens4--------------------------------------------------------- result <- select(Homo.sapiens, keys=k, columns=c("TXNAME","TXSTART","TXSTRAND"), keytype="ENTREZID") head(result) ## ----BFC_manage,echo=FALSE------------------------------------------------- .get_file <- function(fullUri) { bfc <- BiocFileCache::BiocFileCache() BiocFileCache::bfcrpath( bfc, rname = fullUri, exact = TRUE, download = TRUE, rtype = "web" ) } ## ----URI Example----------------------------------------------------------- uri <- 'http://rest.uniprot.org/uniprotkb/search?query=' ids <- c('P13368', 'Q6GZX4') idStr <- paste(ids, collapse=utils::URLencode(" OR ")) format <- '&format=tsv' fullUri <- paste0(uri,idStr,format) uquery <- .get_file(fullUri) read.delim(uquery) ## ----web service code------------------------------------------------------ getUniprotGoodies <- function(query, columns) { ## query and columns start as a character vectors qstring <- paste(query, collapse="+or+") cstring <- paste(columns, collapse=",") uri <- 'http://www.uniprot.org/uniprot/?query=' fullUri <- paste0(uri,qstring,'&format=tab&columns=',cstring) dat <- read.delim(fullUri, stringsAsFactors=FALSE) ## now remove things that were not in the specific original query... dat <- dat[dat[,1] %in% query,] dat } ## ----xml_tree-------------------------------------------------------------- library(XML) uri <- "http://rest.uniprot.org/uniprotkb/search?query=P13368%20OR%20Q6GZX4&format=xml" fl <- tempfile() download.file(uri, fl) xml <- xmlTreeParse(fl, useInternalNodes=TRUE) ## ----xml_namespace--------------------------------------------------------- defs <- xmlNamespaceDefinitions(xml, recurisve=TRUE) defs ## ----xml_namespace_struct-------------------------------------------------- ns <- structure(sapply(defs, function(x) x$uri), names=names(defs)) ## ----xml_namespace2-------------------------------------------------------- entry <- getNodeSet(xml, "//ns:entry", "ns") xmlSize(entry) ## ----xml_xmlAttrs---------------------------------------------------------- nms <- xpathSApply(xml, "//ns:entry/ns:name", xmlValue, namespaces="ns") attrs <- xpathApply(xml, "//ns:entry", xmlAttrs, namespaces="ns") names(attrs) <- nms attrs ## ----xml_xmlChildren------------------------------------------------------- fun1 <- function(elt) unique(names(xmlChildren(elt))) xpathApply(xml, "//ns:entry", fun1, namespaces="ns") ## ----xml_feature_type------------------------------------------------------ Q6GZX4 <- "//ns:entry[ns:accession='Q6GZX4']/ns:feature" xmlSize(getNodeSet(xml, Q6GZX4, namespaces="ns")) P13368 <- "//ns:entry[ns:accession='P13368']/ns:feature" xmlSize(getNodeSet(xml, P13368, namespaces="ns")) ## ----xml_feature_type2----------------------------------------------------- path <- "//ns:feature" unique(xpathSApply(xml, path, xmlGetAttr, "type", namespaces="ns")) ## ----xml_feature_type_P13368----------------------------------------------- path <- "//ns:entry[ns:accession='P13368']/ns:feature[@type='sequence conflict']" data.frame(t(xpathSApply(xml, path, xmlAttrs, namespaces="ns"))) ## ----xml_sequence---------------------------------------------------------- library(Biostrings) path <- "//ns:entry/ns:sequence" seqs <- xpathSApply(xml, path, xmlValue, namespaces="ns") aa <- AAStringSet(unlist(lapply(seqs, function(elt) gsub("\n", "", elt)), use.names=FALSE)) names(aa) <- nms aa ## ----WebServiceObject------------------------------------------------------ setClass("uniprot", representation(name="character"), prototype(name="uniprot")) ## ----makeInstanceWebServiceObj--------------------------------------------- uniprot <- new("uniprot") ## ----onLoad2,eval=FALSE---------------------------------------------------- # .onLoad <- function(libname, pkgname) # { # ns <- asNamespace(pkgname) # uniprot <- new("uniprot") # assign("uniprot", uniprot, envir=ns) # namespaceExport(ns, "uniprot") # } ## ----keytypeUniprot-------------------------------------------------------- setMethod("keytypes", "uniprot",function(x){return("UNIPROT")}) uniprot <- new("uniprot") keytypes(uniprot) ## ----keytypeUniprot2------------------------------------------------------- setMethod("columns", "uniprot", function(x){return(c("ID", "SEQUENCE", "ORGANISM"))}) columns(uniprot) ## ----webServiceSelect------------------------------------------------------ .select <- function(x, keys, columns){ colsTranslate <- c(id='ID', sequence='SEQUENCE', organism='ORGANISM') columns <- names(colsTranslate)[colsTranslate %in% columns] getUniprotGoodies(query=keys, columns=columns) } setMethod("select", "uniprot", function(x, keys, columns, keytype) { .select(keys=keys, columns=columns) }) ## ----webServiceSelect2, eval=FALSE----------------------------------------- # select(uniprot, keys=c("P13368","P20806"), columns=c("ID","ORGANISM")) ## ----classicConn,results='hide'-------------------------------------------- drv <- SQLite() library("org.Hs.eg.db") con_hs <- dbConnect(drv, dbname=org.Hs.eg_dbfile()) con_hs ## ----ourConn2,eval=FALSE--------------------------------------------------- # org.Hs.eg.db$conn # ## or better we can use a helper function to wrap this: # AnnotationDbi::dbconn(org.Hs.eg.db) # ## or we can just call the provided convenience function # ## from when this package loads: # org.Hs.eg_dbconn() ## ----dbListTables---------------------------------------------------------- head(dbListTables(con_hs)) dbListFields(con_hs, "alias") ## ----dbGetQuery------------------------------------------------------------ dbGetQuery(con_hs, "SELECT * FROM metadata") ## ----dbListTables2--------------------------------------------------------- head(dbListTables(con_hs)) ## ----dbListFields2--------------------------------------------------------- dbListFields(con_hs, "chromosomes") ## ----dbGetQuery2----------------------------------------------------------- head(dbGetQuery(con_hs, "SELECT * FROM chromosomes")) ## ----Anopheles,eval=FALSE-------------------------------------------------- # head(dbGetQuery(con_hs, "SELECT * FROM chromosome_locations")) # ## Then only retrieve human records # ## Query: SELECT * FROM Anopheles_gambiae WHERE species='HOMSA' # head(dbGetQuery(con_hs, "SELECT * FROM chromosome_locations WHERE seqname='1'")) # dbDisconnect(con_hs) ## ----getMetadata, echo=FALSE----------------------------------------------- library(org.Hs.eg.db) org.Hs.eg_dbInfo() ## ----OrgDbClass,eval=FALSE------------------------------------------------- # showClass("OrgDb") ## ----onLoad,eval=FALSE----------------------------------------------------- # sPkgname <- sub(".db$","",pkgname) # db <- loadDb(system.file("extdata", paste(sPkgname, # ".sqlite",sep=""), package=pkgname, lib.loc=libname), # packageName=pkgname) # dbNewname <- AnnotationDbi:::dbObjectName(pkgname,"OrgDb") # ns <- asNamespace(pkgname) # assign(dbNewname, db, envir=ns) # namespaceExport(ns, dbNewname) ## ----columns,eval=FALSE---------------------------------------------------- # .cols <- function(x) # { # con <- AnnotationDbi::dbconn(x) # list <- dbListTables(con) # ## drop unwanted tables # unwanted <- c("map_counts","map_metadata","metadata") # list <- list[!list %in% unwanted] # # use on.exit to disconnect # # on.exit(dbDisconnect(con)) # ## Then just to format things in the usual way # toupper(list) # } # # ## Then make this into a method # setMethod("columns", "OrgDb", .cols(x)) # ## Then we can call it # columns(org.Hs.eg.db) ## ----keytypes,eval=FALSE--------------------------------------------------- # setMethod("keytypes", "OrgDb", function(x) .cols(x)) # ## Then we can call it # keytypes(org.Hs.eg.db) # # ## refactor of .cols # .getLCcolnames <- function(x) # { # con <- AnnotationDbi::dbconn(x) # list <- dbListTables(con) # ## drop unwanted tables # unwanted <- c("map_counts","map_metadata","metadata") # # use on.exit to disconnect # # on.exit(dbDisconnect(con)) # list[!list %in% unwanted] # } # .cols <- function(x) # { # list <- .getLCcolnames(x) # ## Then just to format things in the usual way # toupper(list) # } # ## Test: # columns(org.Hs.eg.db) # # ## new helper function: # .getTableNames <- function(x) # { # LC <- .getLCcolnames(x) # UC <- .cols(x) # names(UC) <- LC # UC # } # .getTableNames(org.Hs.eg.db) ## ----keys,eval=FALSE------------------------------------------------------- # .keys <- function(x, keytype) # { # ## translate keytype back to table name # tabNames <- .getTableNames(x) # lckeytype <- names(tabNames[tabNames %in% keytype]) # ## get a connection # con <- AnnotationDbi::dbconn(x) # sql <- paste("SELECT _id FROM", lckeytype, "WHERE species != 'HOMSA'") # res <- dbGetQuery(con, sql) # res <- as.vector(t(res)) # dbDisconnect(con) # res # } # # setMethod("keys", "ExampleDbClass", .keys) # ## Then we would call it # keys(example.db, "TRICHOPLAX_ADHAERENS") ## ----makeNewDb------------------------------------------------------------- drv <- dbDriver("SQLite") dbname <- file.path(tempdir(), "myNewDb.sqlite") con <- dbConnect(drv, dbname=dbname) ## ----exampleFrame---------------------------------------------------------- data = data.frame(id=c(1,2,9), string=c("Blue", "Red", "Green"), stringsAsFactors=FALSE) ## ----exercise2------------------------------------------------------------- dbGetQuery(con, "CREATE Table genePheno (id INTEGER, string TEXT)") ## ----LabelledPreparedQueries----------------------------------------------- names(data) <- c("id","string") sql <- "INSERT INTO genePheno VALUES ($id, $string)" dbBegin(con) res <- dbSendQuery(con,sql) dbBind(res, data) dbFetch(res) dbClearResult(res) dbCommit(con) ## ----ATTACH---------------------------------------------------------------- db <- system.file("extdata", "TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite", package="TxDb.Hsapiens.UCSC.hg19.knownGene") dbGetQuery(con, sprintf("ATTACH '%s' AS db",db)) ## ----ATTACHJoin------------------------------------------------------------ sql <- "SELECT * FROM db.gene AS dbg, genePheno AS gp WHERE dbg.gene_id=gp.id" res <- dbGetQuery(con, sql) res ## ----SessionInfo, echo=FALSE----------------------------------------------- sessionInfo()