## ----style, echo = FALSE, results = 'asis'-------------------------------
BiocStyle::markdown()

## ----setup, echo=FALSE, warning=FALSE-------------------------------------------------------------
options(max.print=1000, width=100)
knitr::opts_chunk$set(cache=TRUE)
suppressPackageStartupMessages({
    library(org.Hs.eg.db)
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
    library(BSgenome.Hsapiens.UCSC.hg19)
    library(GenomicRanges)
    library(biomaRt)
    library(rtracklayer)
    library(Gviz)
    library(AnnotationHub)
})

## ----select-setup---------------------------------------------------------------------------------
ensids <- c("ENSG00000130720", "ENSG00000103257", "ENSG00000156414", 
            "ENSG00000144644", "ENSG00000159307", "ENSG00000144485")

## ----select---------------------------------------------------------------------------------------
library(org.Hs.eg.db)
keytypes(org.Hs.eg.db)
columns(org.Hs.eg.db)
cols <- c("SYMBOL", "GENENAME")
select(org.Hs.eg.db, keys=ensids, columns=cols, keytype="ENSEMBL")

## ----biomaRt1, eval=FALSE, results="hide"---------------------------------------------------------
#  ## NEEDS INTERNET ACCESS !!
#  library(biomaRt)
#  head(listMarts(), 3)                      ## list the marts
#  head(listDatasets(useMart("ensembl")), 3) ## mart datasets
#  ensembl <-                                ## fully specified mart
#      useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#  
#  head(listFilters(ensembl), 3)             ## filters
#  myFilter <- "chromosome_name"
#  substr(filterOptions(myFilter, ensembl), 1, 50) ## return values
#  myValues <- c("21", "22")
#  head(listAttributes(ensembl), 3)          ## attributes
#  myAttributes <- c("ensembl_gene_id","chromosome_name")
#  
#  ## assemble and query the mart
#  res <- getBM(attributes =  myAttributes, filters =  myFilter,
#               values =  myValues, mart = ensembl)

## ----symbol-to-entrez-----------------------------------------------------------------------------
library(org.Hs.eg.db)
eid <- select(org.Hs.eg.db, "BRCA1", "ENTREZID", "SYMBOL")[["ENTREZID"]]

## ----entrez-to-tx, message=FALSE------------------------------------------------------------------
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene
txid <- select(txdb, eid, "TXNAME", "GENEID")[["TXNAME"]]

## ----tx-to-cds-coords-----------------------------------------------------------------------------
cds <- cdsBy(txdb, by="tx", use.names=TRUE)
brca1cds <- cds[names(cds) %in% txid]
class(brca1cds)
length(brca1cds)
brca1cds[[1]]                           # exons in cds
cdswidth <- width(brca1cds)             # width of each exon
all((sum(cdswidth) %% 3) == 0)          # sum within cds, modulus 3

## ----Gviz, message=FALSE--------------------------------------------------------------------------
library(Gviz)
grt <- GeneRegionTrack(txdb)
plotTracks(list(GenomeAxisTrack(), grt), chromosome="chr17",
    from=min(start(unlist(brca1cds))),
    to=max(end(unlist(brca1cds))))

## ----cds-to-seq-----------------------------------------------------------------------------------
library(BSgenome.Hsapiens.UCSC.hg19)
genome <- BSgenome.Hsapiens.UCSC.hg19
tx_seq <- extractTranscriptSeqs(genome, brca1cds)
tx_seq

## ----introns--------------------------------------------------------------------------------------
introns <- psetdiff(unlist(range(brca1cds)), brca1cds)

## ----intron-seqs----------------------------------------------------------------------------------
seq <- getSeq(genome, introns)
names(seq)
seq[["uc010whl.2"]]                     # 21 introns

## ----rtracklayer-roi------------------------------------------------------------------------------
library(GenomicRanges)
roi <- GRanges("chr10", IRanges(92106877, 112106876, names="ENSG00000099194"))

## ----rtracklayer-session, eval=FALSE--------------------------------------------------------------
#  library(rtracklayer)
#  session <- browserSession()

## ----rtracklayer-marks, eval=FALSE----------------------------------------------------------------
#  trackName <- "wgEncodeRegTfbsClusteredV2"
#  tableName <- "wgEncodeRegTfbsClusteredV2"
#  trFactor <- "ERalpha_a"
#  ucscTable <- getTable(ucscTableQuery(session, track=trackName,
#      range=roi, table=tableName, name=trFactor))

## ----rtracklayer-plot, fig.height=3, eval=FALSE---------------------------------------------------
#  plot(score ~ chromStart, ucscTable, pch="+")
#  abline(v=start(roi) + (end(roi) - start(roi) + 1) / 2, col="blue")