## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----install_eisaR, eval=FALSE------------------------------------------------ # # BiocManager is needed to install Bioconductor packages # if (!requireNamespace("BiocManager", quietly = TRUE)) # install.packages("BiocManager") # # # Install eisaR # BiocManager::install("eisaR") ## ----availableOnline, eval=FALSE---------------------------------------------- # pkgs <- c(BiocManager::available("TxDb"), # BiocManager::available("EnsDb")) ## ----annotation, message=FALSE------------------------------------------------ # load package library(eisaR) # get TxDb object txdbFile <- system.file("extdata", "hg19sub.sqlite", package = "eisaR") txdb <- AnnotationDbi::loadDb(txdbFile) ## ----regions------------------------------------------------------------------ # extract filtered exonic and gene body regions regS <- getRegionsFromTxDb(txdb = txdb, strandedData = TRUE) regU <- getRegionsFromTxDb(txdb = txdb, strandedData = FALSE) lengths(regS) lengths(regU) regS$exons ## ----exportregions------------------------------------------------------------ library(rtracklayer) export(regS$exons, "hg19sub_exons_stranded.gtf") export(regS$genebodies, "hg19sub_genebodies_stranded.gtf") ## ----extdata------------------------------------------------------------------ library(QuasR) file.copy(system.file(package = "QuasR", "extdata"), ".", recursive = TRUE) ## ----align-------------------------------------------------------------------- sampleFile <- file.path("extdata", "samples_rna_single.txt") ## Display the structure of the sampleFile read.delim(sampleFile) ## Perform the alignment proj <- qAlign(sampleFile = sampleFile, genome = file.path("extdata", "hg19sub.fa"), aligner = "Rhisat2", splicedAlignment = TRUE) alignmentStats(proj) ## ----count-------------------------------------------------------------------- cntEx <- qCount(proj, regU$exons, orientation = "any") cntGb <- qCount(proj, regU$genebodies, orientation = "any") cntIn <- cntGb - cntEx cntEx cntIn ## ----loadcounts--------------------------------------------------------------- cntEx <- readRDS(system.file("extdata", "Fig3abc_GSE33252_rawcounts_exonic.rds", package = "eisaR")) cntIn <- readRDS(system.file("extdata", "Fig3abc_GSE33252_rawcounts_intronic.rds", package = "eisaR")) ## ----runEISA------------------------------------------------------------------ # remove "width" column rEx <- cntEx[, colnames(cntEx) != "width"] rIn <- cntIn[, colnames(cntIn) != "width"] # create condition factor (contrast will be TN - ES) cond <- factor(c("ES", "ES", "TN", "TN")) # run EISA res <- runEISA(rEx, rIn, cond) ## ----compare------------------------------------------------------------------ res1 <- runEISA(rEx, rIn, cond, method = "Gaidatzis2015") res2 <- runEISA(rEx, rIn, cond) # number of quantifiable genes nrow(res1$DGEList) nrow(res2$DGEList) # number of genes with significant post-transcriptional regulation sum(res1$tab.ExIn$FDR < 0.05) sum(res2$tab.ExIn$FDR < 0.05) # method="Gaidatzis2015" results contain most of default results summary(rownames(res2$contrasts)[res2$tab.ExIn$FDR < 0.05] %in% rownames(res1$contrasts)[res1$tab.ExIn$FDR < 0.05]) # comparison of deltas ids <- intersect(rownames(res1$DGEList), rownames(res2$DGEList)) cor(res1$contrasts[ids, "Dex"], res2$contrasts[ids, "Dex"]) cor(res1$contrasts[ids, "Din"], res2$contrasts[ids, "Din"]) cor(res1$contrasts[ids, "Dex.Din"], res2$contrasts[ids, "Dex.Din"]) plot(res1$contrasts[ids, "Dex.Din"], res2$contrasts[ids, "Dex.Din"], pch = "*", xlab = expression(paste(Delta, "exon", -Delta, "intron for method='Gaidatzis2015'")), ylab = expression(paste(Delta, "exon", -Delta, "intron for default parameters"))) ## ----modelSamples------------------------------------------------------------- res3 <- runEISA(rEx, rIn, cond, modelSamples = FALSE) res4 <- runEISA(rEx, rIn, cond, modelSamples = TRUE) ids <- intersect(rownames(res3$contrasts), rownames(res4$contrasts)) # number of genes with significant post-transcriptional regulation sum(res3$tab.ExIn$FDR < 0.05) sum(res4$tab.ExIn$FDR < 0.05) # modelSamples=TRUE results are a super-set of # modelSamples=FALSE results summary(rownames(res3$contrasts)[res3$tab.ExIn$FDR < 0.05] %in% rownames(res4$contrasts)[res4$tab.ExIn$FDR < 0.05]) # comparison of contrasts diag(cor(res3$contrasts[ids, ], res4$contrasts[ids, ])) plot(res3$contrasts[ids, 3L], res4$contrasts[ids, 3L], pch = "*", xlab = "Interaction effects for modelSamples=FALSE", ylab = "Interaction effects for modelSamples=TRUE") # comparison of interaction significance plot(-log10(res3$tab.ExIn[ids, "FDR"]), -log10(res4$tab.ExIn[ids, "FDR"]), pch = "*", xlab = "-log10(FDR) for modelSamples=FALSE", ylab = "-log10(FDR) for modelSamples=TRUE") abline(a = 0.0, b = 1.0, col = "gray") legend("bottomright", "y = x", bty = "n", lty = 1L, col = "gray") ## ----plotEISA----------------------------------------------------------------- plotEISA(res) ## ----normalization------------------------------------------------------------ # remove column "width" rEx <- cntEx[, colnames(cntEx) != "width"] rIn <- cntIn[, colnames(cntIn) != "width"] rAll <- rEx + rIn fracIn <- colSums(rIn) / colSums(rAll) summary(fracIn) # scale counts to the mean library size, # separately for exons and introns normEx <- t(t(rEx) / colSums(rEx) * mean(colSums(rEx))) normIn <- t(t(rIn) / colSums(rIn) * mean(colSums(rIn))) # log transform (add a pseudocount of 8) lognormEx <- log2(normEx + 8L) lognormIn <- log2(normIn + 8L) ## ----quantgenes--------------------------------------------------------------- quantGenes <- rownames(rEx)[rowMeans(lognormEx) > 5.0 & rowMeans(lognormIn) > 5.0] length(quantGenes) ## ----dIdE--------------------------------------------------------------------- Dex <- lognormEx[, c("MmTN_RNA_total_a", "MmTN_RNA_total_b")] - lognormEx[, c("MmES_RNA_total_a", "MmES_RNA_total_b")] Din <- lognormIn[, c("MmTN_RNA_total_a", "MmTN_RNA_total_b")] - lognormIn[, c("MmES_RNA_total_a", "MmES_RNA_total_b")] Dex.Din <- Dex - Din cor(Dex[quantGenes, 1L], Dex[quantGenes, 2L]) cor(Din[quantGenes, 1L], Din[quantGenes, 2L]) cor(Dex.Din[quantGenes, 1L], Dex.Din[quantGenes, 2L]) ## ----sig---------------------------------------------------------------------- # create DGEList object with exonic and intronic counts library(edgeR) cnt <- data.frame(Ex = rEx, In = rIn) y <- DGEList(counts = cnt, genes = data.frame(ENTREZID = rownames(cnt))) # select quantifiable genes and normalize y <- y[quantGenes, ] y <- calcNormFactors(y) # design matrix with interaction term region <- factor(c("ex", "ex", "ex", "ex", "in", "in", "in", "in"), levels = c("in", "ex")) cond <- rep(factor(c("ES", "ES", "TN", "TN")), 2L) design <- model.matrix(~ region * cond) rownames(design) <- colnames(cnt) design # estimate model parameters y <- estimateDisp(y, design) fit <- glmFit(y, design) # calculate likelihood-ratio between full and reduced models lrt <- glmLRT(fit) # create results table tt <- topTags(lrt, n = nrow(y), sort.by = "none") head(tt$table[order(tt$table$FDR, decreasing = FALSE), ]) ## ----plot--------------------------------------------------------------------- sig <- tt$table$FDR < 0.05 sum(sig) sigDir <- sign(tt$table$logFC[sig]) cols <- ifelse(sig, ifelse(tt$table$logFC > 0.0, "#E41A1CFF", "#497AB3FF"), "#22222244") # volcano plot plot(tt$table$logFC, -log10(tt$table$FDR), col = cols, pch = 20L, xlab = expression(paste("RNA change (log2 ", Delta, "exon/", Delta, "intron)")), ylab = "Significance (-log10 FDR)") abline(h = -log10(0.05), lty = 2L) abline(v = 0.0, lty = 2L) ppar <- par("usr") text(x = ppar[1L] + 3.0 * par("cxy")[1L], y = ppar[4L], adj = c(0.0, 1.0), labels = sprintf("n=%d", sum(sigDir == -1L)), col = "#497AB3FF") text(x = ppar[2L] - 3.0 * par("cxy")[1L], y = ppar[4L], adj = c(1.0, 1.0), labels = sprintf("n=%d", sum(sigDir == 1L)), col = "#E41A1CFF") # Delta I vs. Delta E plot(rowMeans(Din)[quantGenes], rowMeans(Dex)[quantGenes], pch = 20L, col = cols, xlab = expression(paste(Delta, "intron (log2 TN/ES)")), ylab = expression(paste(Delta, "exon (log2 TN/ES)"))) legend(x = "bottomright", bty = "n", pch = 20L, col = c("#E41A1CFF", "#497AB3FF"), legend = sprintf("%s (%d)", c("Up", "Down"), c(sum(sigDir == 1L), sum(sigDir == -1L)))) ## ----sessionInfo-------------------------------------------------------------- sessionInfo()