## ---- echo = FALSE------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ---- results = "hide", message = FALSE, warning=FALSE------------------- library(IsoformSwitchAnalyzeR) ## ------------------------------------------------------------------------ data("exampleSwitchList") exampleSwitchList ## ---- results = "hide", message = FALSE, warning=FALSE------------------- ### isoformSwitchAnalysisPart1 needs the genomic sequence to predict ORFs # Genome sequences are available from Biocindoctor as BSgenome objects: # http://bioconductor.org/packages/release/BiocViews.html#___BSgenome # Here we use the gg19 reference genome - which can be downloaded by # copy/pasting the following two lines into the R terminal: # source("https://bioconductor.org/biocLite.R") # biocLite("BSgenome.Hsapiens.UCSC.hg19") library(BSgenome.Hsapiens.UCSC.hg19) exampleSwitchList <- isoformSwitchAnalysisPart1( input=exampleSwitchList, genomeObject = Hsapiens, # the reference to the human BS genome dIFcutoff = 0.4, # Cutoff for finding switches - set high for short runtime in example data outputSequences = FALSE # prevents outputting of the fasta files used for external sequence analysis ) ## ------------------------------------------------------------------------ extractSwitchSummary(exampleSwitchList, dIFcutoff = 0.4) # supply the same cutoff to the summary function ## ---- results = "hide", message = FALSE---------------------------------- exampleSwitchList <- isoformSwitchAnalysisPart2( switchAnalyzeRlist = exampleSwitchList, dIFcutoff = 0.4, # Cutoff for finding switches - set high for short runtime in example data n = 10, # if plotting was enabled, it would only output the top 10 switches removeNoncodinORFs = TRUE, # Because ORF was predicted de novo pathToCPATresultFile = system.file("extdata/cpat_results.txt" , package = "IsoformSwitchAnalyzeR"), pathToPFAMresultFile = system.file("extdata/pfam_results.txt" , package = "IsoformSwitchAnalyzeR"), pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR"), codingCutoff = 0.725, # the coding potential cutoff we suggested for human outputPlots = FALSE # keeps the function from outputting the plots from this example ) ## ------------------------------------------------------------------------ extractSwitchSummary(exampleSwitchList, filterForConsequences = TRUE, dIFcutoff = 0.4) # supply the same cutoff to the summary function ## ---- message = FALSE---------------------------------------------------- extractTopSwitches(exampleSwitchList, filterForConsequences = TRUE, n=3) ## ---- fig.width=12, fig.height=7, warning=FALSE-------------------------- switchPlot(exampleSwitchList, gene='KIF1B') ## ------------------------------------------------------------------------ data("exampleSwitchListAnalyzed") ## ---- fig.width=9, fig.height=5------------------------------------------ extractSwitchSummary( exampleSwitchListAnalyzed, filterForConsequences=TRUE ) ## ---- fig.width=9, fig.height=5------------------------------------------ extractSwitchOverlap( exampleSwitchListAnalyzed, filterForConsequences=TRUE ) ## ---- fig.width=12, fig.height=5----------------------------------------- extractConsequenceSummary( exampleSwitchListAnalyzed, consequencesToAnalyze='all', plotGenes = FALSE, # enables analysis of genes (instead of isoforms) asFractionTotal = FALSE # enables analysis of fraction of significant features ) ## ---- fig.width=12, fig.height=5----------------------------------------- extractConsequenceEnrichment( exampleSwitchListAnalyzed, consequencesToAnalyze='all', analysisOppositeConsequence = TRUE ) ## ---- fig.width=12, fig.height=4----------------------------------------- extractConsequenceEnrichmentComparison( exampleSwitchListAnalyzed, consequencesToAnalyze=c('domains_identified','intron_retention','coding_potential'), analysisOppositeConsequence = T ) ## ---- fig.width=12, fig.height=4.5--------------------------------------- extractSplicingEnrichment(exampleSwitchListAnalyzed) ## ---- fig.width=8, fig.height=4, warning=FALSE--------------------------- data("exampleSwitchListAnalyzed") ### Vulcano like plot: ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) + geom_point( aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff size=1 ) + geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff facet_wrap( ~ condition_2) + #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + labs(x='dIF', y='-Log10 ( Isoform Switch Q Value )') + theme_bw() ## ---- fig.width=8, fig.height=4, warning=FALSE--------------------------- ### Switch vs Gene changes: ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=gene_log2_fold_change, y=dIF)) + geom_point( aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff size=1 ) + facet_wrap(~ condition_2) + #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions geom_hline(yintercept = 0, linetype='dashed') + geom_vline(xintercept = 0, linetype='dashed') + scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) + labs(x='Gene log2 fold change', y='dIF') + theme_bw() ## ------------------------------------------------------------------------ data("exampleSwitchList") # A newly created switchAnalyzeRlist + switch analysis names(exampleSwitchList) data("exampleSwitchListAnalyzed") # A fully analyzed switchAnalyzeRlist names(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### Preview head(exampleSwitchList, 2) # tail(exampleSwitchList, 2) ### Dimentions dim(exampleSwitchList$isoformFeatures) nrow(exampleSwitchList) ncol(exampleSwitchList) dim(exampleSwitchList) ## ------------------------------------------------------------------------ exampleSwitchList ### Subset subsetSwitchAnalyzeRlist( exampleSwitchList, exampleSwitchList$isoformFeatures$gene_name == 'ARHGEF19' ) ## ------------------------------------------------------------------------ head(exampleSwitchList$exons,2) ## ---- warning=FALSE, message=FALSE--------------------------------------- cuffDB <- prepareCuffExample() cuffDB ## ---- warning=FALSE------------------------------------------------------ aSwitchList <- importCufflinksCummeRbund(cuffDB) aSwitchList ## ---- warning=FALSE------------------------------------------------------ ### Please note # The way of importing file in this example with # "system.file('pathToFile', package="cummeRbund") is # specialiced to access the example data in the cummeRbund package # and not something you need to do - just supply the string e.g. # "/myAnnotation/myGenome/myFile.filetype" to the argument testSwitchList <- importCufflinksFiles( pathToGTF = system.file('extdata/chr1_snippet.gtf', package = "cummeRbund"), pathToGeneDEanalysis = system.file('extdata/gene_exp.diff', package = "cummeRbund"), pathToIsoformDEanalysis = system.file('extdata/isoform_exp.diff', package = "cummeRbund"), pathToGeneFPKMtracking = system.file('extdata/genes.fpkm_tracking', package = "cummeRbund"), pathToIsoformFPKMtracking = system.file('extdata/isoforms.fpkm_tracking', package = "cummeRbund"), pathToIsoformReadGroupTracking = system.file('extdata/isoforms.read_group_tracking', package = "cummeRbund"), pathToSplicingAnalysis = system.file('extdata/splicing.diff', package = "cummeRbund"), pathToReadGroups = system.file('extdata/read_groups.info', package = "cummeRbund"), pathToRunInfo = system.file('extdata/run.info', package = "cummeRbund"), fixCufflinksAnnotationProblem=TRUE, quiet=TRUE ) testSwitchList ## ---- warning=FALSE, message=FALSE--------------------------------------- ### Construct data needed from example data in cummeRbund package ### (The recomended way of analyzing Cufflinks/Cuffdiff data is via importCufflinksCummeRbund() ### This is just an easy way to get some example data ). cuffDB <- prepareCuffExample() ## ------------------------------------------------------------------------ # Extract count matrix isoRepCount <- repCountMatrix(isoforms(cuffDB)) isoRepCount$isoform_id <- rownames(isoRepCount) # Extract design matrix designMatrix <- cummeRbund::replicates(cuffDB)[,c('rep_name','sample_name')] colnames(designMatrix) <- c('sampleID','condition') # Extract isoform structure localAnnotaion <- import(system.file("extdata/chr1_snippet.gtf", package="cummeRbund")) localAnnotaion <- localAnnotaion[,c('transcript_id','gene_id')] colnames(localAnnotaion@elementMetadata)[1] <- 'isoform_id' ### Please note # 1) The way of importing the GTF file in this example with # "system.file('pathToFile', package="cummeRbund") is # specialized to access the example data in the cummeRbund package # and not somhting you need to do - just supply the string e.g. # "/myAnnotation/myGenome/gersionQuantified.gtf" to the import function # 2) importRdata also supports direct import of a GTF file - just supply the # string to the "isoformExonAnnoation"" argument ### Take a look at the data head(isoRepCount, 2) designMatrix head(localAnnotaion, 3) ## ------------------------------------------------------------------------ ### Create switchAnalyzeRlist aSwitchList <- importRdata( isoformCountMatrix=isoRepCount, designMatrix=designMatrix, isoformExonAnnoation=localAnnotaion, # alternatively supply a string that points directly to the GTF file showProgress=FALSE ) aSwitchList ## ---- warning=FALSE, message=FALSE--------------------------------------- aSwitchList <- importGTF(pathToGTF = system.file("extdata/chr1_snippet.gtf", package = "cummeRbund")) ## ------------------------------------------------------------------------ aSwitchList head(aSwitchList,2) head(aSwitchList$conditions,2) ## ------------------------------------------------------------------------ data("exampleSwitchList") exampleSwitchList exampleSwitchListFiltered <- preFilter(exampleSwitchList, geneExpressionCutoff = 1, isoformExpressionCutoff = 0, removeSingleIsoformGenes = TRUE) exampleSwitchListFilteredStrict <- preFilter(exampleSwitchList, geneExpressionCutoff = 10, isoformExpressionCutoff = 3, removeSingleIsoformGenes = TRUE) ## ---- warning=FALSE------------------------------------------------------ # Load example data and pre-filter data("exampleSwitchList") exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime # Perform test (takes ~10 sec) exampleSwitchListAnalyzed <- isoformSwitchTestDRIMSeq( switchAnalyzeRlist = exampleSwitchList, testIntegration='isoform_only', reduceToSwitchingGenes=TRUE ) # Summarize switching features extractSwitchSummary(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### Show arguments of function args(isoformSwitchTest) ## ------------------------------------------------------------------------ # Load example data and prefilter data("exampleSwitchList") exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime # Perfom test exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList) # Summarize swiching geatures extractSwitchSummary(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ extractCalibrationStatus(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ # Perfom test exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList, calibratePvalues = FALSE) # Summarize swiching geatures extractSwitchSummary(exampleSwitchListAnalyzed) # check callibration status extractCalibrationStatus(exampleSwitchListAnalyzed) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Usage of The Statistical Test' section above ### analyzeORF needs the genomic sequence to predict ORFs. # These are readily available from Bioconductor as BSgenome orbjects: # http://bioconductor.org/packages/release/BiocViews.html#___BSgenome # Here we use Hg19 --- which can be downloaded by copy/pasting the following two lines into the R termminal: # source("https://bioconductor.org/biocLite.R") # biocLite("BSgenome.Hsapiens.UCSC.hg19") library(BSgenome.Hsapiens.UCSC.hg19) exampleSwitchListAnalyzed <- analyzeORF(exampleSwitchListAnalyzed, genomeObject = Hsapiens, showProgress=FALSE) head(exampleSwitchListAnalyzed$orfAnalysis, 3) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Analyzing Open Reading Frames' section above exampleSwitchListAnalyzed <- extractSequence( exampleSwitchListAnalyzed, genomeObject = Hsapiens, pathToOutput = '', writeToFile=FALSE # to avoid output when running this example data ) head(exampleSwitchListAnalyzed$ntSequence,2) head(exampleSwitchListAnalyzed$aaSequence,2) ## ------------------------------------------------------------------------ ### Load test data (maching the external sequence analysis results) data("exampleSwitchListIntermediary") exampleSwitchListIntermediary ### Add PFAM analysis exampleSwitchListAnalyzed <- analyzePFAM( switchAnalyzeRlist = exampleSwitchListIntermediary, pathToPFAMresultFile = system.file("extdata/pfam_results.txt", package = "IsoformSwitchAnalyzeR"), showProgress=FALSE ) ### Add SignalP analysis exampleSwitchListAnalyzed <- analyzeSignalP( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR") ) ### Add CPAT analysis exampleSwitchListAnalyzed <- analyzeCPAT( switchAnalyzeRlist = exampleSwitchListAnalyzed, pathToCPATresultFile = system.file("extdata/cpat_results.txt", package = "IsoformSwitchAnalyzeR"), codingCutoff = 0.725, # the coding potential cutoff we suggested for human removeNoncodinORFs = TRUE # because ORF was predicted de novo ) exampleSwitchListAnalyzed ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Importing External Sequence Analysis' section above exampleSwitchListAnalyzed <- analyzeAlternativeSplicing(exampleSwitchListAnalyzed, quiet=TRUE) ### overview of number of intron retentions (IR) table(exampleSwitchListAnalyzed$isoformFeatures$IR) ## ------------------------------------------------------------------------ ### This example relies on the example data from the 'Predicting Alternative Splicing' section above # the consequences highlighted in the text above consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity') exampleSwitchListAnalyzed <- analyzeSwitchConsequences( exampleSwitchListAnalyzed, consequencesToAnalyze = consequencesOfInterest, dIFcutoff = 0.4, # very high cutoff for fast runtimes showProgress=FALSE ) extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = FALSE) extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = TRUE) ## ------------------------------------------------------------------------ ### Load already analyzed data data("exampleSwitchListAnalyzed") ### Let's reduce the switchAnalyzeRlist to only one condition exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$condition_1 == 'COAD_ctrl' ) exampleSwitchListAnalyzedSubset ### Extract top switching genes (by q-value) extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = 2, sortByQvals = TRUE ) ### Extract top 2 switching genes (by dIF values) extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = 2, sortByQvals = FALSE ) ## ------------------------------------------------------------------------ ### Extract data.frame with all switching isoforms switchingIso <- extractTopSwitches( exampleSwitchListAnalyzedSubset, filterForConsequences = TRUE, n = NA, # n=NA: all features are returned extractGenes = FALSE, # when FALSE isoforms are returned sortByQvals = TRUE ) subset(switchingIso, gene_name == 'ZAK') ## ---- fig.width=12, fig.height=7----------------------------------------- switchPlot(exampleSwitchListAnalyzedSubset, gene = 'ZAK') ## ---- fig.width=12, fig.height=3----------------------------------------- ### Load already analyzed data data("exampleSwitchListAnalyzed") switchPlotTranscript(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B') ## ---- fig.width=3, fig.height=3------------------------------------------ switchPlotGeneExp (exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC') ## ---- fig.width=4, fig.height=3------------------------------------------ switchPlotIsoExp(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC') ## ---- fig.width=4, fig.height=3------------------------------------------ switchPlotIsoUsage(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC') ## ------------------------------------------------------------------------ data("exampleSwitchListIntermediary") ifMatrix <- extractExpressionMatrix(exampleSwitchListAnalyzed) head(ifMatrix) dim(ifMatrix) ## ---- fig.width=4, fig.height=3------------------------------------------ # correlation plot ggplot(melt(cor(ifMatrix)), aes(x=Var1, y=Var2, fill=value)) + geom_tile() + scale_fill_continuous('Correlation') + labs(x='Condition', y='Condition') + theme_minimal() + theme(axis.text.x=element_text(angle=-90, hjust = 0, vjust=0.5)) # turn x-axis 90 degrees ## ---- fig.width=8, fig.height=4------------------------------------------ library(pheatmap) pheatmap(t(ifMatrix), show_colnames=FALSE) ## ------------------------------------------------------------------------ data("exampleSwitchListAnalyzed") exampleSwitchListAnalyzed ## ---- fig.width=12, fig.height=5----------------------------------------- extractSplicingSummary( exampleSwitchListAnalyzed, asFractionTotal = FALSE, plotGenes=FALSE ) ## ---- fig.width=12, fig.height=4.5--------------------------------------- splicingEnrichment <- extractSplicingEnrichment( exampleSwitchListAnalyzed, splicingToAnalyze='all', returnResult=TRUE, returnSummary=TRUE ) ## ---- fig.width=12, fig.height=6----------------------------------------- subset(splicingEnrichment, splicingEnrichment$AStype == 'ATSS') ## ---- fig.width=12, fig.height=4.5--------------------------------------- extractSplicingEnrichmentComparison( exampleSwitchListAnalyzed, splicingToAnalyze=c('A3','MES','ATSS'), # Those significant in COAD in the previous analysis returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame ) ## ---- fig.width=10, fig.height=6----------------------------------------- extractSplicingGenomeWide( exampleSwitchListAnalyzed, featureToExtract = 'all', # all isoforms stored in the switchAnalyzeRlist splicingToAnalyze = c('A3','MES','ATSS'), # Splice types significantly enriched in COAD plot=TRUE, returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame ) ## ---- results = "hide", message = FALSE---------------------------------- ### Load example data data("exampleSwitchListAnalyzed") ### Reduce datasize for fast runtime selectedGenes <- unique(exampleSwitchListAnalyzed$isoformFeatures$gene_id)[50:100] exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist( exampleSwitchListAnalyzed, exampleSwitchListAnalyzed$isoformFeatures$gene_id %in% selectedGenes ) ### analyze the biological mechanisms bioMechanismeAnalysis <- analyzeSwitchConsequences( exampleSwitchListAnalyzedSubset, consequencesToAnalyze = c('tss','tts','intron_structure'), showProgress = FALSE )$switchConsequence # only the consequences are interesting here ### subset to those with differences bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoformsDifferent),] ### extract the consequences of interest already stored in the switchAnalyzeRlist myConsequences <- exampleSwitchListAnalyzedSubset$switchConsequence myConsequences <- myConsequences[which(myConsequences$isoformsDifferent),] myConsequences$isoPair <- paste(myConsequences$isoformUpregulated, myConsequences$isoformDownregulated) # id for specific iso comparison ### Obtain the mechanisms of the isoform switches with consequences bioMechanismeAnalysis$isoPair <- paste(bioMechanismeAnalysis$isoformUpregulated, bioMechanismeAnalysis$isoformDownregulated) bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoPair %in% myConsequences$isoPair),] # id for specific iso comparison ## ---- fig.width=3.5, fig.height=3.5-------------------------------------- ### Create list with the isoPair ids for each consequencee AS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'intron_structure')] aTSS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tss' )] aTTS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tts' )] mechList <- list( AS=AS, aTSS=aTSS, aTTS=aTTS ) ### Create venn diagram library(VennDiagram) myVenn <- venn.diagram(mechList, col='transparent', alpha=0.4, fill=RColorBrewer::brewer.pal(n=3,name='Dark2'), filename=NULL) ### Plot the venn diagram grid.newpage() ; grid.draw(myVenn) ## ------------------------------------------------------------------------ sessionInfo()