## ---- echo = FALSE-------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## ---- results = "hide", message = FALSE, warning=FALSE-------------------
library(IsoformSwitchAnalyzeR)

## ------------------------------------------------------------------------
data("exampleSwitchList")
exampleSwitchList

## ---- results = "hide", message = FALSE, warning=FALSE-------------------
### isoformSwitchAnalysisPart1 needs the genomic sequence to predict ORFs

# Genome sequences are available from Biocindoctor as BSgenome objects: 
# http://bioconductor.org/packages/release/BiocViews.html#___BSgenome
# Here we use the gg19 reference genome - which can be downloaded by
# copy/pasting the following two lines into the R terminal:
# source("https://bioconductor.org/biocLite.R")
# biocLite("BSgenome.Hsapiens.UCSC.hg19")

library(BSgenome.Hsapiens.UCSC.hg19)

exampleSwitchList <- isoformSwitchAnalysisPart1(
    input=exampleSwitchList,
    genomeObject = Hsapiens, # the reference to the human BS genome
    dIFcutoff = 0.4,         # Cutoff for finding switches - set high for short runtime in example data
    outputSequences = FALSE  # prevents outputting of the fasta files used for external sequence analysis
)

## ------------------------------------------------------------------------
extractSwitchSummary(exampleSwitchList, dIFcutoff = 0.4) # supply the same cutoff to the summary function

## ---- results = "hide", message = FALSE----------------------------------
exampleSwitchList <- isoformSwitchAnalysisPart2(
    switchAnalyzeRlist      = exampleSwitchList, 
    dIFcutoff               = 0.4,   # Cutoff for finding switches - set high for short runtime in example data
    n                       = 10,    # if plotting was enabled, it would only output the top 10 switches
    removeNoncodinORFs      = TRUE,  # Because ORF was predicted de novo
    pathToCPATresultFile    = system.file("extdata/cpat_results.txt"   , package = "IsoformSwitchAnalyzeR"),
    pathToPFAMresultFile    = system.file("extdata/pfam_results.txt"   , package = "IsoformSwitchAnalyzeR"),
    pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR"),
    codingCutoff            = 0.725, # the coding potential cutoff we suggested for human 
    outputPlots             = FALSE  # keeps the function from outputting the plots from this example
)

## ------------------------------------------------------------------------
extractSwitchSummary(exampleSwitchList, filterForConsequences = TRUE, dIFcutoff = 0.4) # supply the same cutoff to the summary function

## ---- message = FALSE----------------------------------------------------
extractTopSwitches(exampleSwitchList, filterForConsequences = TRUE, n=3)

## ---- fig.width=12, fig.height=7, warning=FALSE--------------------------
switchPlot(exampleSwitchList, gene='KIF1B')

## ------------------------------------------------------------------------
data("exampleSwitchListAnalyzed")

## ---- fig.width=9, fig.height=5------------------------------------------
extractSwitchSummary(
    exampleSwitchListAnalyzed,
    filterForConsequences=TRUE
)

## ---- fig.width=9, fig.height=5------------------------------------------
extractSwitchOverlap(
    exampleSwitchListAnalyzed,
    filterForConsequences=TRUE
)

## ---- fig.width=12, fig.height=5-----------------------------------------
extractConsequenceSummary(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze='all',
    plotGenes = FALSE,           # enables analysis of genes (instead of isoforms)
    asFractionTotal = FALSE      # enables analysis of fraction of significant features
)

## ---- fig.width=12, fig.height=5-----------------------------------------
extractConsequenceEnrichment(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze='all',
    analysisOppositeConsequence = TRUE
)

## ---- fig.width=12, fig.height=4-----------------------------------------
extractConsequenceEnrichmentComparison(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze=c('domains_identified','intron_retention','coding_potential'),
    analysisOppositeConsequence = T
)

## ---- fig.width=12, fig.height=4.5---------------------------------------
extractSplicingEnrichment(exampleSwitchListAnalyzed)

## ---- fig.width=8, fig.height=4, warning=FALSE---------------------------
data("exampleSwitchListAnalyzed")

### Vulcano like plot:
ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=dIF, y=-log10(isoform_switch_q_value))) +
     geom_point(
        aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
        size=1
    ) +
    geom_hline(yintercept = -log10(0.05), linetype='dashed') + # default cutoff
    geom_vline(xintercept = c(-0.1, 0.1), linetype='dashed') + # default cutoff
    facet_wrap( ~ condition_2) +
    #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions
    scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
    labs(x='dIF', y='-Log10 ( Isoform Switch Q Value )') +
    theme_bw()

## ---- fig.width=8, fig.height=4, warning=FALSE---------------------------
### Switch vs Gene changes:
ggplot(data=exampleSwitchListAnalyzed$isoformFeatures, aes(x=gene_log2_fold_change, y=dIF)) +
    geom_point(
        aes( color=abs(dIF) > 0.1 & isoform_switch_q_value < 0.05 ), # default cutoff
        size=1
    ) + 
    facet_wrap(~ condition_2) +
    #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions
    geom_hline(yintercept = 0, linetype='dashed') +
    geom_vline(xintercept = 0, linetype='dashed') +
    scale_color_manual('Signficant\nIsoform Switch', values = c('black','red')) +
    labs(x='Gene log2 fold change', y='dIF') +
    theme_bw()

## ------------------------------------------------------------------------
data("exampleSwitchList")         # A newly created switchAnalyzeRlist + switch analysis
names(exampleSwitchList)

data("exampleSwitchListAnalyzed") # A fully analyzed switchAnalyzeRlist
names(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### Preview
head(exampleSwitchList, 2)
# tail(exampleSwitchList, 2)

### Dimentions
dim(exampleSwitchList$isoformFeatures)

nrow(exampleSwitchList)
ncol(exampleSwitchList)
dim(exampleSwitchList)

## ------------------------------------------------------------------------
exampleSwitchList

### Subset
subsetSwitchAnalyzeRlist(
    exampleSwitchList,
    exampleSwitchList$isoformFeatures$gene_name == 'ARHGEF19'
)

## ------------------------------------------------------------------------
head(exampleSwitchList$exons,2)

## ---- warning=FALSE, message=FALSE---------------------------------------
cuffDB <- prepareCuffExample()
cuffDB

## ---- warning=FALSE------------------------------------------------------
aSwitchList <- importCufflinksCummeRbund(cuffDB)
aSwitchList

## ---- warning=FALSE------------------------------------------------------
### Please note
# The way of importing file in this example with
# "system.file('pathToFile', package="cummeRbund") is
# specialiced to access the example data in the cummeRbund package
# and not something you need to do - just supply the string e.g.
# "/myAnnotation/myGenome/myFile.filetype" to the argument

testSwitchList <- importCufflinksFiles(
    pathToGTF                      = system.file('extdata/chr1_snippet.gtf',             package = "cummeRbund"),
    pathToGeneDEanalysis           = system.file('extdata/gene_exp.diff',                package = "cummeRbund"),
    pathToIsoformDEanalysis        = system.file('extdata/isoform_exp.diff',             package = "cummeRbund"),
    pathToGeneFPKMtracking         = system.file('extdata/genes.fpkm_tracking',          package = "cummeRbund"),
    pathToIsoformFPKMtracking      = system.file('extdata/isoforms.fpkm_tracking',       package = "cummeRbund"),
    pathToIsoformReadGroupTracking = system.file('extdata/isoforms.read_group_tracking', package = "cummeRbund"),
    pathToSplicingAnalysis         = system.file('extdata/splicing.diff',                package = "cummeRbund"),
    pathToReadGroups               = system.file('extdata/read_groups.info',             package = "cummeRbund"),
    pathToRunInfo                  = system.file('extdata/run.info',                     package = "cummeRbund"),
    fixCufflinksAnnotationProblem=TRUE,
    quiet=TRUE
)
testSwitchList

## ---- warning=FALSE, message=FALSE---------------------------------------
### Construct data needed from example data in cummeRbund package
### (The recomended way of analyzing Cufflinks/Cuffdiff data is via importCufflinksCummeRbund()
### This is just an easy way to get some example data ).
cuffDB <- prepareCuffExample()

## ------------------------------------------------------------------------
# Extract count matrix
isoRepCount <- repCountMatrix(isoforms(cuffDB))
isoRepCount$isoform_id <- rownames(isoRepCount)

# Extract design matrix
designMatrix <- cummeRbund::replicates(cuffDB)[,c('rep_name','sample_name')]
colnames(designMatrix) <- c('sampleID','condition')

# Extract isoform structure
localAnnotaion <- import(system.file("extdata/chr1_snippet.gtf", package="cummeRbund"))
localAnnotaion <- localAnnotaion[,c('transcript_id','gene_id')]
colnames(localAnnotaion@elementMetadata)[1] <- 'isoform_id'

### Please note
# 1) The way of importing the GTF file in this example with
#       "system.file('pathToFile', package="cummeRbund") is
#       specialized to access the example data in the cummeRbund package
#       and not somhting you need to do - just supply the string e.g.
#       "/myAnnotation/myGenome/gersionQuantified.gtf" to the import function
# 2) importRdata also supports direct import of a GTF file - just supply the
#       string to the "isoformExonAnnoation"" argument

### Take a look at the data
head(isoRepCount, 2)

designMatrix

head(localAnnotaion, 3)


## ------------------------------------------------------------------------
### Create switchAnalyzeRlist
aSwitchList <- importRdata(
    isoformCountMatrix=isoRepCount,
    designMatrix=designMatrix,
    isoformExonAnnoation=localAnnotaion, # alternatively supply a string that points directly to the GTF file
    showProgress=FALSE
)

aSwitchList

## ---- warning=FALSE, message=FALSE---------------------------------------
aSwitchList <- importGTF(pathToGTF = system.file("extdata/chr1_snippet.gtf", package = "cummeRbund"))

## ------------------------------------------------------------------------
aSwitchList

head(aSwitchList,2)
head(aSwitchList$conditions,2)

## ------------------------------------------------------------------------
data("exampleSwitchList")
exampleSwitchList

exampleSwitchListFiltered <- preFilter(exampleSwitchList, geneExpressionCutoff = 1, isoformExpressionCutoff = 0, removeSingleIsoformGenes = TRUE)

exampleSwitchListFilteredStrict <- preFilter(exampleSwitchList, geneExpressionCutoff = 10, isoformExpressionCutoff = 3, removeSingleIsoformGenes = TRUE)

## ---- warning=FALSE------------------------------------------------------
# Load example data and pre-filter
data("exampleSwitchList")
exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime

# Perform test (takes ~10 sec)
exampleSwitchListAnalyzed <- isoformSwitchTestDRIMSeq(
    switchAnalyzeRlist = exampleSwitchList,
    testIntegration='isoform_only',
    reduceToSwitchingGenes=TRUE
)

# Summarize switching features
extractSwitchSummary(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### Show arguments of function
args(isoformSwitchTest)

## ------------------------------------------------------------------------
# Load example data and prefilter
data("exampleSwitchList")
exampleSwitchList <- preFilter(exampleSwitchList) # preFilter for fast runtime

# Perfom test
exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList)

# Summarize swiching geatures
extractSwitchSummary(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
extractCalibrationStatus(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
# Perfom test
exampleSwitchListAnalyzed <- isoformSwitchTest(exampleSwitchList, calibratePvalues = FALSE)

# Summarize swiching geatures
extractSwitchSummary(exampleSwitchListAnalyzed)

# check callibration status
extractCalibrationStatus(exampleSwitchListAnalyzed)

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Usage of The Statistical Test' section above 

### analyzeORF needs the genomic sequence to predict ORFs. 
# These are readily available from Bioconductor as BSgenome orbjects: 
# http://bioconductor.org/packages/release/BiocViews.html#___BSgenome
# Here we use Hg19 --- which can be downloaded by copy/pasting the following two lines into the R termminal:
# source("https://bioconductor.org/biocLite.R")
# biocLite("BSgenome.Hsapiens.UCSC.hg19")

library(BSgenome.Hsapiens.UCSC.hg19)

exampleSwitchListAnalyzed <- analyzeORF(exampleSwitchListAnalyzed, genomeObject = Hsapiens, showProgress=FALSE)

head(exampleSwitchListAnalyzed$orfAnalysis, 3)


## ------------------------------------------------------------------------
### This example relies on the example data from the 'Analyzing Open Reading Frames' section above 

exampleSwitchListAnalyzed <- extractSequence(
    exampleSwitchListAnalyzed, 
    genomeObject = Hsapiens,
    pathToOutput = '<insert_path>',
    writeToFile=FALSE # to avoid output when running this example data
)

head(exampleSwitchListAnalyzed$ntSequence,2)

head(exampleSwitchListAnalyzed$aaSequence,2)

## ------------------------------------------------------------------------
### Load test data (maching the external sequence analysis results)
data("exampleSwitchListIntermediary")
exampleSwitchListIntermediary

### Add PFAM analysis
exampleSwitchListAnalyzed <- analyzePFAM(
    switchAnalyzeRlist   = exampleSwitchListIntermediary,
    pathToPFAMresultFile = system.file("extdata/pfam_results.txt", package = "IsoformSwitchAnalyzeR"),
    showProgress=FALSE
    )

### Add SignalP analysis
exampleSwitchListAnalyzed <- analyzeSignalP(
    switchAnalyzeRlist       = exampleSwitchListAnalyzed,
    pathToSignalPresultFile = system.file("extdata/signalP_results.txt", package = "IsoformSwitchAnalyzeR")
    )
    
### Add CPAT analysis
exampleSwitchListAnalyzed <- analyzeCPAT(
    switchAnalyzeRlist   = exampleSwitchListAnalyzed,
    pathToCPATresultFile = system.file("extdata/cpat_results.txt", package = "IsoformSwitchAnalyzeR"),
    codingCutoff         = 0.725, # the coding potential cutoff we suggested for human
    removeNoncodinORFs   = TRUE   # because ORF was predicted de novo
    )

exampleSwitchListAnalyzed

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Importing External Sequence Analysis' section above 

exampleSwitchListAnalyzed <- analyzeAlternativeSplicing(exampleSwitchListAnalyzed, quiet=TRUE)

### overview of number of intron retentions (IR)
table(exampleSwitchListAnalyzed$isoformFeatures$IR)

## ------------------------------------------------------------------------
### This example relies on the example data from the 'Predicting Alternative Splicing' section above 

# the consequences highlighted in the text above
consequencesOfInterest <- c('intron_retention','coding_potential','NMD_status','domains_identified','ORF_seq_similarity')

exampleSwitchListAnalyzed <- analyzeSwitchConsequences(
    exampleSwitchListAnalyzed,
    consequencesToAnalyze = consequencesOfInterest, 
    dIFcutoff = 0.4, # very high cutoff for fast runtimes
    showProgress=FALSE
)

extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = FALSE)
extractSwitchSummary(exampleSwitchListAnalyzed, dIFcutoff = 0.4, filterForConsequences = TRUE)

## ------------------------------------------------------------------------
### Load already analyzed data
data("exampleSwitchListAnalyzed")

### Let's reduce the switchAnalyzeRlist to only one condition
exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed, 
    exampleSwitchListAnalyzed$isoformFeatures$condition_1 == 'COAD_ctrl'
)
exampleSwitchListAnalyzedSubset

### Extract top switching genes (by q-value)
extractTopSwitches(
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = 2, 
    sortByQvals = TRUE
)

### Extract top 2 switching genes (by dIF values)
extractTopSwitches(
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = 2, 
    sortByQvals = FALSE
)

## ------------------------------------------------------------------------
### Extract data.frame with all switching isoforms
switchingIso <- extractTopSwitches( 
    exampleSwitchListAnalyzedSubset, 
    filterForConsequences = TRUE, 
    n = NA,                  # n=NA: all features are returned
    extractGenes = FALSE,    # when FALSE isoforms are returned
    sortByQvals = TRUE
)

subset(switchingIso, gene_name == 'ZAK')

## ---- fig.width=12, fig.height=7-----------------------------------------
switchPlot(exampleSwitchListAnalyzedSubset, gene = 'ZAK')

## ---- fig.width=12, fig.height=3-----------------------------------------
### Load already analyzed data
data("exampleSwitchListAnalyzed")

switchPlotTranscript(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B')

## ---- fig.width=3, fig.height=3------------------------------------------
switchPlotGeneExp (exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC')

## ---- fig.width=4, fig.height=3------------------------------------------
switchPlotIsoExp(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC')

## ---- fig.width=4, fig.height=3------------------------------------------
switchPlotIsoUsage(exampleSwitchListAnalyzedSubset, gene = 'TNFRSF1B', condition1='iPS', condition2='hESC')

## ------------------------------------------------------------------------
data("exampleSwitchListIntermediary")
ifMatrix <- extractExpressionMatrix(exampleSwitchListAnalyzed)

head(ifMatrix)

dim(ifMatrix)


## ---- fig.width=4, fig.height=3------------------------------------------
# correlation plot
ggplot(melt(cor(ifMatrix)), aes(x=Var1, y=Var2, fill=value)) + 
    geom_tile() +
    scale_fill_continuous('Correlation') +
    labs(x='Condition', y='Condition') +
    theme_minimal() +
    theme(axis.text.x=element_text(angle=-90, hjust = 0, vjust=0.5)) # turn x-axis 90 degrees

## ---- fig.width=8, fig.height=4------------------------------------------
library(pheatmap)
pheatmap(t(ifMatrix), show_colnames=FALSE)

## ------------------------------------------------------------------------
data("exampleSwitchListAnalyzed")
exampleSwitchListAnalyzed

## ---- fig.width=12, fig.height=5-----------------------------------------
extractSplicingSummary(
    exampleSwitchListAnalyzed,
    asFractionTotal = FALSE,
    plotGenes=FALSE
)

## ---- fig.width=12, fig.height=4.5---------------------------------------
splicingEnrichment <- extractSplicingEnrichment(
    exampleSwitchListAnalyzed,
    splicingToAnalyze='all',
    returnResult=TRUE,
    returnSummary=TRUE
)

## ---- fig.width=12, fig.height=6-----------------------------------------
subset(splicingEnrichment, splicingEnrichment$AStype == 'ATSS')

## ---- fig.width=12, fig.height=4.5---------------------------------------
extractSplicingEnrichmentComparison(
    exampleSwitchListAnalyzed,
    splicingToAnalyze=c('A3','MES','ATSS'), # Those significant in COAD in the previous analysis
    returnResult=FALSE # Preventing the summary statistics to be returned as a data.frame
)

## ---- fig.width=10, fig.height=6-----------------------------------------
extractSplicingGenomeWide(
    exampleSwitchListAnalyzed,
    featureToExtract = 'all',                 # all isoforms stored in the switchAnalyzeRlist
    splicingToAnalyze = c('A3','MES','ATSS'), # Splice types significantly enriched in COAD
    plot=TRUE,
    returnResult=FALSE  # Preventing the summary statistics to be returned as a data.frame
)

## ---- results = "hide", message = FALSE----------------------------------
### Load example data
data("exampleSwitchListAnalyzed")

### Reduce datasize for fast runtime
selectedGenes <- unique(exampleSwitchListAnalyzed$isoformFeatures$gene_id)[50:100]
exampleSwitchListAnalyzedSubset <- subsetSwitchAnalyzeRlist(
    exampleSwitchListAnalyzed,
    exampleSwitchListAnalyzed$isoformFeatures$gene_id %in% selectedGenes
)

### analyze the biological mechanisms
bioMechanismeAnalysis <- analyzeSwitchConsequences(
    exampleSwitchListAnalyzedSubset, 
    consequencesToAnalyze = c('tss','tts','intron_structure'),
    showProgress = FALSE
)$switchConsequence # only the consequences are interesting here

### subset to those with differences
bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoformsDifferent),]

### extract the consequences of interest already stored in the switchAnalyzeRlist
myConsequences <- exampleSwitchListAnalyzedSubset$switchConsequence
myConsequences <- myConsequences[which(myConsequences$isoformsDifferent),]
myConsequences$isoPair <- paste(myConsequences$isoformUpregulated, myConsequences$isoformDownregulated) # id for specific iso comparison

### Obtain the mechanisms of the isoform switches with consequences
bioMechanismeAnalysis$isoPair <- paste(bioMechanismeAnalysis$isoformUpregulated, bioMechanismeAnalysis$isoformDownregulated)
bioMechanismeAnalysis <- bioMechanismeAnalysis[which(bioMechanismeAnalysis$isoPair %in% myConsequences$isoPair),]  # id for specific iso comparison

## ---- fig.width=3.5, fig.height=3.5--------------------------------------
### Create list with the isoPair ids for each consequencee
AS   <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'intron_structure')]
aTSS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tss'             )]
aTTS <- bioMechanismeAnalysis$isoPair[ which( bioMechanismeAnalysis$featureCompared == 'tts'             )]

mechList <- list(
    AS=AS,
    aTSS=aTSS,
    aTTS=aTTS
)

### Create venn diagram
library(VennDiagram)
myVenn <- venn.diagram(mechList, col='transparent', alpha=0.4, fill=RColorBrewer::brewer.pal(n=3,name='Dark2'), filename=NULL)

### Plot the venn diagram
grid.newpage() ; grid.draw(myVenn)


## ------------------------------------------------------------------------
sessionInfo()