## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(
  tidy = FALSE,
  cache = FALSE,
  fig.path = "vignettes/",
  dev = "png",
  message = FALSE,
  error = FALSE,
  warning = TRUE
)

suppressPackageStartupMessages({
  library(dplyr)
  library(data.table)
})

## ----load_package_data, eval=TRUE, echo=TRUE----------------------------------
# Load the package
library(postNet)

# Load the vignette example data
data("postNetVignette", package = "postNet")

## ----gettingStartedInput, eval=TRUE, echo=TRUE--------------------------------
# Prepare custom gene lists and regulatory effect measurement:
myGenes <- postNetVignette$geneList
str(myGenes)
myBg <- postNetVignette$background
str(myBg)
myEffect <- postNetVignette$effect
str(myEffect)

## ----gettingStartedInitialize, eval=TRUE, echo=TRUE, results='hide'-----------
# Initialize a postNetData object:
ptn <- postNetStart(
  geneList = myGenes,
  geneListcolours = c("#FC9272", "#99000D"),
  customBg = myBg,
  effectMeasure = myEffect,
  selection = "random",
  setSeed = 123,
  source = "load",
  species = "human"
)

## ----gettingStartedEnumerate, eval=TRUE, echo=TRUE----------------------------
# Quantify the length and nucleotide content of the 5'UTR and compare between regulated gene sets
len <- lengthAnalysis(
  ptn = ptn,
  region = c("UTR5"),
  comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
  plotOut = TRUE,
  plotType = "boxplot",
  pdfName = "Example"
)
str(len)

content_UTR5 <- contentAnalysis(
  ptn = ptn,
  region = c("UTR5"),
  comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
  contentIn = c("GC"),
  plotOut = TRUE,
  plotType = "ecdf",
  pdfName = "Example"
)
str(content_UTR5)

## ----gettingStartedEnumeratePNGS, eval=TRUE, echo=FALSE, fig.cap="Comparisons of 5'UTR length and GC content between mRNAs belonging to regulated gene sets."----
knitr::include_graphics("Figures/Figure1.png")

## ----gettingStartedFeatureInt, eval=FALSE, echo=TRUE--------------------------
# # Prepare the list of pre-calculated features to be used in modelling  (for example,
# # the 'len' and 'content_UTR' variables defined in the previous step can be included)
# features <- postNetVignette$features
# str(features)
# 
# # Signatures of trans-acting factors can also be included, and some are provided with the package
# humanSignatures <- get_signatures(species = "human")
# str(humanSignatures)
# 
# # Signatures can be converted into feature inputs using the signCalc function
# trans_signatures <- signCalc(ptn, humanSignatures)
# 
# # Compile the final feature input:
# features <- c(features, trans_signatures)
# 
# # Group the features according to category (optional)
# group <- c(
#   "UTR5", "UTR3", "UTR5", "CDS", "UTR5", "UTR5", "UTR5", "CDS", "CDS",
#   rep("Pathway", 2), "UTR5", rep("Pathway", 2)
# )
# groupColour <- c("#834b62", "#6699cc", "#e9724c", "#fff299")
# names(groupColour) <- c("UTR5", "CDS", "UTR3", "Pathway")
# 
# # Run feature integration modelling using forward stepwise regression
# ptn <- featureIntegration(
#   ptn = ptn,
#   features = features,
#   pdfName = "omnibus",
#   regOnly = TRUE,
#   allFeat = FALSE,
#   analysis_type = "lm",
#   covarFilt = 20,
#   comparisons = list(c(1, 2)),
#   lmfeatGroup = group,
#   lmfeatGroupColour = groupColour,
#   NetModelSel = "omnibus"
# )

## ----gettingStartedFeatureInt2, eval=TRUE, echo=FALSE, include=FALSE----------
# Prepare the list of pre-calculated features to be used in modelling  (for example,
# the 'len' and 'content_UTR' variables defined in the previous step can be included)
features <- postNetVignette$features
str(features)

# Signatures of trans-acting factors can also be included, and some are provided with the package
humanSignatures <- get_signatures(species = "human")
str(humanSignatures)

# Signatures can be converted into feature inputs using the signCalc function
trans_signatures <- signCalc(ptn, humanSignatures)

# Compile the final feature input:
features <- c(features, trans_signatures)

# Group the features according to category (optional)
group <- c(
  "UTR5", "UTR3", "UTR5", "CDS", "UTR5", "UTR5", "UTR5", "CDS", "CDS",
  rep("Pathway", 2), "UTR5", rep("Pathway", 2)
)
groupColour <- c("#834b62", "#6699cc", "#e9724c", "#fff299")
names(groupColour) <- c("UTR5", "CDS", "UTR3", "Pathway")

# Run feature integration modelling using forward stepwise regression
ptn <- featureIntegration(
  ptn = ptn,
  features = features,
  pdfName = "omnibus",
  regOnly = TRUE,
  allFeat = FALSE,
  analysis_type = "lm",
  covarFilt = 20,
  comparisons = list(c(1, 2)),
  lmfeatGroup = group,
  lmfeatGroupColour = groupColour,
  NetModelSel = "omnibus"
)

## ----gettingStartedFeatureIntNetworkPNG, eval=TRUE, echo=FALSE, fig.cap="Network plot of the results of the stepwise regression omnibus model."----
knitr::include_graphics("Figures/Figure2.png")

## ----gettingStartedPlotFeaturesMap, eval=TRUE, echo=TRUE----------------------
ptn <- plotFeaturesMap(ptn,
  regOnly = TRUE,
  comparisons = list(c(1, 2)),
  featSel = names(ptn_selectedFeatures(ptn,
    analysis_type = "lm",
    comparison = 1
  )),
  remBinary = TRUE,
  featCol = "UTR5_SCSCGS",
  scaled = FALSE,
  remExtreme = 0.1,
  pdfName = "Example"
)

## ----gettingStartedPlotFeaturesMapPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="UMAP visualizing changes in translation efficiency (Effect) generated based on features identified in the omnibus model (left), with mRNAs containing SCSCGS motifs in the 5´UTR coloured (right)."----
knitr::include_graphics("Figures/Figure3.png")

## ----postNetStartGeneList, eval=TRUE, echo=TRUE-------------------------------
# Genes of interest should be provided in a named list.
myGenes <- postNetVignette$geneList
str(myGenes)

# All gene IDs in the list should be present in the background.
myBg <- postNetVignette$background
str(myBg)

# The regulatory effect measurement must be named with the same gene IDs
# present in the background (or the gene list if no custom background is provided).
myEffect <- postNetVignette$effect
str(myEffect)

## ----postNetStartGeneListInit, eval=TRUE, echo=TRUE, results = 'hide'---------
# Initialize the postNetData object with custom gene lists:
ptn <- postNetStart(
  geneList = myGenes,
  geneListcolours = c("#FC9272", "#99000D"),
  customBg = myBg,
  effectMeasure = myEffect,
  selection = "random",
  setSeed = 123,
  source = "load",
  species = "human"
)

## ----GetAds, eval=TRUE, echo=TRUE, include=FALSE, results = 'hide', warning = FALSE----
# Initialize Anota2seqDataSet using example data (see anota2seq vignette for details)
ads <- anota2seq::anota2seqDataSetFromMatrix(
  dataP = postNetVignette$ads_data$dataP,
  dataT = postNetVignette$ads_data$dataT,
  phenoVec = postNetVignette$ads_data$phenoVec,
  batchVec = c(1, 2, 3, 4, 1, 2, 3, 4),
  dataType = "RNAseq",
  normalize = FALSE
)

# Run an anota2seq analysis:
# Note that the quality control and residual outlier testing are not
# performed to limit the running time of this example. For full details
# on running an analysis please see the anota2seq vignette and help manual.
ads <- anota2seq::anota2seqRun(ads,
  performQC = FALSE,
  performROT = FALSE,
  useProgBar = FALSE
)

## ----postNetStartAds, eval=TRUE, echo=TRUE, results = 'hide'------------------
# Initialize the postNetData object:
ptn_withAds <- postNetStart(
  ads = ads,
  regulation = c("translationUp", "translationDown", "bufferingmRNAUp", "bufferingmRNADown"),
  contrast = c(1, 1, 1, 1),
  regulationGen = "translation",
  contrastSel = 1,
  selection = "random",
  setSeed = 123, # ensures reproducibility of random isoform selection
  source = "load",
  species = "human"
)

## ----RefSeqLoad, eval=FALSE, echo=TRUE----------------------------------------
# # Prepare custom gene lists and regulatory effect measurement using example data:
# myGenes <- postNetVignette$geneList
# myBg <- postNetVignette$background
# myEffect <- postNetVignette$effect
# 
# # Initialize a postNetData object using in-built annotations for human:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "load",
#   species = "human",
#   version = "ver_40.202408"
# )

## ----RefSeqCreate, eval=FALSE, echo=TRUE--------------------------------------
# # Initialize a postNetData object creating new RefSeq reference sequence annotations for human:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "create",
#   species = "human",
# )

## ----RefSeqCreateFromSourceFiles, eval=FALSE, echo=TRUE-----------------------
# #  The required RefSeq annotation files downloaded from the NCBI database will
# #  have the following naming:
# #  - "example_rna.gbff.gz"
# #  - "example_rna.fa.gz"
# #  - "example_genomic.gff.gz"
# 
# # Initialize a postNetData object creating a new RefSeq reference sequence annotation
# # from source files:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "createFromSourceFiles",
#   species = "human",
#   rna_gbff_file = "example_rna.gbff.gz",
#   rna_fa_file = "example_rna.fa.gz",
#   genomic_gff_file = "example_genomic.gff.gz"
# )

## ----RefSeqCreateFromFasta, eval=FALSE, echo=TRUE-----------------------------
# # An example of the required format for the posFile parameter ("positions.txt"):
# # id	UTR5_len	cds_stop	total_length
# # NM_000014	70	4495	4610
# # NM_000015	70	943	1285
# # NM_000016	79	1345	2261
# 
# # Initialize a postNetData object creating a new RefSeq reference sequence annotation
# # from source files:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "createFromFasta",
#   species = "human",
#   fastaFile = "_rna.fa.gz",
#   posFile = "positions.txt",
#   genomic_gff_file = "_genomic.gff.gz"
# )

## ----RefSeqCustom, eval=FALSE, echo=TRUE--------------------------------------
# # An example of the required format for the customFile parameter ("customSequences.txt"):
# # id	geneID	UTR5_seq	CDS_seq	UTR3_seq
# # NM_000014	A2M	GGGACCAG...	ATGGGGAA...	AGACCACA...
# 
# # Initialize a postNetData object with a custom reference sequence annotation file:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "custom",
#   species = NULL,
#   customFile = "customSequences.txt"
# )

## ----IsoformSel, eval=FALSE, echo=TRUE----------------------------------------
# # Initialize a postNetData object with reproducible random mRNA isoform selection:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "load",
#   species = "human",
#   selection = "random",
#   setSeed = 123
# )

## ----AdjObj, eval=TRUE, echo=TRUE---------------------------------------------
# Create the adjObj list with custom 5'UTR sequences to replace those in the loaded annotation file
myUTR5seqs <- ptn_sequences(ptn, "UTR5")
myIDd <- ptn_id(ptn, "UTR5")
names(myUTR5seqs) <- myIDd
customUTR5s <- list(UTR5 = myUTR5seqs)
str(customUTR5s)

## ----AdjustUTRs, eval=FALSE, echo=TRUE----------------------------------------
# # Initialize a postNetData object with custom 5'UTR sequences, replacing the 5'UTR sequences
# # in the loaded annotation file with custom ones where available, and discarding all isoforms
# # without custom sequences. Genes with no custom sequence will retain all isoforms from the original annotation:
# ptn <- postNetStart(
#   geneList = myGenes,
#   geneListcolours = c("#FC9272", "#99000D"),
#   customBg = myBg,
#   effectMeasure = myEffect,
#   source = "load",
#   species = "human",
#   selection = "random",
#   setSeed = 123,
#   adjObj = customUTR5s,
#   region_adj = "UTR5",
#   excl = FALSE,
#   keepAll = FALSE
# )

## ----lengthAnalysisBoxplot, eval=TRUE, echo=TRUE------------------------------
# Calculate the length of the 5'UTR, 3'UTR and coding sequence for each gene,
# and compare lengths between translationUp genes vs. background,
# translationDown genes vs. background, and translationUp vs. translationDown genes
len <- lengthAnalysis(
  ptn = ptn,
  region = c("UTR5", "CDS", "UTR3"),
  comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
  plotOut = TRUE,
  plotType = "ecdf",
  pdfName = "Example"
)
str(len)

## ----lengthAnalysisPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="eCDFs visualizing differences in Log2(Length) of mRNA sequence regions between different gene sets. 5'UTR (left), 3'UTR (middle), CDS (right). Statistical comparisons are made between translationally activated (blue), suppressed (red), and background (grey) gene sets. Visualizations can also be generated as either box plots, or violin plots"----
knitr::include_graphics("Figures/Figure4.png")

## ----contentAnalysisGCviolin, eval=TRUE, echo=TRUE----------------------------
# Calculate the GC content of each sequence region for each gene, and compare between
# translationUp genes vs. background, translationDown genes vs. background, and
# translationUp vs. translationDown genes
GC_content <- contentAnalysis(
  ptn = ptn,
  region = c("UTR5", "CDS", "UTR3"),
  comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
  contentIn = c("GC"),
  plotOut = TRUE,
  plotType = "violin",
  pdfName = "Example"
)

## ----contentAnalysisPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Violin plots visualizing differences in the percentage of GC content of mRNA sequence regions between different gene sets. 5'UTR (left), 3'UTR (middle), CDS (right). Statistical comparisons are made between translationally activated (blue), suppressed (red), and background (grey) gene sets. Visualizations can also be generated as either box plots, or eCDFs"----
knitr::include_graphics("Figures/Figure5.png")

## ----contentAnalysisGC3, eval=TRUE, echo=TRUE---------------------------------
# Calculate the GC of each sequence region for each gene, as well as the GC3 content of
# the coding region and compare between translationUp genes vs. background, translationDown
# genes vs. background, and translationUp vs. translationDown genes
GC3_content <- contentAnalysis(
  ptn = ptn,
  region = c("CDS"),
  comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
  contentIn = c("GC3"),
  plotOut = TRUE,
  plotType = "violin",
  pdfName = "Example"
)
str(GC_content)

## ----contentAnalysisSubregion, eval=FALSE, echo=TRUE--------------------------
# # Calculate the G content of the first and last 15 nucleotides of the 5'UTR for each gene, and
# # compare between translationUp genes vs. background, translationDown genes vs. background, and
# # translationUp vs. translationDown genes
# content_UTR5_first15 <- contentAnalysis(
#   ptn = ptn,
#   region = c("UTR5"),
#   subregion = 15,
#   subregionSel = "select",
#   comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
#   contentIn = c("G"),
#   plotOut = TRUE,
#   plotType = "ecdf",
#   pdfName = "Example_First15utr5"
# )
# 
# content_UTR5_last15 <- contentAnalysis(
#   ptn = ptn,
#   region = c("UTR5"),
#   subregion = -15,
#   subregionSel = "select",
#   comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
#   contentIn = c("G"),
#   plotOut = TRUE,
#   plotType = "ecdf",
#   pdfName = "Example_Last15utr5"
# )

## ----foldingEnergyBoxplot, eval=TRUE, echo=TRUE-------------------------------
# Compare the folding energy of the 5'UTRs between translationUp
# genes vs. background, translationDown genes vs. background,
# and translationUp vs. translationDown genes.
FE <- foldingEnergyAnalysis(
  ptn = ptn,
  region = c("UTR5"),
  comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
  residFE = FALSE,
  plotType = "boxplot",
  sourceFE = "load",
  plotOut = TRUE,
  pdfName = "Example"
)
str(FE)

## ----foldingEnergyPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Box plots visualizing differences in the folding energy of mRNA 5'UTRs between different gene sets. Statistical comparisons are made between translationally activated (light red), suppressed (dark red), and background (grey) gene sets. Visualizations can also be generated as either violin plots, or eCDFs"----
knitr::include_graphics("Figures/Figure6.png")

## ----foldingEnergyCustom, eval=FALSE, echo=TRUE-------------------------------
# # An example of the required input format for the customFile parameter ("custom_UTR5FE.txt"):
# # id	fold_energy	length
# # NM_018117	-16.3	34
# # NM_001025603	-36.84	162
# # NM_001003891	-28.4	81
# # NM_021107	-120.14	350
# 
# # Compare the folding energy of the 5'UTR for each gene between translationUp genes vs. background,
# # translationDown genes vs. background, and translationUp vs. translationDown genes.
# customFE <- foldingEnergyAnalysis(
#   ptn = ptn,
#   region = c("UTR5"),
#   comparisons = list(c(0, 1), c(0, 2), c(1, 2)),
#   residFE = FALSE,
#   plotType = "violin",
#   sourceFE = "custom",
#   customFileFE = "~/Path/To/CustomFile/custom_UTR5FE.txt",
#   plotOut = TRUE,
#   pdfName = "Example_customFE"
# )

## ----uorfAnalysis, eval=TRUE, echo=TRUE---------------------------------------
# Identify all uORFs with canonical start codons in a strong Kozak context (including
# those not fully contained within 5'UTRs, and compare
# between translationUp vs. translationDown genes

uORFs <- uorfAnalysis(
  ptn = ptn,
  comparisons = list(c(1, 2)),
  startCodon = "ATG",
  KozakContext = c("strong"),
  onlyUTR5 = FALSE,
  unitOut = "number",
  plotOut = TRUE,
  pdfName = "Example"
)
str(uORFs)

## ----uorfAnalysisPNG,eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Bar plot visualizing differences in the proportion of transcripts containing uORFs with canonical start codons in a strong Kozak context between different gene sets. Statistical comparisons are made between translationally activated (blue), and suppressed (red) gene sets."----
knitr::include_graphics("Figures/Figure7.png")

## ----motifAnalysis, eval=FALSE, echo=TRUE-------------------------------------
# # Note that as users must provide the path to the MEME Suite executables. An example of
# # how to run this function is provided below, and can be updated with the correct memePath argument.
# 
# ptn <- motifAnalysis(
#   ptn = ptn,
#   stremeThreshold = 0.05,
#   minwidth = 6,
#   memePath = "/meme/bin",
#   region = c("UTR5", "CDS", "UTR3")
# )
# 
# # Extract the significantly enriched motifs:
# denovo_UTR5motifs <- ptn_motifSelection(
#   ptn = ptn,
#   region = "UTR5"
# )
# 
# # Extract full motif results from one gene set of interest:
# denovo_UTR5motifs_TransUp <- ptn_motifGeneList(
#   ptn = ptn,
#   region = "UTR5",
#   geneList = "translationUp"
# )

## ----contentMotifs, eval=TRUE, echo=TRUE--------------------------------------
# Detect and quantify a given motif within 5'UTRs, and compare between translationUp vs.
# translationDown genes

UTR5_SCSCGS_num <- contentMotifs(
  ptn = ptn,
  motifsIn = "SCSCGS",
  region = c("UTR5"),
  comparisons = list(c(1, 2)),
  dist = 1,
  unitOut = "number",
  pdfName = "Example",
  plotOut = TRUE
)
str(UTR5_SCSCGS_num)

# Now find the coordinates of positions where the motif occurs in 5'UTRs

UTR5_SCSCGS_pos <- contentMotifs(
  ptn = ptn,
  motifsIn = "SCSCGS",
  region = c("UTR5"),
  comparisons = list(c(1, 2)),
  dist = 1,
  unitOut = "position"
)
str(UTR5_SCSCGS_pos$UTR5_SCSCGS[1:3])

## ----contentMotifsPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="eCDFs visualizing differences in the presence of the SCSCGS motif in the 5'UTR of different gene sets. Statistical comparisons are made between translationally activated (blue) and suppressed (red) gene sets. Background genes are shown in grey."----
knitr::include_graphics("Figures/Figure8.png")

## ----codonUsageTable, eval=FALSE, echo=TRUE-----------------------------------
# # Examine the composition of all genes:
# 
# ptn <- codonUsage(
#   ptn = ptn,
#   annotType = "ptnCDS",
#   sourceSeq = "load",
#   analysis = "codon",
#   codonN = 1,
#   comparisons = list(c(1, 2))
# )
# 
# # Retrieve the codon usage summary table
# codonUsageTable <- ptn_codonAnalysis(ptn)

## ----codonUsagePlotsPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Scatter plots comparing average codon usage and frequency between gene sets of interest. Codons encoding the same amino acid are coloured and connected by lines."----
knitr::include_graphics("Figures/Figure9.png")

## ----codonUsageIndexPNG, echo=FALSE, fig.wide = TRUE, fig.cap="Violin plots of several codon usage indexes calculated by the codonUsage() function. Shown are several examples including CAI (left), CBI (middle), and Fop (right). Statistical comparisons are made between translationally activated (blue), suppressed (red), and background (grey) gene sets. Visualizations can also be generated as either box plots, or eCDFs"----
knitr::include_graphics("Figures/Figure10.png")

## ----codonUsageSelected, eval=FALSE, echo=TRUE--------------------------------
# # Identify enriched and depleted codons between
# # translationally activated and suppressed genes
# 
# codonUsage(
#   ptn = ptn,
#   annotType = "ptnCDS",
#   sourceSeq = "load",
#   analysis = "codon",
#   codonN = 1,
#   pAdj = 0.01,
#   plotHeatmap = TRUE,
#   thresOddsUp = 0.3,
#   thresFreqUp = 0.3,
#   thresOddsDown = 0.3,
#   thresFreqDown = 0.3,
#   comparisons = list(c(1, 2)),
#   pdfName = "Example"
# )

## ----codonUsageSelectedPNG,eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Visualizations of differential codon usage between different gene sets of interest. A clustered heatmap of standardized residuals for each codon from a Chi-square test (left), and a plot of the log2 odds ratio for each codon between translationally activated and suppressed gene sets, vs. the average codon frequency. Those passing thresholds for enrichment or depletion between gene sets are highlighted in red and blue, respectively."----
knitr::include_graphics("Figures/Figure11.png")

## ----codonCalc, eval=FALSE, echo=TRUE-----------------------------------------
# # Select codons of interest with high frequency, and the highest and lowest odds ratios
# # between translationally activated and suppressed genes
# 
# codons <- ptn_codonSelection(ptn,
#   comparison = 1
# )
# 
# # Calculate and plot eCDFs of codon frequencies, and compare between gene sets
# 
# codonCounts <- codonCalc(
#   ptn = ptn,
#   analysis = "codon",
#   featsel = codons,
#   unit = "freq",
#   comparisons = list(c(1, 2)),
#   plotType = "ecdf"
# )

## ----codonCalcPNG,eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="eCDFs visualizing differences in the frequency of enriched (left) and depleted (right) codons between gene sets. Statistical comparisons are made between translationally activated (blue), suppressed (red) gene sets, with background shown in grey. Visualizations can also be generated as either box plots, or violin plots."----
knitr::include_graphics("Figures/Figure12.png")

## ----signCalc, eval=TRUE, echo=TRUE-------------------------------------------
# load the signatures provided with the package:
data("humanSignatures")
str(humanSignatures)

# Convert the gene signatures to a binary format for featureIntegration:
signatureFeatures <- signCalc(ptn = ptn, signatures = humanSignatures)
str(signatureFeatures)

## ----createFeaturesList, eval=FALSE, echo=TRUE--------------------------------
# # Compile a list of features:
# myFeatureList <- c(
#   len[c(1, 3)],
#   GC_content[c(1)],
#   GC3_content,
#   FE[c(1)],
#   uORFs,
#   UTR5_SCSCGS_num,
#   codonCounts,
#   signatureFeatures
# )
# 
# # Check that the elements of the list are named:

## ----univariatePNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="Output of univariate linear models from featureIntegration."----
knitr::include_graphics("Figures/Figure13.png")

## ----stepwisePNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="Output of stepwise regression summarizing F-values at each step of modelling."----
knitr::include_graphics("Figures/Figure14.png")

## ----omnibusPNG,eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Summary table of features included in the final omnibus and adjusted models after forward stepwise regression."----
knitr::include_graphics("Figures/Figure15.png")

## ----correlationPNG, eval=TRUE,echo=FALSE, fig.wide = FALSE, fig.cap="Summary table of Pearson's correlation coefficients between features."----
knitr::include_graphics("Figures/Figure16.png")

## ----runLM, eval=FALSE, echo=TRUE---------------------------------------------
# # Run feature integration modelling using forward stepwise regression
# ptn <- featureIntegration(
#   ptn = ptn,
#   features = myFeatureList,
#   pdfName = "Example",
#   regOnly = TRUE,
#   allFeat = FALSE,
#   analysis_type = "lm",
#   comparisons = list(c(1, 2))
# )

## ----retrieveLM, eval=FALSE, echo=TRUE----------------------------------------
# # Check which models are available in the postNetData object
# ptn_check_models(ptn = ptn, analysis_type = "lm")
# 
# # Extract the set of significant features selected in the omnibus model
# selectedFeaturesLM <- ptn_selectedFeatures(
#   ptn = ptn,
#   analysis_type = "lm",
#   comparison = 1
# )
# 
# # Retrieve the stepwise regression results (other options include "stepwiseModel" and "finalModel"):
# univariateModel <- ptn_model(ptn, analysis_type = "lm", model = "univariateModel", comparison = 1)

## ----networkPNG,eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Network visualization of the results of forward stepwise regression modelling. Nodes represent features that were selected in the omnibus model, with the size proportional to the total variance in the regulatory effect explained by that feature. Edges represent the Pearson correlation coefficient between features. Wider lines indicate stronger positive or negative correlations. Features without a node indicated in pink represent those that were not selected in the omnibus model, but correlate substantially with one or more features that were significant in the model."----
knitr::include_graphics("Figures/Figure17.png")

## ----networkGroupingLM, eval=FALSE, echo=TRUE---------------------------------
# # Examine the input features
# names(myFeatureList)
# 
# # Make a vector to assign groups to each feature according to categories:
# group <- c(
#   "UTR5", "UTR3", "UTR5", "CDS", "UTR5", "UTR5", "UTR5", "CDS", "CDS",
#   rep("Pathway", 2), "UTR5", rep("Pathway", 2)
# )
# 
# # Assign colours to each grouping category:
# groupColour <- c("#834b62", "#6699cc", "#e9724c", "#fff299")
# names(groupColour) <- c("UTR5", "CDS", "UTR3", "Pathway")
# 
# 
# # Run feature integration modelling using forward stepwise regression
# ptn <- featureIntegration(
#   ptn = ptn,
#   features = myFeatureList,
#   pdfName = "Example_grouped",
#   regOnly = TRUE,
#   allFeat = FALSE,
#   analysis_type = "lm",
#   comparisons = list(c(1, 2)),
#   lmfeatGroup = group,
#   lmfeatGroupColour = groupColour
# )

## ----networkGroupingPNG,eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Network visualization of the results of forward stepwise regression modelling. Selected features are grouped according to which region of the mRNA they correspond to, as well as upstream regulatory pathways known to alter translation efficiency."----
knitr::include_graphics("Figures/Figure18.png")

## ----retrieveNetworkLM, eval=FALSE, echo=TRUE---------------------------------
# # Retrieve the network graph of the results of the stepwise regression omnibus model:
# networkGraph <- ptn_networkGraph(ptn, comparison = 1)

## ----runRF, eval=FALSE, echo=FALSE--------------------------------------------
# # Run feature integration modelling using Random Forest classification
# ptn <- featureIntegration(
#   ptn = ptn,
#   features = myFeatureList,
#   pdfName = "Example",
#   regOnly = TRUE,
#   allFeat = FALSE,
#   analysis_type = "rf",
#   comparisons = list(c(1, 2))
# )

## ----featImpPNG, echo=FALSE,eval=TRUE, fig.wide = TRUE, fig.cap="Boxplots of Z-Scores of Importance from feature selection using Boruta. Features coloured in green indicate retained features. Shadow feature metrics (randomly shuffled attributes) are indicated in blue."----
knitr::include_graphics("Figures/Figure19.png")

## ----finalRFPNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="Final modelling results using Random Forest Classification. Left panel: Feature importance (mean decrease accuracy) for selected features included in the final model. Right panel: Receiver Operating Characteristic (ROC) curve for Random Forest classification of translationally activated vs. suppressed genes."----
knitr::include_graphics("Figures/Figure20.png")

## ----runningRF, eval=FALSE, echo=TRUE-----------------------------------------
# # Run feature integration modelling using Random Forest classification:
# ptn <- featureIntegration(
#   ptn = ptn,
#   features = myFeatureList,
#   pdfName = "Example",
#   regOnly = TRUE,
#   allFeat = FALSE,
#   analysis_type = "rf",
#   comparisons = list(c(1, 2))
# )

## ----retrieveRF, eval=FALSE, echo=TRUE----------------------------------------
# # Check which models are available in the postNetData object:
# ptn_check_models(ptn = ptn, analysis_type = "rf")
# 
# # Extract the set of selected features and their importance:
# selectedFeaturesRF <- ptn_selectedFeatures(
#   ptn = ptn,
#   analysis_type = "rf",
#   comparison = 1
# )
# 
# # Retrieve the final model results (other options include "preModel" or "borutaModel"):
# finalModel <- ptn_model(ptn, analysis_type = "rf", model = "finalModel", comparison = 1)

## ----IndFeatPNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="Individual feature visualizations. Scatterplots of 5'UTR GC content (left), 5'UTR SCSCGS motif content (middle), and signature of genes translationally activated by mTOR (right) versus translation efficiency (effM). Pearson's correlation coefficients, linear trend lines, and cubic smoothing splines indicated."----
knitr::include_graphics("Figures/Figure21.png")

## ----rfPred, eval=FALSE, echo=TRUE--------------------------------------------
# # Simulate a new gene list by randomly sampling genes from the existing background:
# newGenes <- sample(postNetVignette$background, size = 100)
# newGeneList <- list(translationUp = newGenes[1:50], translationDown = newGenes[51:100])
# 
# # Select the features that were used to train the final Random Forest model in the original dataset:
# predFeatureNames <- names(ptn_selectedFeatures(ptn,
#   analysis_type = "rf",
#   comparison = 1
# ))
# 
# # Prepare the predictive features. In this case the same list of input features will be used
# # to predict as the new gene lists are taken from the same dataset the model was trained on.
# # However, usually the input for the rfPred() function would be taken from a postNetData
# # object from a separate analysis on a distinct dataset.
# 
# newFeatures <- myFeatureList[predFeatureNames]
# 
# # Evaluate the model in the new simulated dataset:
# ptn <- rfPred(
#   ptn = ptn,
#   comparison = 1,
#   predGeneList = newGeneList,
#   predFeatures = newFeatures,
#   pdfName = "Example"
# )

## ----rfPredPNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="Receiver Operating Characteristic (ROC) curve for the Random Forest classification model trained in the example above, applied to a new simulated dataset of randomized genes to predict translationally activated vs. suppressed genes."----
knitr::include_graphics("Figures/Figure22.png")

## ----plotUMAPs, eval=FALSE, echo=TRUE-----------------------------------------
# # Plot UMAP visualizations using the features identified in stepwise regression modelling
# ptn <- plotFeaturesMap(
#   ptn = ptn,
#   regOnly = TRUE,
#   comparisons = list(c(1, 2)),
#   featSel = c(names(ptn_selectedFeatures(ptn,
#     analysis_type = "lm",
#     comparison = 1
#   ))),
#   featCol = c("UTR5_GC"),
#   remBinary = TRUE,
#   scaled = FALSE,
#   centered = TRUE,
#   remExtreme = 0.1,
#   pdfName = "Example"
# )

## ----plotUMAPsPNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="UMAP visualizing changes in translation efficiency (Effect) generated based on features identified in the omnibus model using forward stepwise regression (left), with the 5'UTR GC content of mRNAs coloured (right)."----
knitr::include_graphics("Figures/Figure23.png")

## ----slopeFilt, eval=TRUE, echo=TRUE------------------------------------------
# Get the genes to be filtered out of downstream enrichment analyses
# using the buffering regulatory mode:
filtOutGenes <- slopeFilt(ads,
  regulationGen = "buffering",
  contrastSel = 1
)

## ----GSEAmsigDB, eval=FALSE, echo=TRUE----------------------------------------
# # If using a GSEA collection from MSigDB, check the available versions:
# version <- msigdb::getMsigdbVersions()
# 
# # Retrieve the MSigDB data for "human":
# msigdbOut <- msigdb::getMsigdb(
#   org = "hs",
#   id = "SYM",
#   version = version[1]
# )
# 
# # Check the available collections or subcollections:
# msigdb::listCollections(msigdbOut)
# msigdb::listSubCollections(msigdbOut)
# 
# # Run GSEA on the C5 collection with GO:BP and specific terms:
# ptn <- gseaAnalysis(
#   ptn = ptn,
#   collection = "c5",
#   subcollection = "GO:BP",
#   subsetNames = c(
#     "GOBP_CELL-CELL_SIGNALING_BY_WNT",
#     "GOBP_ENDOCYTOSIS"
#   ),
#   name = "c5_Example"
# )

## ----GSEAcustom, eval=TRUE, echo=TRUE-----------------------------------------
# Create example custom gene sets for GSEA:
set1 <- sample(myGenes[[1]], 100)
set2 <- sample(myGenes[[2]], 100)
inSet <- list(Set1 = set1, Set2 = set2)

# Run GSEA on custom gene sets:
ptn <- gseaAnalysis(
  ptn = ptn,
  geneSet = inSet,
  name = "Example"
)

## ----GSEAplotting, eval=TRUE, echo=TRUE---------------------------------------
# Extract the significant enrichment results from the postNetData object:
gseaOut <- ptn_GSEA(ptn,
  threshold = 0.05
)

# Plot GSEA results:
gseaPlot(
  ptn = ptn,
  termNames = gseaOut$Term[1]
)

## ----GSEAplottingPNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="A plot visualizing GSEA results for the enrichment of the gene Set1 in the example dataset."----
knitr::include_graphics("Figures/Figure24.png")

## ----GAGEanalysis, eval=FALSE, echo=TRUE--------------------------------------
# # Run GAGE:
# ptn <- gageAnalysis(ptn,
#   category = "MF"
# )
# 
# # Extract the significant enrichment results from the postNetData object:
# gageOut <- ptn_GAGE(
#   ptn = ptn,
#   category = "MF",
#   direction = "greater",
#   threshold = 0.05
# )

## ----miRNAanalysis, eval=FALSE, echo=TRUE-------------------------------------
# # An example of the required format for the input for miRNATargetScanFile
# # parameter ("miRNA_predictions_hsa.txt"):
# 
# # Transcript.ID	Gene.Symbol	miRNA.family	Species.ID	Total.num.conserved.sites
# # Number.of.conserved.8mer.sites	Number.of.conserved.7mer.m8.sites
# # Number.of.conserved.7mer.1a.sites	Total.num.nonconserved.sites
# # Number.of.nonconserved.8mer.sites	Number.of.nonconserved.7mer.m8.sites
# # Number.of.nonconserved.7mer.1a.sites	#Number.of.6mer.sites	Representative.miRNA
# # Total.context...score	Cumulative.weighted.context...score	# Aggregate.PCT
# # Predicted.occupancy...low.miRNA	Predicted.occupancy...high.miRNA
# # Predicted.occupancy...transfected.miRNA
# # ENST00000055682.6	KIAA2022	UGGCACU	9606	3	0	0	3	0	0	0	0	0
# # hsa-miR-183-5p.2	-0.604	-0.604	1	0.0807	0.5089	1.8669
# # ENST00000207157.3	TBX15	UGGCACU	9606	1	1	0	0	0	0	0	0	1
# # hsa-miR-183-5p.2	-0.589	-0.552	1	0.1287	0.5218	0.8900
# # ENST00000215832.6	MAPK1	ACAGUAC	9606	2	0	1	1	3	0	2	1	3
# # hsa-miR-101-3p.1	-0.509	-0.279	1	0.0244	0.1671	0.8723
# 
# # Note that this target prediction file has been filtered to include only human miRNA ('hsa')
# 
# # Run GAGE miRNA target enrichment analysis:
# ptn <- miRNAanalysis(ptn,
#   genesSlopeFiltOut = filtOutGenes,
#   miRNATargetScanFile = "miRNA_predictions_hsa.txt",
#   contextScore = -0.2,
#   Pct = 0.7
# )

## ----miRNAanalysisResults, eval=FALSE, echo=TRUE------------------------------
# # Extract the significant miRNA target enrichment results from the postNetData object:
# miRNAout <- ptn_miRNA_analysis(
#   ptn = ptn,
#   direction = "less",
#   threshold = 0.05
# )
# 
# # Extract the lists of genes for miRNA that were found to have targets significantly
# # enriched in the downregulated gene set of interest:
# 
# miRNAtargets <- ptn_miRNA_to_gene(
#   ptn = ptn,
#   miRNAs = c("hsa-miR-138-5p", "hsa-miR-182-5p")
# )

## ----GOanalysis, eval=FALSE, echo=TRUE----------------------------------------
# # Run GO term analysis using a postNetData object:
# ptn <- goAnalysis(
#   ptn = ptn,
#   category = c("BP"), # Note that multiple terms can be run simultaneously
#   name = "Example"
# )

## ----GOplotting, eval=FALSE, echo=TRUE----------------------------------------
# # Extract the significant enrichment results for Biological Process:
# goOut <- ptn_GO(ptn,
#   category = "BP",
#   geneList = "translationUp",
#   threshold = 0.05
# )
# 
# # Plot GO term analysis results:
# goDotplot(
#   ptn = ptn,
#   category = "BP",
#   pool = TRUE,
#   size = "geneRatio",
#   pdfName = "Example"
# )

## ----GOplottingPNG, eval=TRUE,echo=FALSE, fig.wide = TRUE, fig.cap="A dot plot visualizing GO term enrichment analysis results. Dot size corresponds to the ratio of genes included, to the total number of genes in the term."----
knitr::include_graphics("Figures/Figure25.png")

## ----plotSignatures, eval=TRUE, echo=TRUE, message = FALSE, results = 'hide'----
# load human signature data:
data("humanSignatures")

# Examine the regulation of mTOR-sensitive transcripts
plotSignatures(
  ptn = ptn,
  signatureList = humanSignatures[c(
    "Gandin_etal_2016_mTOR_transUp",
    "Gandin_etal_2016_mTOR_transDown"
  )],
  signature_colours = c("red", "blue"),
  dataName = "Example",
  generalName = "mTOR_sensitive_translation",
  xlim = c(-3, 3),
  tableCex = 0.7,
  pdfName = "mTORsignatures"
)

## ----plotSignaturesPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="eCDF of log2 fold changes in translation efficiency for signatures of mTOR-sensitive translation. Grey curve indicates the background gene set. Wilcoxon p-values and differences between the distributions at each quantile are indicated. A shift to the right indicates an increase compared to background, while shift to the left indicates a decrease."----
knitr::include_graphics("Figures/Figure26.png")

## ----plotSignaturesAds, eval=TRUE, echo=TRUE, results='hide', warning = FALSE----
# Examine the regulation of ISR-sensitive transcripts in the results of an anota2seq analysis:
plotSignatures_ads(
  ads = ads,
  contrast = 1,
  dataName = "Osmosis_1h",
  signatureList = humanSignatures[c(
    "Guan_etal_2017_Tg1_transUp",
    "Guan_etal_2017_Tg1_transDown"
  )],
  generalName = c("ISR_activated", "ISR_suppressed"),
  signature_colours = c("red", "blue"),
  xlim = c(-3, 3),
  scatterXY = 4,
  tableCex = 1,
  pdfName = "Example_ISRsignatures"
)

## ----plotSignaturesAdsPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="Gene signature analysis starting from an anota2seqDataSet. Scatterplot of total vs. translated mRNA from an example anota2seq analysis with the location of genes belonging to signatures of interest coloured (left panel). Subsequent panels show the eCDFs of log2 fold changes in translation and buffering regulatory modes, as well as for translated and total mRNA for the signatures of interest. Grey curves indicate the background gene set. Wilcoxon p-values and differences between the distributions at each quantile are indicated. A shift to the right indicates an increase compared to background, while shift to the left indicates a decrease."----
knitr::include_graphics("Figures/Figure27.png")

## ----plotSignaturesHeatmap, eval=TRUE, echo=TRUE, results='hide'--------------
# Examine the regulation of all gene signatures:
signaturesHeatmap(ptn,
  signatureList = humanSignatures,
  unit = "FDR",
  pdfName = "ExampleSignatures"
)

## ----plotSignaturesHeatmapPNG, eval=TRUE, echo=FALSE, fig.wide = TRUE, fig.cap="A heatmap of -log10(FDRs) for regulation of various gene signatures in the example dataset. Colours indicate the directionality of regulation."----
knitr::include_graphics("Figures/Figure28.png")

## ----sessionInfo--------------------------------------------------------------
sessionInfo()