## -----------------------------------------------------------------------------
## Script walking through the `mutscan` functions used below: digesting FASTQ
## files with `digestFastqs()`, combining the results with
## `summarizeExperiment()`, and running the plotting and scoring functions on
## the resulting object.
##
## Each `## ----label, options----` line marks the start of a code chunk. These
## markers are ordinary R comments, so each one MUST stay on its own line:
## anything that shares a line with a marker is commented out.
## -----------------------------------------------------------------------------

## ----chunk-opts, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, message = FALSE---------------------------------------------------
library(SummarizedExperiment)
library(mutscan)

## ----overview-figure, fig.align = 'center', out.width = "100%", fig.cap = "Overview of the `mutscan` functionality. The `digestFastqs()` function processes each sequencing library, possibly consisting of multiple (pairs of) FASTQ files, separately, and generates an output object containing, among other things, the count table and a summary of the filtering steps. The `summarizeExperiment()` function takes one or more of these objects and combines them into a `SummarizedExperiment` object, that can then be used for downstream analysis such as plotting and statistical testing.", echo = FALSE----
knitr::include_graphics("mutscan-overview.001.png")

## ----list-files---------------------------------------------------------------
## Example data files shipped in the package's `extdata` directory
datadir <- system.file("extdata", package = "mutscan")
dir(datadir)

## ----trans-digest-------------------------------------------------------------
## Digest the `transInput` pair of FASTQ files. Forward and reverse reads are
## kept separate (mergeForwardReverse = FALSE) and each read gets its own
## wild type sequence (FOS for the forward read, JUN for the reverse read).
transInput <- digestFastqs(
  fastqForward = file.path(datadir, "transInput_1.fastq.gz"),
  fastqReverse = file.path(datadir, "transInput_2.fastq.gz"),
  mergeForwardReverse = FALSE,
  revComplForward = FALSE,
  revComplReverse = FALSE,
  adapterForward = "GGAAGAGCACACGTC",
  adapterReverse = "GGAAGAGCGTCGTGT",
  elementsForward = "SUCV",
  elementsReverse = "SUCV",
  elementLengthsForward = c(1, 10, 18, 96),
  elementLengthsReverse = c(1, 8, 20, 96),
  constantForward = "AACCGGAGGAGGGAGCTG",
  constantReverse = "GAAAAAGGAAGCTGGAGAGA",
  avePhredMinForward = 20,
  avePhredMinReverse = 20,
  variableNMaxForward = 0,
  variableNMaxReverse = 0,
  umiNMax = 0,
  wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
  wildTypeReverse = c(JUN = "ATCGCCCGGCTGGAGGAAAAAGTGAAAACCTTGAAAGCTCAGAACTCGGAGCTGGCGTCCACGGCCAACATGCTCAGGGAACAGGTGGCACAGCTT"),
  nbrMutatedCodonsMaxForward = 1,
  nbrMutatedCodonsMaxReverse = 1,
  forbiddenMutatedCodonsForward = "NNW",
  forbiddenMutatedCodonsReverse = "NNW",
  mutNameDelimiter = ".",
  constantMaxDistForward = -1,
  constantMaxDistReverse = -1,
  verbose = FALSE
)

## ----trans-pars---------------------------------------------------------------
transInput$parameters

## ----trans-filter-------------------------------------------------------------
transInput$filterSummary

## ----trans-summary------------------------------------------------------------
head(transInput$summaryTable)

## ----trans-error--------------------------------------------------------------
## Show only the rows where at least one column other than the first is nonzero
transInput$errorStatistics[rowSums(transInput$errorStatistics[, -1]) != 0, ]

## ----trans-error-stats--------------------------------------------------------
## Total number of mismatches divided by (constant sequence length x number of
## retained reads), separately for the forward and the reverse read. The outer
## parentheses make each assignment print its value.
(propErrorsConstantF <- sum(transInput$errorStatistics$nbrMismatchForward) /
   (nchar(transInput$parameters$constantForward) *
      transInput$filterSummary$nbrRetained))
(propErrorsConstantR <- sum(transInput$errorStatistics$nbrMismatchReverse) /
   (nchar(transInput$parameters$constantReverse) *
      transInput$filterSummary$nbrRetained))

## ----cis-digest---------------------------------------------------------------
## Digest the `cisInput` pair of FASTQ files. Here the forward and reverse
## reads are merged (mergeForwardReverse = TRUE), the reverse read is
## reverse-complemented, and only a forward wild type sequence is supplied.
cisInput <- digestFastqs(
  fastqForward = file.path(datadir, "cisInput_1.fastq.gz"),
  fastqReverse = file.path(datadir, "cisInput_2.fastq.gz"),
  mergeForwardReverse = TRUE,
  minOverlap = 0,
  maxOverlap = 0,
  maxFracMismatchOverlap = 1,
  greedyOverlap = TRUE,
  revComplForward = FALSE,
  revComplReverse = TRUE,
  adapterForward = "GGAAGAGCACACGTC",
  adapterReverse = "GGAAGAGCGTCGTGT",
  elementsForward = "SUCV",
  elementsReverse = "SUCVS",
  elementLengthsForward = c(1, 10, 18, 96),
  elementLengthsReverse = c(1, 7, 17, 96, -1),
  constantForward = "AACCGGAGGAGGGAGCTG",
  constantReverse = "GAGTTCATCCTGGCAGC",
  primerForward = "",
  primerReverse = "",
  avePhredMinForward = 20,
  avePhredMinReverse = 20,
  variableNMaxForward = 0,
  variableNMaxReverse = 0,
  umiNMax = 0,
  wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
  wildTypeReverse = "",
  nbrMutatedCodonsMaxForward = 1,
  nbrMutatedCodonsMaxReverse = 1,
  forbiddenMutatedCodonsForward = "NNW",
  forbiddenMutatedCodonsReverse = "NNW",
  mutNameDelimiter = ".",
  constantMaxDistForward = -1,
  constantMaxDistReverse = -1,
  verbose = TRUE
)

## ----cis-out------------------------------------------------------------------
cisInput$parameters
cisInput$filterSummary
cisInput$errorStatistics[rowSums(cisInput$errorStatistics[, -1]) != 0, ]

## ----cis-summary--------------------------------------------------------------
head(cisInput$summaryTable)

## ----def-primers--------------------------------------------------------------
## Named character vector of sequences, one entry per name. It is passed as
## `wildTypeForward` in the `primers-digest` chunk below.
leu <- c(
  ATF2 = "GATCCTGATGAAAAAAGGAGAAAGTTTTTAGAGCGAAATAGAGCAGCAGCTTCAAGATGCCGACAAAAAAGGAAAGTCTGGGTTCAGTCTTTAGAGAAGAAAGCTGAAGACTTGAGTTCATTAAATGGTCAGCTGCAGAGTGAAGTCACCCTGCTGAGAAATGAAGTGGCACAGCTGAAACAGCTTCTTCTGGCT",
  ATF7 = "GATCCAGATGAGCGACGGCAGCGCTTTCTGGAGCGCAACCGGGCTGCAGCCTCCCGCTGCCGCCAAAAGCGAAAGCTGTGGGTGTCCTCCCTAGAGAAGAAGGCCGAAGAACTCACTTCTCAGAACATTCAGCTGAGTAATGAAGTCACATTACTACGCAATGAGGTGGCCCAGTTGAAACAGCTACTGTTAGCT",
  CREB5 = "GATCCGGACGAGAGGCGGCGGAAATTTCTGGAACGGAACCGGGCAGCTGCCACCCGCTGCAGACAGAAGAGGAAGGTCTGGGTGATGTCATTGGAAAAGAAAGCAGAAGAACTCACCCAGACAAACATGCAGCTTCAGAATGAAGTGTCTATGTTGAAAAATGAGGTGGCCCAGCTGAAACAGTTGTTGTTAACA",
  ATF3 = "GAAGAAGATGAAAGGAAAAAGAGGCGACGAGAAAGAAATAAGATTGCAGCTGCAAAGTGCCGAAACAAGAAGAAGGAGAAGACGGAGTGCCTGCAGAAAGAGTCGGAGAAGCTGGAAAGTGTGAATGCTGAACTGAAGGCTCAGATTGAGGAGCTCAAGAACGAGAAGCAGCATTTGATATACATGCTCAACCTT",
  JDP2 = "GAGGAAGAGGAGCGAAGGAAAAGGCGCCGGGAGAAGAACAAAGTCGCAGCAGCCCGATGCCGGAACAAGAAGAAGGAGCGCACGGAGTTTCTGCAGCGGGAATCCGAGCGGCTGGAACTCATGAACGCAGAGCTGAAGACCCAGATTGAGGAGCTGAAGCAGGAGCGGCAGCAGCTCATCCTGATGCTGAACCGA",
  ATF4 = "GAGAAACTGGATAAGAAGCTGAAAAAAATGGAGCAAAACAAGACAGCAGCCACTAGGTACCGCCAGAAGAAGAGGGCGGAGCAGGAGGCTCTTACTGGTGAGTGCAAAGAGCTGGAAAAGAAGAACGAGGCTCTAAAAGAGAGGGCGGATTCCCTGGCCAAGGAGATCCAGTACCTGAAAGATTTGATAGAAGAG",
  ATF5 = "ACCCGAGGGGACCGCAAGCAAAAGAAGAGAGACCAGAACAAGTCGGCGGCTCTGAGGTACCGCCAGCGGAAGCGGGCAGAGGGTGAGGCCCTGGAGGGCGAGTGCCAGGGGCTGGAGGCACGGAATCGCGAGCTGAAGGAACGGGCAGAGTCCGTGGAGCGCGAGATCCAGTACGTCAAGGACCTGCTCATCGAG",
  CREBZF = "AGTCCCCGGAAGGCGGCGGCGGCCGCTGCCCGCCTTAATCGACTGAAGAAGAAGGAGTACGTGATGGGGCTGGAGAGTCGAGTCCGGGGTCTGGCAGCCGAGAACCAGGAGCTGCGGGCCGAGAATCGGGAGCTGGGCAAACGCGTACAGGCACTGCAGGAGGAGAGTCGCTACCTACGGGCAGTCTTAGCCAAC",
  BATF2 = "CCCAAGGAGCAACAAAGGCAGCTGAAGAAGCAGAAGAACCGGGCAGCCGCCCAGCGAAGCCGGCAGAAGCACACAGACAAGGCAGACGCCCTGCACCAGCAGCACGAGTCTCTGGAAAAAGACAACCTCGCCCTGCGGAAGGAGATCCAGTCCCTGCAGGCCGAGCTGGCGTGGTGGAGCCGGACCCTGCACGTG",
  BATF3 = "GAGGATGATGACAGGAAGGTCCGAAGGAGAGAAAAAAACCGAGTTGCTGCTCAGAGAAGTCGGAAGAAGCAGACCCAGAAGGCTGACAAGCTCCATGAGGAATATGAGAGCCTGGAGCAAGAAAACACCATGCTGCGGAGAGAGATCGGGAAGCTGACAGAGGAGCTGAAGCACCTGACAGAGGCACTGAAGGAG",
  CEBPE = "AAAGATAGCCTTGAGTACCGGCTGAGGCGGGAGCGCAACAACATCGCCGTGCGCAAGAGCCGAGACAAGGCCAAGAGGCGCATTCTGGAGACGCAGCAGAAGGTGCTGGAGTACATGGCAGAGAACGAGCGCCTCCGCAGCCGCGTGGAGCAGCTCACCCAGGAGCTAGACACCCTCCGCAACCTCTTCCGCCAG",
  BACH1 = "CTGGATTGTATCCATGATATTCGAAGAAGAAGTAAAAACAGAATTGCTGCACAGCGCTGTCGCAAGAGAAAACTTGACTGTATACAGAATCTTGAATCAGAAATTGAGAAGCTGCAAAGTGAAAAGGAGAGCTTGTTGAAGGAAAGAGATCACATTTTGTCAACTCTGGGTGAGACAAAGCAGAACCTAACTGGA",
  BACH2 = "TTAGAGTTTATTCATGATGTCCGACGGCGCAGCAAGAACCGCATCGCGGCCCAGCGCTGCCGCAAAAGGAAACTGGACTGTATTCAGAATTTAGAATGTGAAATCCGCAAATTGGTGTGTGAGAAAGAGAAACTGTTGTCAGAGAGGAATCAACTGAAAGCATGCATGGGGGAACTGTTGGACAACTTCTCCTGC",
  NFE2L1 = "CTGAGCCTCATCCGAGACATCCGGCGCCGGGGCAAGAACAAGATGGCGGCGCAGAACTGCCGCAAGCGCAAGCTGGACACCATCCTGAATCTGGAGCGTGATGTGGAGGACCTGCAGCGTGACAAAGCCCGGCTGCTGCGGGAGAAAGTGGAGTTCCTGCGCTCCCTGCGACAGATGAAGCAGAAGGTCCAGAGC",
  NFE2 = "CTAGCGCTAGTCCGGGACATCCGACGACGGGGCAAAAACAAGGTGGCAGCCCAGAACTGCCGCAAGAGGAAGCTGGAAACCATTGTGCAGCTGGAGCGGGAGCTGGAGCGGCTGACCAATGAACGGGAGCGGCTTCTCAGGGCCCGCGGGGAGGCAGACCGGACCCTGGAGGTCATGCGCCAACAGCTGACAGAG",
  NFIL3 = "AAGAAAGATGCTATGTATTGGGAAAAAAGGCGGAAAAATAATGAAGCTGCCAAAAGATCTCGTGAGAAGCGTCGACTGAATGACCTGGTTTTAGAGAACAAACTAATTGCACTGGGAGAAGAAAACGCCACTTTAAAAGCTGAGCTGCTTTCACTAAAATTAAAGTTTGGTTTAATTAGCTCCACAGCATATGCT",
  FOS = "GAAGAAGAAGAGAAAAGGAGAATCCGAAGGGAAAGGAATAAGATGGCTGCAGCCAAATGCCGCAACCGGAGGAGGGAGCTGACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTAGAGTTCATCCTGGCAGCT",
  FOSB = "GAGGAAGAGGAGAAGCGAAGGGTGCGCCGGGAACGAAATAAACTAGCAGCAGCTAAATGCAGGAACCGGCGGAGGGAGCTGACCGACCGACTCCAGGCGGAGACAGATCAGTTGGAGGAAGAAAAAGCAGAGCTGGAGTCGGAGATCGCCGAGCTCCAAAAGGAGAAGGAACGTCTGGAGTTTGTGCTGGTGGCC",
  FOSL1 = "GAGGAAGAGGAGCGCCGCCGAGTAAGGCGCGAGCGGAACAAGCTGGCTGCGGCCAAGTGCAGGAACCGGAGGAAGGAACTGACCGACTTCCTGCAGGCGGAGACTGACAAACTGGAAGATGAGAAATCTGGGCTGCAGCGAGAGATTGAGGAGCTGCAGAAGCAGAAGGAGCGCCTAGAGCTGGTGCTGGAAGCC",
  FOSL2 = "GAAGAGGAGGAGAAGCGTCGCATCCGGCGGGAGAGGAACAAGCTGGCTGCAGCCAAGTGCCGGAACCGACGCCGGGAGCTGACAGAGAAGCTGCAGGCGGAGACAGAGGAGCTGGAGGAGGAGAAGTCAGGCCTGCAGAAGGAGATTGCTGAGCTGCAGAAGGAGAAGGAGAAGCTGGAGTTCATGTTGGTGGCT",
  MAFB = "GTGATCCGCCTGAAGCAGAAGCGGCGGACCCTGAAGAACCGGGGCTACGCCCAGTCTTGCAGGTATAAACGCGTCCAGCAGAAGCACCACCTGGAGAATGAGAAGACGCAGCTCATTCAGCAGGTGGAGCAGCTTAAGCAGGAGGTGTCCCGGCTGGCCCGCGAGAGAGACGCCTACAAGGTCAAGTGCGAGAAA",
  JUN = "CAGGAGCGGATCAAGGCGGAGAGGAAGCGCATGAGGAACCGCATCGCTGCCTCCAAGTGCCGAAAAAGGAAGCTGGAGAGAATCGCCCGGCTGGAGGAAAAAGTGAAAACCTTGAAAGCTCAGAACTCGGAGCTGGCGTCCACGGCCAACATGCTCAGGGAACAGGTGGCACAGCTTAAACAGAAAGTCATGAAC",
  JUNB = "CAAGAGCGCATCAAAGTGGAGCGCAAGCGGCTGCGGAACCGGCTGGCGGCCACCAAGTGCCGGAAGCGGAAGCTGGAGCGCATCGCGCGCCTGGAGGACAAGGTGAAGACGCTCAAGGCCGAGAACGCGGGGCTGTCGAGTACCGCCGGCCTCCTCCGGGAGCAGGTGGCCCAGCTCAAACAGAAGGTCATGACC",
  JUND = "CAGGAGCGCATCAAGGCGGAGCGCAAGCGGCTGCGCAACCGCATCGCCGCCTCCAAGTGCCGCAAGCGCAAGCTGGAGCGCATCTCGCGCCTGGAAGAGAAAGTGAAGACCCTCAAGAGTCAGAACACGGAGCTGGCGTCCACGGCGAGCCTGCTGCGCGAGCAGGTGGCGCAGCTCAAGCAGAAAGTCCTCAGC",
  CREB3 = "GAACAAATTCTGAAACGTGTGCGGAGGAAGATTCGAAATAAAAGATCTGCTCAAGAGAGCCGCAGGAAAAAGAAGGTGTATGTTGGGGGTTTAGAGAGCAGGGTCTTGAAATACACAGCCCAGAATATGGAGCTTCAGAACAAAGTACAGCTTCTGGAGGAACAGAATTTGTCCCTTCTAGATCAACTGAGGAAA",
  HLF = "CTGAAGGATGACAAGTACTGGGCAAGGCGCAGAAAGAACAACATGGCAGCCAAGCGCTCCCGCGACGCCCGGAGGCTGAAAGAGAACCAGATCGCCATCCGGGCCTCGTTCCTGGAGAAGGAGAACTCGGCCCTCCGCCAGGAGGTGGCTGACTTGAGGAAGGAGCTGGGCAAATGCAAGAACATACTTGCCAAG",
  MAFG = "ATCGTCCAGCTGAAGCAGCGCCGGCGCACGCTCAAGAACCGCGGCTACGCTGCCAGCTGCCGCGTGAAGCGGGTGACGCAGAAGGAGGAGCTGGAGAAGCAGAAGGCGGAGCTGCAGCAGGAGGTGGAGAAGCTGGCCTCAGAGAACGCCAGCATGAAGCTGGAGCTCGACGCGCTGCGCTCCAAGTACGAGGCG",
  MAFK = "GTGACCCGCCTGAAGCAGCGTCGGCGCACACTCAAGAACCGCGGCTACGCGGCCAGCTGCCGCATCAAGCGGGTGACGCAGAAGGAGGAGCTGGAGCGGCAGCGCGTGGAGCTGCAGCAGGAGGTGGAGAAGCTGGCGCGTGAGAACAGCAGCATGCGGCTGGAGCTGGACGCCCTGCGCTCCAAGTACGAGGCG",
  XBP1 = "AGCCCCGAGGAGAAGGCGCTGAGGAGGAAACTGAAAAACAGAGTAGCAGCTCAGACTGCCAGAGATCGAAAGAAGGCTCGAATGAGTGAGCTGGAACAGCAAGTGGTAGATTTAGAAGAAGAGAACCAAAAACTTTTGCTAGAAAATCAGCTTTTACGAGAGAAAACTCATGGCCTTGTAGTTGAGAACCAGGAG",
  ATF6 = "ATTGCTGTGCTAAGGAGACAGCAACGTATGATAAAAAATCGAGAATCCGCTTGTCAGTCTCGCAAGAAGAAGAAAGAATATATGCTAGGGTTAGAGGCGAGATTAAAGGCTGCCCTCTCAGAAAACGAGCAACTGAAGAAAGAAAATGGAACACTGAAGCGGCAGCTGGATGAAGTTGTGTCAGAGAACCAGAGG",
  ATF6B = "GCAAAGCTGCTGAAGCGGCAGCAGCGAATGATCAAGAACCGGGAGTCAGCCTGCCAGTCCCGGAGAAAGAAGAAAGAGTATCTGCAGGGACTGGAGGCTCGGCTGCAAGCAGTACTGGCTGACAACCAGCAGCTCCGCCGAGAGAATGCTGCCCTCCGGCGGCGGCTGGAGGCCCTGCTGGCTGAAAACAGCGAG",
  CEBPA = "AAGAACAGCAACGAGTACCGGGTGCGGCGCGAGCGCAACAACATCGCGGTGCGCAAGAGCCGCGACAAGGCCAAGCAGCGCAACGTGGAGACGCAGCAGAAGGTGCTGGAGCTGACCAGTGACAATGACCGCCTGCGCAAGCGGGTGGAACAGCTGAGCCGCGAACTGGACACGCTGCGGGGCATCTTCCGCCAG",
  CEBPB = "AAGCACAGCGACGAGTACAAGATCCGGCGCGAGCGCAACAACATCGCCGTGCGCAAGAGCCGCGACAAGGCCAAGATGCGCAACCTGGAGACGCAGCACAAGGTCCTGGAGCTCACGGCCGAGAACGAGCGGCTGCAGAAGAAGGTGGAGCAGCTGTCGCGCGAGCTCAGCACCCTGCGGAACTTGTTCAAGCAG",
  CEBPD = "CGCGGCAGCCCCGAGTACCGGCAGCGGCGCGAGCGCAACAACATCGCCGTGCGCAAGAGCCGCGACAAGGCCAAGCGGCGCAACCAGGAGATGCAGCAGAAGTTGGTGGAGCTGTCGGCTGAGAACGAGAAGCTGCACCAGCGCGTGGAGCAGCTCACGCGGGACCTGGCCGGCCTCCGGCAGTTCTTCAAGCAG",
  CEBPG = "CGAAACAGTGACGAGTATCGGCAACGCCGAGAGAGGAACAACATGGCTGTGAAAAAGAGCCGGTTGAAAAGCAAGCAGAAAGCACAAGACACACTGCAGAGAGTCAATCAGCTCAAAGAAGAGAATGAACGGTTGGAAGCAAAAATCAAATTGCTGACCAAGGAATTAAGTGTACTCAAAGATTTGTTTCTTGAG",
  CREB1 = "GAAGCAGCACGAAAGAGAGAGGTCCGTCTAATGAAGAACAGGGAAGCAGCTCGAGAGTGTCGTAGAAAGAAGAAAGAATATGTGAAATGTTTAGAAAACAGAGTGGCAGTGCTTGAAAATCAAAACAAGACATTGATTGAGGAGCTAAAAGCACTTAAGGACCTTTACTGCCACAAATCAGAT",
  CREB3L1 = "GAGAAGGCCTTGAAGAGAGTCCGGAGGAAAATCAAGAACAAGATCTCAGCCCAGGAGAGCCGTCGTAAGAAGAAGGAGTATGTGGAGTGTCTAGAAAAGAAGGTGGAGACATTTACATCTGAGAACAATGAACTGTGGAAGAAGGTGGAGACCCTGGAGAATGCCAACAGGACCCTGCTCCAGCAGCTGCAGAAA",
  CREB3L2 = "GAGAAGGCCCTGAAGAAAATTCGGAGGAAGATCAAGAATAAGATTTCTGCTCAGGAAAGTAGGAGAAAGAAGAAAGAATACATGGACAGCCTGGAGAAAAAAGTGGAGTCTTGTTCAACTGAGAACTTGGAGCTTCGGAAGAAGGTAGAGGTTCTAGAGAACACTAATAGGACTCTCCTTCAGCAACTCCAGAAG",
  CREB3L3 = "GAGCGAGTGCTGAAAAAAATCCGCCGGAAAATCCGGAACAAGCAGTCGGCGCAAGAAAGCAGGAAGAAGAAGAAGGAATATATCGATGGCCTGGAGACTCGGATGTCAGCTTGCACTGCTCAGAATCAGGAGTTACAGAGGAAAGTCTTGCATCTCGAGAAGCAAAACCTGTCCCTCTTGGAGCAACTGAAGAAA",
  CREB3L4 = "GAGAGGGTCCTCAAGAAGGTCAGGAGGAAAATCCGTAACAAGCAGTCAGCTCAGGACAGTCGGCGGCGGAAGAAGGAGTACATTGATGGGCTGGAGAGCAGGGTGGCAGCCTGTTCTGCACAGAACCAAGAATTACAGAAAAAAGTCCAGGAGCTGGAGAGGCACAACATCTCCTTGGTAGCTCAGCTCCGCCAG",
  CREBL2 = "CCAGCCAAAATTGACTTGAAAGCAAAACTTGAGAGGAGCCGGCAGAGTGCAAGAGAATGCCGAGCCCGAAAAAAGCTGAGATATCAGTATTTGGAAGAGTTGGTATCCAGTCGAGAAAGAGCTATATGTGCCCTCAGAGAGGAACTGGAAATGTACAAGCAGTGGTGCATGGCAATGGACCAAGGAAAAATCCCT",
  CREBRF = "CCCTTAACAGCCCGACCAAGGTCAAGGAAGGAAAAAAATAAGCTGGCTTCCAGAGCTTGTCGGTTAAAGAAGAAAGCCCAGTATGAAGCTAATAAAGTGAAATTATGGGGCCTCAACACAGAATATGATAATTTATTGTTTGTAATCAACTCCATCAAGCAAGAGATTGTAAACCGGGTACAGAATCCAAGAGAT",
  DBP = "CAGAAGGATGAGAAATACTGGAGCCGGCGGTACAAGAACAACGAGGCAGCCAAGCGGTCCCGTGACGCCCGGCGGCTCAAGGAGAACCAGATATCGGTGCGGGCGGCCTTCCTGGAGAAGGAGAACGCCCTGCTGCGGCAGGAAGTTGTGGCCGTGCGCCAGGAGCTGTCCCACTACCGCGCCGTGCTGTCCCGA",
  NFE2L2 = "CTTGCATTAATTCGGGATATACGTAGGAGGGGTAAGAATAAAGTGGCTGCTCAGAATTGCAGAAAAAGAAAACTGGAAAATATAGTAGAACTAGAGCAAGATTTAGATCATTTGAAAGATGAAAAAGAAAAATTGCTCAAAGAAAAAGGAGAAAATGACAAAAGCCTTCACCTACTGAAAAAACAACTCAGCACC",
  NFE2L3 = "GTCTCACTTATCCGTGACATCAGACGAAGAGGGAAAAATAAAGTTGCTGCGCAGAACTGTCGTAAACGCAAATTGGACATAATTTTGAATTTAGAAGATGATGTATGTAACTTGCAAGCAAAGAAGGAAACTCTTAAGAGAGAGCAAGCACAATGTAACAAAGCTATTAACATAATGAAACAGAAACTGCATGAC",
  TEF = "CAGAAGGATGAAAAGTACTGGACAAGACGCAAGAAGAACAACGTGGCAGCTAAACGGTCACGGGATGCCCGGCGCCTGAAAGAGAATCAGATCACCATCCGGGCAGCCTTCCTGGAGAAGGAGAACACAGCCCTGCGGACGGAGGTGGCCGAGCTACGCAAGGAGGTGGGCAAGTGCAAGACCATCGTGTCCAAG"
)

## ----primers-digest-----------------------------------------------------------
## Digest the `leujunt0` pair of FASTQ files. Primer sequences are given
## instead of constant sequences, and the forward read is compared against the
## whole `leu` collection with no mutated codons allowed
## (nbrMutatedCodonsMaxForward = 0).
leujunt0 <- digestFastqs(
  fastqForward = file.path(datadir, "leujunt0_1.fastq.gz"),
  fastqReverse = file.path(datadir, "leujunt0_2.fastq.gz"),
  mergeForwardReverse = FALSE,
  revComplForward = FALSE,
  revComplReverse = FALSE,
  elementsForward = "SPV",
  elementsReverse = "SPVS",
  elementLengthsForward = c(-1, 19, -1),
  elementLengthsReverse = c(-1, 20, 96, -1),
  constantForward = "",
  constantReverse = "",
  adapterForward = "",
  adapterReverse = "",
  primerForward = "GTCAGGTGGAGGCGGATCC",
  primerReverse = "GAAAAAGGAAGCTGGAGAGA",
  avePhredMinForward = 20,
  avePhredMinReverse = 20,
  variableNMaxForward = 0,
  variableNMaxReverse = 0,
  umiNMax = 0,
  wildTypeForward = leu,
  wildTypeReverse = c(JUN = "ATCGCCCGGCTGGAGGAAAAAGTGAAAACCTTGAAAGCTCAGAACTCGGAGCTGGCGTCCACGGCCAACATGCTCAGGGAACAGGTGGCACAGCTT"),
  nbrMutatedCodonsMaxForward = 0,
  nbrMutatedCodonsMaxReverse = 1,
  forbiddenMutatedCodonsForward = "",
  forbiddenMutatedCodonsReverse = "NNW",
  mutatedPhredMinForward = 0.0,
  mutatedPhredMinReverse = 0.0,
  mutNameDelimiter = ".",
  verbose = TRUE
)

## ----primers-out--------------------------------------------------------------
leujunt0$parameters
leujunt0$filterSummary

## ----primers-summary----------------------------------------------------------
head(leujunt0$summaryTable)

## ----combine-digest-----------------------------------------------------------
## Digest the `transOutput` pair of FASTQ files so that it can be combined
## with `transInput` below.
transOutput <- digestFastqs(
  fastqForward = file.path(datadir, "transOutput_1.fastq.gz"),
  fastqReverse = file.path(datadir, "transOutput_2.fastq.gz"),
  mergeForwardReverse = FALSE,
  revComplForward = FALSE,
  revComplReverse = FALSE,
  adapterForward = "GGAAGAGCACACGTC",
  adapterReverse = "GGAAGAGCGTCGTGT",
  elementsForward = "SUCV",
  elementsReverse = "SUCV",
  elementLengthsForward = c(1, 10, 18, 96),
  elementLengthsReverse = c(1, 8, 20, 96),
  constantForward = "AACCGGAGGAGGGAGCTG",
  constantReverse = "GAAAAAGGAAGCTGGAGAGA",
  primerForward = "",
  primerReverse = "",
  avePhredMinForward = 20,
  avePhredMinReverse = 20,
  variableNMaxForward = 0,
  variableNMaxReverse = 0,
  umiNMax = 0,
  wildTypeForward = c(FOS = "ACTGATACACTCCAAGCGGAGACAGACCAACTAGAAGATGAGAAGTCTGCTTTGCAGACCGAGATTGCCAACCTGCTGAAGGAGAAGGAAAAACTA"),
  wildTypeReverse = c(JUN = "ATCGCCCGGCTGGAGGAAAAAGTGAAAACCTTGAAAGCTCAGAACTCGGAGCTGGCGTCCACGGCCAACATGCTCAGGGAACAGGTGGCACAGCTT"),
  nbrMutatedCodonsMaxForward = 1,
  nbrMutatedCodonsMaxReverse = 1,
  forbiddenMutatedCodonsForward = "NNW",
  forbiddenMutatedCodonsReverse = "NNW",
  mutNameDelimiter = ".",
  constantMaxDistForward = -1,
  constantMaxDistReverse = -1,
  verbose = FALSE
)

## Generate SummarizedExperiment object
se <- summarizeExperiment(
  x = list(sample1 = transInput, sample2 = transOutput),
  coldata = data.frame(Name = c("sample1", "sample2"),
                       Condition = c("input", "output"),
                       Replicate = c("R1", "R1"),
                       OD = c(0.05, 1.5))
)

## The SummarizedExperiment contains a count matrix, and annotations
## for samples and variants in the `colData` and `rowData`,
## respectively.
head(assay(se, "counts"))
Matrix::colSums(assay(se, "counts"))
head(rowData(se))
colData(se)

## Count type (reads or UMIs)
metadata(se)$countType

## ----collapse-aa--------------------------------------------------------------
se_collapsed <- collapseMutantsByAA(se)
head(assay(se_collapsed, "counts"))
Matrix::colSums(assay(se_collapsed, "counts"))
colData(se_collapsed)

## ----read-se------------------------------------------------------------------
## NOTE: this replaces the `se` object built above with one read from an RDS
## file in `datadir`; everything below operates on the loaded object.
se <- readRDS(file.path(datadir, "GSE102901_cis_se.rds"))

## ----plot-filter-reads, fig.height=10, fig.width=8----------------------------
plotFiltering(se, valueType = "reads", onlyActiveFilters = TRUE,
              plotType = "remaining", facetBy = "sample", numberSize = 3)

## ----plot-filter-frac, fig.height=10, fig.width=8-----------------------------
plotFiltering(se, valueType = "fractions", onlyActiveFilters = TRUE,
              plotType = "filtered", facetBy = "step", numberSize = 3)

## ----plot-pairs, fig.height=7-------------------------------------------------
plotPairs(se, selAssay = "counts")

## ----plot-more----------------------------------------------------------------
plotTotals(se, selAssay = "counts")
plotDistributions(se, selAssay = "counts", plotType = "density",
                  pseudocount = 1)

## ----make-qc-report, eval=FALSE-----------------------------------------------
# generateQCReport(se, outFile = tempfile(fileext = ".html"))

## ----calc-ppi-----------------------------------------------------------------
se_collapsed <- collapseMutantsByAA(se)
ppis <- calculateFitnessScore(
  se = se_collapsed,
  pairingCol = "Replicate",
  ODCols = c("OD1", "OD2"),
  comparison = c("Condition", "cis_output", "cis_input"),
  WTrows = "f.0.WT"
)
## Rows ordered by decreasing absolute row mean
head(ppis[order(abs(rowMeans(ppis)), decreasing = TRUE), , drop = FALSE])

## The PPI score for the WT sequence is 1, by construction
ppis["f.0.WT", , drop = FALSE]

## ----def-mm-------------------------------------------------------------------
model.matrix(~ Replicate + Condition, data = colData(se_collapsed))

## ----calc-rel-fc--------------------------------------------------------------
## edgeR
edger_scores <- calculateRelativeFC(
  se = se_collapsed,
  design = model.matrix(~ Replicate + Condition,
                        data = colData(se_collapsed)),
  coef = "Conditioncis_output",
  pseudocount = 1,
  WTrows = "f.0.WT",
  method = "edgeR"
)
head(edger_scores[order(edger_scores$PValue), , drop = FALSE])

## As before, the WT sequence has a logFC close to 0, by construction
edger_scores["f.0.WT", , drop = FALSE]

## limma
limma_scores <- calculateRelativeFC(
  se = se_collapsed,
  design = model.matrix(~ Replicate + Condition,
                        data = colData(se_collapsed)),
  coef = "Conditioncis_output",
  pseudocount = 1,
  WTrows = "f.0.WT",
  method = "limma"
)
head(limma_scores[order(limma_scores$P.Value), , drop = FALSE])

## As before, the WT sequence has a logFC close to 0, by construction
limma_scores["f.0.WT", , drop = FALSE]

## ----plot-rel-fc--------------------------------------------------------------
plotMeanDiff(edger_scores, pointSize = "large")
plotVolcano(edger_scores, pointSize = "large")

## ----session-info-------------------------------------------------------------
sessionInfo()