## ----<knitr, echo=FALSE, message=FALSE, results="hide"--------------------- library("knitr") opts_chunk$set( tidy = TRUE, dev = "png", fig.width = 11, cache = TRUE, message = FALSE) ## ----loadLibaries, echo=TRUE, message=FALSE, results="hide"---------------- library(SNPhoodData) library(SNPhood) ## ----checkFiles, echo=TRUE------------------------------------------------- (files = list.files(pattern = "*",system.file("extdata", package = "SNPhoodData"),full.names = TRUE)) fileUserRegions = files[grep(".txt", files)] fileGenotypes = files[grep("genotypes", files)] ## ----getParameters, echo=TRUE---------------------------------------------- (par.l = getDefaultParameterList(path_userRegions = fileUserRegions, isPairedEndData = TRUE)) ## ----changeParameters, echo=TRUE------------------------------------------- # Verify that you do not have zero-based coordinates par.l$zeroBasedCoordinates par.l$readGroupSpecific par.l$poolDatasets = TRUE par.l$binSize = 25 ## ----createFileList, echo=TRUE--------------------------------------------- patternBAMFiles = paste0(dirname(files[3]), "/*.bam") (files.df = collectFiles(patternFiles = patternBAMFiles, verbose = TRUE)) ## ----assignIndividualID, echo=TRUE----------------------------------------- files.df$individual = c("GM10847", "GM10847", "GM12890", "GM12890") files.df ## ----qualityTestPrep, echo=TRUE, results="hide"---------------------------- par.l$poolDatasets = FALSE SNPhood.o = analyzeSNPhood(par.l, files.df, onlyPrepareForDatasetCorrelation = TRUE, verbose = TRUE) ## ----qualityTest, echo=TRUE------------------------------------------------ SNPhood.o = plotAndCalculateCorrelationDatasets(SNPhood.o, fileToPlot = NULL) corrResults = results(SNPhood.o, type = "samplesCorrelation") mean(corrResults$corTable[lower.tri(corrResults$corTable)]) ## ----runAnalysis, echo=TRUE, results="hide"-------------------------------- par.l$poolDatasets = TRUE SNPhood.o = analyzeSNPhood(par.l, files.df, onlyPrepareForDatasetCorrelation = FALSE, verbose = TRUE) ## ----SNPhoodObj, echo=TRUE, results="hide"--------------------------------- SNPhood.o ## Retrieve number of regions, datasets, bins, and read groups nRegions(SNPhood.o) nDatasets(SNPhood.o) nBins(SNPhood.o) nReadGroups(SNPhood.o) ## Retrieve general annotation of SNPhood object with all its different elements names(annotation(SNPhood.o)) annotation(SNPhood.o)$regions annotation(SNPhood.o)$files ## Retrieve the parameters that were used for the analysis head(parameters(SNPhood.o)) names(parameters(SNPhood.o)) ## Retrieve annotation of regions head(annotationRegions(SNPhood.o)) ## Retrieve annotation of bins annotationBins(SNPhood.o) head(annotationBins2(SNPhood.o,fullAnnotation = TRUE)) SNP_names = c("rs7275860","rs76473124") head(annotationBins2(SNPhood.o, regions = 1:10, fullAnnotation = FALSE)) annotationBins2(SNPhood.o, regions = SNP_names, fullAnnotation = TRUE) ## Retrieve annotation of datasets annotationDatasets(SNPhood.o) annotationReadGroups(SNPhood.o) ## Extract counts after the binning # Extract one count matrix from the paternal read group from the first dataset head(counts(SNPhood.o, type = "binned", readGroup = "paternal", dataset = 1)) # Extract count matrices from all read groups from the first dataset str(counts(SNPhood.o, type = "binned", readGroup = NULL, dataset = 1)) # Extract count matrices from all read groups from the first dataset (using its name) DataSetName <- annotationDatasets(SNPhood.o)[1] str(counts(SNPhood.o, type = "binned", readGroup = NULL, dataset = DataSetName)) # Extract count matrices from all read groups from the all dataset str(counts(SNPhood.o, type = "binned", dataset = NULL)) ## Similarly, you can also extract counts before the binning head(counts(SNPhood.o, type = "unbinned", readGroup = "paternal", dataset = 1)) ## If you had enrichments instead of counts, you would employ the enrichments method in analogy to counts enrichment(SNPhood.o, readGroup = "paternal") ## ----SNPhoodObj2, echo=TRUE, results="hide"-------------------------------- SNPhood.o ## Rename regions, datasets, bins, and read groups mapping = as.list(paste0(annotationRegions(SNPhood.o),".newName")) names(mapping) = annotationRegions(SNPhood.o) SNPhood_mod.o = renameRegions(SNPhood.o, mapping) mapping = list("Individual1", "Individual2") names(mapping) = annotationDatasets(SNPhood.o) SNPhood_mod.o = renameDatasets(SNPhood.o, mapping) mapping = list("Bin1_NEW") names(mapping) = annotationBins(SNPhood.o)[1] SNPhood_mod.o = renameBins(SNPhood.o, mapping) mapping = list("a", "b", "c") names(mapping) = annotationReadGroups(SNPhood.o) SNPhood_mod.o = renameReadGroups(SNPhood.o, mapping) ## Delete regions, datasets, and read groups (deleting bins is still in development) SNPhood_mod.o = deleteRegions(SNPhood.o, regions = 1:5) SNPhood_mod.o = deleteRegions(SNPhood.o, regions = c("rs9984805", "rs59121565")) SNPhood_mod.o = deleteDatasets(SNPhood.o, datasets = 1) SNPhood_mod.o = deleteDatasets(SNPhood.o, datasets = "GM12890") # For read groups, we currently support only a name referral SNPhood_mod.o = deleteReadGroups(SNPhood.o, readGroups = "paternal") ## Merge read groups SNPhood_merged.o = mergeReadGroups(SNPhood.o) nReadGroups(SNPhood_merged.o) annotationReadGroups(SNPhood_merged.o) ## ----visualizeCounts, echo=TRUE, results="hide"---------------------------- plotBinCounts(SNPhood.o, regions = 2) plotBinCounts(SNPhood.o, regions = 2, plotGenotypeRatio = TRUE, readGroups = c("paternal","maternal")) plotRegionCounts(SNPhood.o, regions = 1:5, plotRegionBoundaries = TRUE, sizePoints = 2, plotRegionLabels = TRUE, mergeReadGroupCounts = TRUE) plotRegionCounts(SNPhood.o, regions = NULL, plotChr = "chr21", sizePoints = 2) ## ----visualizeCounts2, echo=TRUE, results="hide"--------------------------- plotBinCounts(SNPhood.o, regions = NULL, readGroups = c("paternal","maternal")) ## ----allelicBias, echo=TRUE------------------------------------------------ # Run the analysis, perform no time-consuming background calculation for now SNPhood.o = testForAllelicBiases(SNPhood.o, readGroups = c("paternal", "maternal"), calcBackgroundDistr = TRUE, nRepetitions = 100, verbose = FALSE) # Extract the results of the analysis, again using the results function names(results(SNPhood.o, type = "allelicBias")) head(results(SNPhood.o, type = "allelicBias", elements = "pValue")[[1]], 4) # Extract the results of the FDR calculation for the first dataset FDR_dataset1 = results(SNPhood.o, type = "allelicBias", elements = "FDR_results")[[1]] head(FDR_dataset1, 20) # Extract the results of the FDR calculation for the second dataset FDR_dataset2 = results(SNPhood.o, type = "allelicBias", elements = "FDR_results")[[2]] head(FDR_dataset2, 20) maxFDR = 0.1 signThresholdFDR_dataset1 = FDR_dataset1$pValueThreshold[max(which(FDR_dataset1$FDR < maxFDR))] signThresholdFDR_dataset2 = FDR_dataset2$pValueThreshold[max(which(FDR_dataset1$FDR < maxFDR))] ## ----allelicBias2, echo=TRUE----------------------------------------------- plotAllelicBiasResultsOverview(SNPhood.o, regions = NULL, plotChr = "chr21", signThreshold = 0.01, pValueSummary = "min") plotAllelicBiasResultsOverview(SNPhood.o, regions = 3:5, plotRegionBoundaries = TRUE, plotRegionLabels = TRUE, signThreshold = 0.01, pValueSummary = "min") ## ----allelicBias3, echo=TRUE, fig.height=7--------------------------------- plots = plotAllelicBiasResults(SNPhood.o, region = 2, signThreshold = 0.01, readGroupColors = c("blue", "red", "gray")) plots = plotAllelicBiasResults(SNPhood.o, region = 7, signThreshold = 0.01, readGroupColors = c("blue", "red", "gray")) ## ----clusterCountMatrix, echo=TRUE----------------------------------------- SNPhood.o = plotAndClusterMatrix(SNPhood.o, readGroup = "paternal", nClustersVec = 2, dataset = 1, verbose = FALSE) SNPhood.o = plotAndClusterMatrix(SNPhood.o, readGroup = "paternal", nClustersVec = 5, dataset = 1, verbose = FALSE) str(results(SNPhood.o, type = "clustering", elements = "paternal"), list.len = 3) ## ----clusterCountMatrix2, echo=TRUE---------------------------------------- SNPhood.o = plotAndClusterMatrix(SNPhood.o, readGroup = "paternal", nClustersVec = 5, dataset = 1, clustersToPlot = 2:5, verbose = FALSE) ## ----summarizeClusters, echo=TRUE------------------------------------------ p = plotClusterAverage(SNPhood.o, readGroup = "paternal", dataset = 1) ## ----associategenotype, echo=TRUE------------------------------------------ (mapping = data.frame(samples = annotationDatasets(SNPhood.o), genotypeFile = rep(fileGenotypes, 2), sampleName = c("NA10847", "NA12890") )) SNPhood.o = associateGenotypes(SNPhood.o, mapping) ## ----plotGenotypesPerSNP, echo=TRUE---------------------------------------- p = plotGenotypesPerSNP(SNPhood.o, regions = 1:20) ## ----strongAndWeakGenotype2, echo=TRUE------------------------------------- SNPhood_merged.o = mergeReadGroups(SNPhood.o) SNPhood_merged.o = plotAndCalculateWeakAndStrongGenotype(SNPhood_merged.o, normalize = TRUE, nClustersVec = 3) ## ----plotGenotypePerCluster, echo=TRUE------------------------------------- p = plotGenotypesPerCluster(SNPhood_merged.o, printPlot = FALSE, printBinLabels = TRUE, returnOnlyPlotNotObject = TRUE) p[[1]]