## ---- echo=FALSE, results="hide"----------------------------------------------
# Vignette setup: configure knitr chunk options (error, warning and message
# all switched off) and fix the RNG seed so that any random numbers drawn
# further down are reproducible between runs.
knitr::opts_chunk$set(error=FALSE, warning=FALSE, message=FALSE)
library(BiocStyle)
set.seed(10918)

## -----------------------------------------------------------------------------
# Load the example dataset; 'sce' is reused by every chunk below.
library(scRNAseq)
sce <- ZeiselBrainData()
sce

## -----------------------------------------------------------------------------
library(scuttle)

# Row indices of the mitochondrial features. The pattern is anchored so that
# only names *starting* with "mt-" are selected, rather than any name that
# happens to contain "mt-" somewhere in the middle.
is.mito <- grep("^mt-", rownames(sce))

# Per-cell QC metrics, with the mitochondrial features as a named subset
# (this is what yields the 'subsets_Mito_*' columns).
per.cell <- perCellQCMetrics(sce, subsets=list(Mito=is.mito))
summary(per.cell$sum)
summary(per.cell$detected)
summary(per.cell$subsets_Mito_percent)

## -----------------------------------------------------------------------------
# Store the metrics alongside the existing per-cell annotation.
colData(sce) <- cbind(colData(sce), per.cell)

## -----------------------------------------------------------------------------
# Alternative: compute the metrics and append them to colData in one call.
sce2 <- addPerCellQC(sce, subsets=list(Mito=is.mito))
colnames(colData(sce2))

## -----------------------------------------------------------------------------
# Keep only the cells that are not flagged as low outliers for the
# (log-transformed) total count.
keep.total <- !isOutlier(per.cell$sum, type="lower", log=TRUE)
filtered <- sce[,keep.total]

## -----------------------------------------------------------------------------
# Apply the combined QC filters in one step, including the mitochondrial
# percentage, and drop every cell marked in the 'discard' column.
qc.stats <- quickPerCellQC(per.cell, percent_subsets="subsets_Mito_percent")
colSums(as.matrix(qc.stats))
filtered <- sce[,!qc.stats$discard]

## -----------------------------------------------------------------------------
# Pretending that the first 10 cells are empty wells, for demonstration.
# Per-feature QC metrics, with cells 1-10 supplied as the "Empty" subset
# (this is what yields the 'subsets_Empty_*' columns).
per.feat <- perFeatureQCMetrics(sce, subsets=list(Empty=1:10))
summary(per.feat$mean)
summary(per.feat$detected)
summary(per.feat$subsets_Empty_ratio)

## -----------------------------------------------------------------------------
# Average count for each feature.
ave <- calculateAverage(sce)
summary(ave)

## -----------------------------------------------------------------------------
# Size factors derived from the library sizes.
summary(librarySizeFactors(sce))

## -----------------------------------------------------------------------------
# Two alternative size factor definitions, shown for comparison.
summary(geometricSizeFactors(sce))
summary(medianSizeFactors(sce))

## -----------------------------------------------------------------------------
# Compute the library size factors and store them in 'sce' itself.
sce <- computeLibraryFactors(sce)
summary(sizeFactors(sce))

## -----------------------------------------------------------------------------
# Add log-normalized expression values; the new assay shows up in assayNames().
sce <- logNormCounts(sce)
assayNames(sce)

## -----------------------------------------------------------------------------
# Custom normalization stored under its own assay name. The size factors are
# random draws, used purely for demonstration, with a pseudo-count of 1.5.
assay(sce, "normed") <- normalizeCounts(sce,
    size.factors=runif(ncol(sce)), pseudo.count=1.5)

## -----------------------------------------------------------------------------
# Counts-per-million, stored as another assay.
assay(sce, "cpm") <- calculateCPM(sce)

## -----------------------------------------------------------------------------
# Aggregate cells that share the same 'level1class' label.
agg.sce <- aggregateAcrossCells(sce, ids=sce$level1class)
head(assay(agg.sce))
colData(agg.sce)[,c("ids", "ncells")]

## -----------------------------------------------------------------------------
# Aggregate over each combination of 'level1class' and 'tissue' instead.
agg.sce <- aggregateAcrossCells(sce,
    ids=colData(sce)[,c("level1class", "tissue")])
head(assay(agg.sce))
colData(agg.sce)[,c("level1class", "tissue", "ncells")]

## -----------------------------------------------------------------------------
# Average the log-expression values over three feature sets given as row
# indices. Note that GeneSet3 (rows 1-100) overlaps the other two sets.
agg.feat <- sumCountsAcrossFeatures(sce,
    ids=list(GeneSet1=1:10, GeneSet2=11:50, GeneSet3=1:100),
    average=TRUE, exprs_values="logcounts")
agg.feat[,1:10]

## -----------------------------------------------------------------------------
# Number of cells with detected expression in each 'level1class'/'tissue'
# combination.
agg.n <- numDetectedAcrossCells(sce,
    ids=colData(sce)[,c("level1class", "tissue")])
head(assay(agg.n))
## -----------------------------------------------------------------------------
# Mocking up a dataset to demonstrate:
outfile <- tempfile()
write.table(counts(sce)[1:100,], file=outfile, sep="\t", quote=FALSE)

# Reading it in as a sparse matrix:
output <- readSparseCounts(outfile)
class(output)

# The mock file has served its purpose once its contents are in memory, so
# remove it instead of leaving it behind in the session's temp directory.
unlink(outfile)

## -----------------------------------------------------------------------------
# Original row names are Ensembl IDs.
sce.ens <- ZeiselBrainData(ensembl=TRUE)
head(rownames(sce.ens))

# Replacing with guaranteed unique and non-missing symbols:
rownames(sce.ens) <- uniquifyFeatureNames(
    rownames(sce.ens), rowData(sce.ens)$originalName
)
head(rownames(sce.ens))

## -----------------------------------------------------------------------------
# Per-cell data frame that also pulls in the expression of one feature.
out <- makePerCellDF(sce, features="Tspan12")
colnames(out)

## -----------------------------------------------------------------------------
# Per-feature data frame that also pulls in the expression in selected cells.
out2 <- makePerFeatureDF(sce,
    cells=c("1772063062_D05", "1772063061_D01",
        "1772060240_F02", "1772062114_F05"))
colnames(out2)

## -----------------------------------------------------------------------------
sessionInfo()