% Rd help page for crlmm() and crlmm2(): CRLMM genotype calling from CEL files.
\name{crlmm}
\alias{crlmm}
\alias{crlmm2}
\title{Genotype oligonucleotide arrays with CRLMM}
\description{
  This is a faster and more efficient implementation of the CRLMM
  algorithm, especially designed for Affymetrix SNP 5 and 6 arrays (soon
  to be extended to other platforms).
}
\usage{
crlmm(filenames, row.names=TRUE, col.names=TRUE,
      probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
      save.it=FALSE, load.it=FALSE, intensityFile,
      mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
      cdfName, sns, recallMin=10, recallRegMin=1000,
      returnParams=FALSE, badSNP=0.7)

crlmm2(filenames, row.names=TRUE, col.names=TRUE,
       probs=c(1/3, 1/3, 1/3), DF=6, SNRMin=5, gender=NULL,
       save.it=FALSE, load.it=FALSE, intensityFile,
       mixtureSampleSize=10^5, eps=0.1, verbose=TRUE,
       cdfName, sns, recallMin=10, recallRegMin=1000,
       returnParams=FALSE, badSNP=0.7)
}
\arguments{
  \item{filenames}{'character' vector with CEL files to be genotyped.}
  \item{row.names}{'logical'. Use rownames - SNP names?}
  \item{col.names}{'logical'. Use colnames - Sample names?}
  \item{probs}{'numeric' vector with priors for AA, AB and BB.}
  \item{DF}{'integer' with number of degrees of freedom to use with
    t-distribution.}
  \item{SNRMin}{'numeric' scalar defining the minimum SNR used to filter
    out samples.}
  \item{gender}{'integer' vector, with same length as 'filenames',
    defining sex. (1 - male; 2 - female)}
  \item{save.it}{'logical'. Save preprocessed data?}
  \item{load.it}{'logical'. Load preprocessed data to speed up analysis?}
  \item{intensityFile}{'character' with filename to be saved/loaded -
    preprocessed data.}
  \item{mixtureSampleSize}{Number of SNPs to be used with the mixture
    model.}
  \item{eps}{Minimum change for mixture model.}
  \item{verbose}{'logical'.}
  \item{cdfName}{'character' defining the CDF name to use
    ('GenomeWideSnp5', 'GenomeWideSnp6').}
  \item{sns}{'character' vector with sample names to be used.}
  \item{recallMin}{Minimum number of samples for recalibration.}
  \item{recallRegMin}{Minimum number of SNPs for regression.}
  \item{returnParams}{'logical'. Return recalibrated parameters.}
  \item{badSNP}{'numeric'. Threshold to flag as bad SNP (affects batchQC)}
}
\value{
  A \code{SnpSet} object.
  \item{calls}{Genotype calls (1 - AA, 2 - AB, 3 - BB)}
  \item{confs}{Confidence scores 'round(-1000*log2(1-p))'}
  \item{SNPQC}{SNP Quality Scores}
  \item{batchQC}{Batch Quality Score}
  \item{params}{Recalibrated parameters}
}
\details{
  'crlmm2' allows one to genotype very large datasets (via ff package)
  and also permits the use of clusters or multiple cores (via snow
  package) to speed up genotyping.
}
\references{
  Carvalho B, Bengtsson H, Speed TP, Irizarry RA. Exploration,
  normalization, and genotype calls of high-density oligonucleotide SNP
  array data. Biostatistics. 2007 Apr;8(2):485-99. Epub 2006 Dec 22.
  PMID: 17189563.

  Carvalho BS, Louis TA, Irizarry RA. Quantifying uncertainty in
  genotype calls. Bioinformatics. 2010 Jan 15;26(2):242-9.
}
\examples{
## this can be slow
## (&& short-circuits, so hapmapsnp6 is only attached when the
##  annotation package is available)
if (require(genomewidesnp6Crlmm) && require(hapmapsnp6)){
  path <- system.file("celFiles", package="hapmapsnp6")

  ## the filenames with full path...
  ## very useful when genotyping samples not in the working directory
  cels <- list.celfiles(path, full.names=TRUE)
  (crlmmOutput <- crlmm(cels))
}

\dontrun{
## HPC Example
library(ff)
library(snow)
library(crlmm)

## genotype 50K SNPs at a time
ocProbesets(50000)

## setup cluster - 8 cores on the machine
setCluster(8, "SOCK")

path <- system.file("celFiles", package="hapmapsnp6")
cels <- list.celfiles(path, full.names=TRUE)
crlmmOutput <- crlmm2(cels)
}
}
\keyword{classif}