Contents

1 Introduction

Many genetic studies employ PLINK for data management and analysis.

This package facilitates interrogation of PLINK bed files through the Bioconductor DelayedArray protocol

2 Installation

Use Bioconductor 3.23.

BiocManager::install("PlinkMatrix")

3 Demonstration

The example_PlinkMatrix function will

The example data are those distributed with tensorQTL, transformed to bed using plink2.

library(PlinkMatrix)
gdemo <- example_PlinkMatrix()
colnames(gdemo) <- gsub("0_", "", colnames(gdemo))
gdemo
## <367759 x 445> DelayedMatrix object of type "double":
##                         HG00096 HG00097 HG00099 ... NA20826 NA20828
##     chr18_10644_C_G_b38       0       0       0   .       0       0
##     chr18_10847_C_A_b38       0       0       0   .       0       0
##     chr18_11275_G_A_b38       0       0       0   .       0       0
##     chr18_11358_G_A_b38       0       0       0   .       0       0
##     chr18_11445_G_A_b38       0       0       0   .       0       0
##                     ...       .       .       .   .       .       .
## chr18_80259028_AG_A_b38       1       2       2   .       0       1
##  chr18_80259147_G_C_b38       0       0       0   .       0       0
##  chr18_80259181_A_G_b38       0       0       0   .       0       0
##  chr18_80259190_C_G_b38       1       0       0   .       1       0
##  chr18_80259245_C_A_b38       0       0       0   .       0       0

4 Sanity check

We will use ancestry information and approximate PCA to illustrate plausibility of the approach used here.

We have codes for the ancestral origins of samples.

data(g445samples)
table(g445samples$Population.code)
## 
## CEU FIN GBR TSI YRI 
##  89  92  86  91  87

We’ll take a random sample of 1000 SNP and retrieve 10 PCs, then visualize aspects of the reexpression to PCs for discriminating population of ancestry.

library(irlba)
set.seed(1234)
ss <- sort(sample(seq_len(nrow(gdemo)), size = 1000))
pca <- prcomp_irlba(t(gdemo[ss, ]), 10)
pairs(pca$x[, 1:4], col = factor(g445samples$Population.code), pch = 19, cex = .5)

boxplot(split(pca$x[, 2] + pca$x[, 1], g445samples$Population.code))

5 SummarizedExperiment wrapper; subsetting with GenomicRanges

data(example_GRanges)
library(SummarizedExperiment)
nse <- SummarizedExperiment(list(genotypes = gdemo),
  rowData = example_GRanges, colData = g445samples
)
nse
## class: RangedSummarizedExperiment 
## dim: 367759 445 
## metadata(0):
## assays(1): genotypes
## rownames(367759): chr18_10644_C_G_b38 chr18_10847_C_A_b38 ...
##   chr18_80259190_C_G_b38 chr18_80259245_C_A_b38
## rowData names(0):
## colnames(445): HG00096 HG00097 ... NA20826 NA20828
## colData names(5): Sex Population.code Population.name
##   Superpopulation.code Superpopulation.name
little <- GenomicRanges::GRanges("chr18:1-100000")
subsetByOverlaps(nse, little)
## class: RangedSummarizedExperiment 
## dim: 353 445 
## metadata(0):
## assays(1): genotypes
## rownames(353): chr18_10644_C_G_b38 chr18_10847_C_A_b38 ...
##   chr18_99977_C_T_b38 chr18_99998_G_A_b38
## rowData names(0):
## colnames(445): HG00096 HG00097 ... NA20826 NA20828
## colData names(5): Sex Population.code Population.name
##   Superpopulation.code Superpopulation.name

6 Session information

sessionInfo()
## R version 4.6.0 alpha (2026-04-05 r89794)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 24.04.4 LTS
## 
## Matrix products: default
## BLAS:   /home/biocbuild/bbs-3.23-bioc/R/lib/libRblas.so 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.12.0  LAPACK version 3.12.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_GB              LC_COLLATE=C              
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: America/New_York
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] irlba_2.3.7                 PlinkMatrix_0.99.8         
##  [3] SummarizedExperiment_1.41.1 Biobase_2.71.0             
##  [5] GenomicRanges_1.63.2        Seqinfo_1.1.0              
##  [7] DelayedArray_0.37.1         SparseArray_1.11.13        
##  [9] S4Arrays_1.11.1             abind_1.4-8                
## [11] IRanges_2.45.0              S4Vectors_0.49.2           
## [13] MatrixGenerics_1.23.0       matrixStats_1.5.0          
## [15] BiocGenerics_0.57.1         generics_0.1.4             
## [17] Matrix_1.7-5                Rcpp_1.1.1-1               
## [19] BiocStyle_2.39.0           
## 
## loaded via a namespace (and not attached):
##  [1] rappdirs_0.3.4      sass_0.4.10         RSQLite_2.4.6      
##  [4] lattice_0.22-9      magrittr_2.0.5      digest_0.6.39      
##  [7] evaluate_1.0.5      grid_4.6.0          bookdown_0.46      
## [10] fastmap_1.2.0       blob_1.3.0          jsonlite_2.0.0     
## [13] DBI_1.3.0           tinytex_0.59        BiocManager_1.30.27
## [16] purrr_1.2.2         httr2_1.2.2         jquerylib_0.1.4    
## [19] cli_3.6.6           rlang_1.2.0         dbplyr_2.5.2       
## [22] XVector_0.51.0      bit64_4.6.0-1       withr_3.0.2        
## [25] cachem_1.1.0        yaml_2.3.12         otel_0.2.0         
## [28] tools_4.6.0         memoise_2.0.1       dplyr_1.2.1        
## [31] filelock_1.0.3      curl_7.0.0          vctrs_0.7.3        
## [34] R6_2.6.1            magick_2.9.1        lifecycle_1.0.5    
## [37] BiocFileCache_3.1.0 bit_4.6.0           pkgconfig_2.0.3    
## [40] pillar_1.11.1       bslib_0.10.0        glue_1.8.1         
## [43] tidyselect_1.2.1    tibble_3.3.1        xfun_0.57          
## [46] knitr_1.51          htmltools_0.5.9     rmarkdown_2.31     
## [49] compiler_4.6.0