## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning = FALSE, tidy = FALSE ) library(BiocStyle) python_available <- tryCatch({ proc <- basilisk::basiliskStart(immLynx::immLynxEnv) on.exit(basilisk::basiliskStop(proc)) TRUE }, error = function(e) { FALSE }) ## ----load--------------------------------------------------------------------- library(immLynx) library(scran) library(scater) library(ggplot2) data("immLynx_example") ## ----clustcr-compare, eval=python_available----------------------------------- # sce_mcl_2 <- runClustTCR(immLynx_example, # inflation = 2.0, # column_prefix = "mcl_2") # sce_mcl_3 <- runClustTCR(sce_mcl_2, # inflation = 3.0, # column_prefix = "mcl_3") # # cat("MCL (inflation=2):", # length(unique(na.omit(sce_mcl_3$mcl_2_TRB))), # "clusters\n") # cat("MCL (inflation=3):", # length(unique(na.omit(sce_mcl_3$mcl_3_TRB))), # "clusters\n") ## ----esm-models, eval=FALSE--------------------------------------------------- # sce_small <- runEmbeddings( # immLynx_example, # model_name = "facebook/esm2_t12_35M_UR50D", # reduction_name = "esm_small" # ) # # sce_med <- runEmbeddings( # sce_small, # model_name = "facebook/esm2_t33_650M_UR50D", # reduction_name = "esm_medium" # ) # # sce_med <- scater::runUMAP(sce_med, # dimred = "esm_small", # name = "umap_small") # sce_med <- scater::runUMAP(sce_med, # dimred = "esm_medium", # name = "umap_medium") # # p1 <- scater::plotReducedDim(sce_med, # dimred = "umap_small") + # ggtitle("ESM-2 35M") # p2 <- scater::plotReducedDim(sce_med, # dimred = "umap_medium") + # ggtitle("ESM-2 650M") # p1 + p2 ## ----both-chains, eval=FALSE-------------------------------------------------- # sce_paired <- runEmbeddings( # immLynx_example, # chains = "both", # reduction_name = "tcr_paired" # ) # # sce_paired <- scater::runUMAP(sce_paired, # dimred = "tcr_paired") # scater::plotReducedDim(sce_paired, dimred = "UMAP") ## ----screpertoire-integration, eval=python_available-------------------------- # sce_clust <- runClustTCR(immLynx_example, # chains = "TRB") # # comparison <- table( # clustcr = sce_clust$clustcr_TRB, # clonotype = sce_clust$CTstrict # ) # # cat("Number of clustcr clusters:", nrow(comparison), "\n") # cat("Number of unique clonotypes:", ncol(comparison), "\n") ## ----selection, eval=python_available----------------------------------------- # sce_pgen <- runOLGA(immLynx_example, # chains = "TRB") # # ggplot(as.data.frame(colData(sce_pgen)), aes(x = Type, # y = olga_pgen_log10_TRB)) + # geom_boxplot() + # labs(title = "TCR Generation Probability by Sample Type", # x = "Sample Type", # y = "log10(Pgen)") # # low_pgen_cells <- which(colData(sce_pgen)$olga_pgen_log10_TRB < -15) # cat("Cells with Pgen < 10^-15:", length(low_pgen_cells), "\n") ## ----combined-distance, eval=python_available--------------------------------- # dist_results <- runTCRdist(immLynx_example, chains = "beta") # # d <- as.dist(dist_results$distances$pw_beta) # hc <- hclust(d, method = "complete") # n_unique <- nrow(dist_results$distances$pw_beta) # clusters <- cutree(hc, k = min(50, n_unique)) # # cat("Distance matrix:", n_unique, "x", n_unique, "\n") # cat("Hierarchical clusters:", length(unique(clusters)), "\n") ## ----large-data, eval=FALSE--------------------------------------------------- # sce_large <- runEmbeddings( # large_sce, # chunk_size = 16, # pool = "mean" # ) # # sce_large <- runClustTCR( # large_sce, # inflation = 2.0 # ) # # sample_cells <- sample(colnames(large_sce), 5000) # subset_obj <- large_sce[, sample_cells] # dist_results <- runTCRdist(subset_obj) ## ----hla, eval=python_available----------------------------------------------- # metaclones <- runMetaclonotypist(immLynx_example, # return_input = FALSE) # # hla_data <- data.frame( # barcode = metaclones$barcode, # HLA_A_01_01 = sample(c(TRUE, FALSE), # nrow(metaclones), # replace = TRUE), # HLA_A_02_01 = sample(c(TRUE, FALSE), # nrow(metaclones), # replace = TRUE), # HLA_B_07_02 = sample(c(TRUE, FALSE), # nrow(metaclones), # replace = TRUE) # ) # # hla_results <- runHLAassociation(metaclones, # hla_data) # head(hla_results) ## ----export, eval=FALSE------------------------------------------------------- # tcr_data <- extractTCRdata(immLynx_example, # chains = "both", # format = "wide") # tcrdist_format <- convertToTcrdist(tcr_data, # chains = "both") # write.csv(tcrdist_format, "tcr_for_tcrdist.csv", row.names = FALSE) # # clusters <- data.frame( # barcode = colnames(sce_clust), # clustcr = sce_clust$clustcr_TRB # ) # write.csv(clusters, "cluster_assignments.csv", row.names = FALSE) # # embeddings <- reducedDim(sce_small, "esm_small") # write.csv(embeddings, "tcr_embeddings.csv") ## ----validate, eval=FALSE----------------------------------------------------- # validation <- validateTCRdata(tcr_data, check_sequences = TRUE) # if (!validation$valid) { # warning(validation$errors) # } ## ----seed, eval=FALSE--------------------------------------------------------- # set.seed(42) # sce <- runClustTCR(sce) ## ----session, eval=TRUE------------------------------------------------------- sessionInfo()