## ----setup, include = FALSE----------------------------------------------
# Layout of this script: each `## ----label, options----` header starts one
# chunk. Chunks that are not evaluated are kept as `# `-prefixed comments;
# lines starting with `# ##` inside them show example printed output.

# Flag reused as the default `eval` chunk option below.
# NOTE(review): presumably TRUE only when an AnVIL workspace is reachable on
# the GCP platform -- confirm against the AnVILBase documentation.
has_gcloud <- AnVILBase::has_avworkspace(
  strict = TRUE, platform = AnVILGCP::gcp()
)
# Chunk defaults: evaluate only when `has_gcloud` is TRUE, collapse source
# and output into one block, and cache chunk results.
knitr::opts_chunk$set(
  eval = has_gcloud, collapse = TRUE, cache = TRUE
)
options(width = 75)

## ----install-anvil, eval = FALSE-----------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager", repos = "https://cran.r-project.org")
# BiocManager::install("AnVIL")

## ----load-packages, message =FALSE, eval = TRUE, cache = FALSE-----------
library(AnVILGCP)
library(AnVIL)

## ----gcloud-sdk-path, eval = FALSE---------------------------------------
# dir(file.path(Sys.getenv("GCLOUD_SDK_PATH"), "bin"), "^(gcloud|gsutil)$")
# ## [1] "gcloud" "gsutil"

## ----gcloud-exists, eval = TRUE------------------------------------------
## the code chunks in this vignette are fully evaluated when
## gcloud_exists() returns TRUE
# NOTE(review): the chunk default set above is `eval = has_gcloud`, not the
# value of gcloud_exists() -- confirm the statement above still holds.
GCPtools::gcloud_exists()

## ----install-genomicfeatures, eval = FALSE-------------------------------
# BiocManager::install("GenomicFeatures")

## ----add-libpaths, eval = FALSE------------------------------------------
# add_libpaths("~/my/project")

## ----gcloud-account-project, eval = has_gcloud---------------------------
# gcloud_account() # authentication account
# gcloud_project() # billing project information

## ----gcloud-projects-list, eval = has_gcloud-----------------------------
# gcloud_cmd("projects", "list") |>
#   readr::read_table() |>
#   filter(startsWith(PROJECT_ID, "anvil"))

## ----gcloud-help, eval = FALSE-------------------------------------------
# gcloud_help("projects")

## ----gsutil-src----------------------------------------------------------
# src <- "gs://genomics-public-data/1000-genomes/"

## ----gsutil-ls-stat, eval = has_gcloud-----------------------------------
# gsutil_ls(src)
#
# other <- paste0(src, "other")
# gsutil_ls(other, recursive = TRUE)
#
# sample_info <- paste0(src, "other/sample_info/sample_info.csv")
# gsutil_stat(sample_info)

## ----gsutil-cp, eval = has_gcloud----------------------------------------
# fl <- tempfile()
# gsutil_cp(sample_info, fl)
#
# csv <- readr::read_csv(fl, guess_max = 5000L, col_types = readr::cols())
# csv

## ----gsutil-pipe, eval = has_gcloud--------------------------------------
# pipe <- gsutil_pipe(fl, "rb")
# readr::read_csv(pipe, guess_max = 5000L, col_types = readr::cols()) |>
#   dplyr::select("Sample", "Family_ID", "Population", "Gender")

## ----gsutil-rsync, eval = has_gcloud-------------------------------------
# destination <- tempfile()
# stopifnot(dir.create(destination))
# source <- paste0(src, "other/sample_info")
#
# ## dry run
# gsutil_rsync(source, destination)
#
# gsutil_rsync(source, destination, dry = FALSE)
# dir(destination, recursive = TRUE)
#
# ## nothing to synchronize
# gsutil_rsync(source, destination, dry = FALSE)
#
# ## one file requires synchronization
# unlink(file.path(destination, "README"))
# gsutil_rsync(source, destination, dry = FALSE)

## ----workspace-data-image, echo = FALSE, cache = FALSE-------------------
# knitr::include_graphics('images/AnVIL-Workspace-Data.png')

## ----avworkspace-set-hidden, include = FALSE, cache = FALSE, eval = has_gcloud----
# avworkspace_namespace("pathogen-genomic-surveillance")
# avworkspace_name("COVID-19")

## ----avworkspace-get, eval = has_gcloud----------------------------------
# avworkspace_namespace()
# avworkspace_name()

## ----avworkspace-set, eval = has_gcloud----------------------------------
# ## N.B.: IT MAY NOT BE NECESSARY TO SET THESE WHEN ON ANVIL
# avworkspace_namespace("pathogen-genomic-surveillance")
# avworkspace_name("COVID-19")

## ----avtables-avtable, eval = has_gcloud---------------------------------
# avtables()
# sample <- avtable("sample")
# sample

## ----avtable-manipulation, eval = has_gcloud-----------------------------
# sample |>
#   select("sample_id", contains("fasta")) |>
#   filter(!is.na(final_assembly_fasta))

## ----avtable-import-example, eval = FALSE--------------------------------
# my_cars <-
#   mtcars |>
#   as_tibble(rownames = "model") |>
#   mutate(model = gsub(" ", "_", model))
# job_status <- avtable_import(my_cars)

## ----avtable-import-status-example, eval = FALSE-------------------------
# avtable_import_status(job_status)

## ----avtable-import-pagesize-example, eval = FALSE-----------------------
# (job_status <- avtable_import(my_cars, pageSize = 10))
# ## pageSize = 10 rows (4 pages)
# ##   |===================================================================| 100%
# ## # A tibble: 4 × 5
# ##    page from_row to_row job_id                               status
# ##
# ## 1     1        1     10 a32e9706-f63c-49ed-9620-b214746b9392 Uploaded
# ## 2     2       11     20 f2910ac2-0954-4fb9-b36c-970845a266b7 Uploaded
# ## 3     3       21     30 e18adc5b-d26f-4a8a-a0d7-a232e17ac8d2 Uploaded
# ## 4     4       31     32 d14efb89-e2dd-4937-b80a-169520b5f563 Uploaded
# (job_status <- avtable_import_status(job_status))
# ## checking status of 4 avtable import jobs
# ##   |===================================================================| 100%
# ## # A tibble: 4 × 5
# ##    page from_row to_row job_id                               status
# ##
# ## 1     1        1     10 a32e9706-f63c-49ed-9620-b214746b9392 Done
# ## 2     2       11     20 f2910ac2-0954-4fb9-b36c-970845a266b7 Done
# ## 3     3       21     30 e18adc5b-d26f-4a8a-a0d7-a232e17ac8d2 ReadyForUpsert
# ## 4     4       31     32 d14efb89-e2dd-4937-b80a-169520b5f563 ReadyForUpsert
# (job_status <- avtable_import_status(job_status))
# ## checking status of 4 avtable import jobs
# ##   |===================================================================| 100%
# ## # A tibble: 4 × 5
# ##    page from_row to_row job_id                               status
# ##
# ## 1     1        1     10 a32e9706-f63c-49ed-9620-b214746b9392 Done
# ## 2     2       11     20 f2910ac2-0954-4fb9-b36c-970845a266b7 Done
# ## 3     3       21     30 e18adc5b-d26f-4a8a-a0d7-a232e17ac8d2 Done
# ## 4     4       31     32 d14efb89-e2dd-4937-b80a-169520b5f563 Done

## ----avtable-import-set-example, eval = FALSE----------------------------
# ## editable copy of '1000G-high-coverage-2019' workspace
# avworkspace("anvil-datastorage/1000G-high-coverage-2019")
# ## n() is used for the row count: the native pipe `|>` does not bind `.`
# sample <-
#   avtable("sample") |>                                       # existing table
#   mutate(set = sample(head(LETTERS), n(), replace = TRUE))   # arbitrary groups
# sample |>                                   # new 'participant_set' table
#   avtable_import_set("participant", "set", "participant")
# sample |>                                   # new 'sample_set' table
#   avtable_import_set("sample", "set", "name")

## ----avdata, eval = has_gcloud-------------------------------------------
# avdata()

## ----avbucket, eval = has_gcloud-----------------------------------------
# bucket <- avbucket()
# bucket

## ----avfiles_ls, eval = has_gcloud---------------------------------------
# avfiles_ls()

## ----write-to-bucket-example, eval = FALSE-------------------------------
# ## requires workspace ownership
# uri <- avbucket()                             # discover bucket
# bucket <- file.path(uri, "mtcars.tab")
# write.table(mtcars, gsutil_pipe(bucket, "w")) # write to bucket

## ----avfiles-backup-cwd, eval = FALSE------------------------------------
# ## backup all files and folders in the current working directory
# avfiles_backup(getwd(), recursive = TRUE)

## ----avfiles-backup-dir, eval = FALSE------------------------------------
# ## backup all files in the current directory
# avfiles_backup(dir())

## ----avfiles-backup-scratch, eval = FALSE--------------------------------
# ## backup all files to <avbucket()>/scratch/
# avfiles_backup(dir(), paste0(avbucket(), "/scratch"))

## ----drs-uri-example, eval = has_gcloud----------------------------------
# uri <- c(
#   vcf = "drs://dg.ANV0/6f633518-f2de-4460-aaa4-a27ee6138ab5",
#   tbi = "drs://dg.ANV0/4fb9e77f-c92a-4deb-ac90-db007dc633aa"
# )

## ----drs-stat-example, eval = FALSE--------------------------------------
# tbl <- drs_stat(uri)
# ## # A tibble: 2 × 9
# ## drs fileName size gsUri accessUrl timeUpdated hashes bucket name
# ##
# ## 1 drs://d… NA21144… 7.06e9 gs:/… NA 2020-07-08… fc-56… CCDG…
# ## 2 drs://d… NA21144… 4.08e6 gs:/… NA 2020-07-08… fc-56… CCDG…

## ----drs-cp-example, eval = FALSE----------------------------------------
# drs_cp(uri, "/tmp")     # local temporary directory
# drs_cp(uri, avbucket()) # workspace bucket

## ----drs-access-url-example, eval = FALSE--------------------------------
# suppressPackageStartupMessages({
#   library(VariantAnnotation)
# })
# https <- drs_access_url(uri)
# vcffile <- VcfFile(https[["vcf"]], https[["tbi"]])
# scanVcfHeader(vcffile)
# ## class: VCFHeader
# ## samples(1): NA21144
# ## meta(3): fileformat reference contig
# ## fixed(2): FILTER ALT
# ## info(16): BaseQRankSum ClippingRankSum ... ReadPosRankSum VariantType
# ## geno(11): GT AB ... PL SB
#
# variants <- readVcf(vcffile, param = GRanges("chr1:1-1000000"))
# nrow(variants)
# ## [1] 123077

## ----terra-api, eval = has_gcloud----------------------------------------
# terra <- Terra()

## ----terra-summary, eval = has_gcloud------------------------------------
# terra

## ----terra-tags, eval = has_gcloud---------------------------------------
# terra |> tags("Status")

## ----terra-status-access, eval = has_gcloud------------------------------
# terra$status

## ----terra-status-call, eval = has_gcloud--------------------------------
# terra$status()

## ----terra-create-billing-args, eval = has_gcloud------------------------
# args(terra$createBillingProjectFull)

## ----terra-overwrite-args, eval = has_gcloud-----------------------------
# args(terra$overwriteWorkspaceMethodConfig)

## ----terra-status-response, eval = has_gcloud----------------------------
# status <- terra$status()
# class(status)

## ----terra-status-str, eval = has_gcloud---------------------------------
# str(status)

## ----terra-status-flatten-example, eval = has_gcloud---------------------
# lst <- status |> as.list()
# lengths(lst)
# lengths(lst$systems)
# str(lst$systems)

## ----my-service-class----------------------------------------------------
# .MyService <- setClass("MyService", contains = "Service")
#
# MyService <- function() {
#   .MyService(Service(
#     "myservice",
#     host = "api.firecloud.org",
#     api_url = "https://api.firecloud.org/api-docs.yaml",
#     authenticate = FALSE
#   ))
# }

## ----sessionInfo, echo=FALSE---------------------------------------------
# sessionInfo()