## ----echo=FALSE---------------------------------------------------------------
library(SomaticCancerAlterations)
library(GenomicRanges)
library(ggbio)

## ----list_datasets------------------------------------------------------------
# Data set names as reported by scaListDatasets(); reused further down to pick
# which studies to load.
all_datasets <- scaListDatasets()
print(all_datasets)

# Metadata as reported by scaMetadata().
meta_data <- scaMetadata()
print(meta_data)

## ----load_dataset-------------------------------------------------------------
# Load the "ov_tcga" data set with merging requested.
ov <- scaLoadDatasets("ov_tcga", merge = TRUE)

## ----print--------------------------------------------------------------------
# Show the first three entries.
head(ov, n = 3)

## ----summary------------------------------------------------------------------
# Cross-tabulate the two annotation columns held in mcols(ov).
with(
    mcols(ov),
    table(Variant_Classification, Variant_Type)
)

## -----------------------------------------------------------------------------
# Most frequent Sample_ID values (default head() length), then the ten most
# frequent Hugo_Symbol values.
head(sort(table(ov$Sample_ID), decreasing = TRUE))
head(sort(table(ov$Hugo_Symbol), decreasing = TRUE), n = 10)

## ----multiple_studies---------------------------------------------------------
# Load the first three listed data sets without merging.
three_studies <- scaLoadDatasets(all_datasets[seq_len(3)])

# Number of entries in each element of the result.
print(elementNROWS(three_studies))

class(three_studies)

## ----merge_studies------------------------------------------------------------
# Same three data sets again, this time with merging requested.
merged_studies <- scaLoadDatasets(all_datasets[seq_len(3)], merge = TRUE)

class(merged_studies)

## ----mutated_genes_study------------------------------------------------------
# Contingency table of Hugo_Symbol (rows) by Dataset (columns) built from the
# annotation columns of the merged studies.
gene_study_count <- with(mcols(merged_studies), table(Hugo_Symbol, Dataset))

# Sort rows by their total count across all data sets, largest first.
# rowSums() computes the per-row totals in one vectorised call instead of
# looping over every row with apply(); the resulting order is the same.
gene_study_count <- gene_study_count[
    order(rowSums(gene_study_count), decreasing = TRUE),
]

# Append the marginal sums for rows and columns.
gene_study_count <- addmargins(gene_study_count)

head(gene_study_count)

## ----subset_studies-----------------------------------------------------------
# Genomic interval on sequence "17" used as the query region below.
tp53_region <- GRanges(
    seqnames = "17",
    ranges = IRanges(start = 7571720, end = 7590863)
)

# Keep only the entries of the merged studies that overlap the region.
tp53_studies <- subsetByOverlaps(merged_studies, tp53_region)

## ----variant_study_table------------------------------------------------------
# Variant_Classification by Dataset for the overlapping entries, with margins.
addmargins(
    table(
        tp53_studies$Variant_Classification,
        tp53_studies$Dataset
    )
)

## ----mutateted_genes----------------------------------------------------------
# Fraction of patients with at least one entry overlapping `region`.
#
# y:      object accepted by subsetByOverlaps() that exposes a `Patient_ID`
#         column via `$` and a `Number_Patients` entry in its metadata().
# region: query ranges handed to subsetByOverlaps().
#
# Returns the number of distinct Patient_ID values among the overlapping
# entries divided by Number_Patients. The denominator is read from the
# metadata of the subset, not of `y` itself.
fraction_mutated_region <- function(y, region) {
    hits <- subsetByOverlaps(y, region)
    n_mutated <- length(unique(hits$Patient_ID))
    n_mutated / metadata(hits)$Number_Patients
}

# Apply fraction_mutated_region() to every element of three_studies, using
# tp53_region as the query region for each.
mutated_fraction <- sapply(
    three_studies,
    fraction_mutated_region,
    tp53_region
)

# Pair each fraction with the name of the element it came from.
mutated_fraction <- data.frame(
    name = names(three_studies),
    fraction = mutated_fraction
)

## ----plot_mutated_genes-------------------------------------------------------
library(ggplot2)

# Bar chart with one bar per row of mutated_fraction: `name` on the x axis and
# as fill colour, `fraction` as bar height, y axis limited to [0, 1].
# geom_bar is called through the ggplot2 namespace explicitly.
p <- ggplot(mutated_fraction) +
    ggplot2::geom_bar(
        aes(x = name, y = fraction, fill = name),
        stat = "identity"
    ) +
    ylim(0, 1) +
    xlab("Study") +
    ylab("Ratio") +
    theme_bw()

print(p)

## ----echo=FALSE---------------------------------------------------------------
# Report the R session details.
sessionInfo()