## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
#     install.packages("BiocManager")
# BiocManager::install("fastreeR")

## ----eval=TRUE, message=FALSE-------------------------------------------------
options(java.parameters="-Xmx1G")
unloadNamespace("fastreeR")
library(fastreeR)
library(utils)
library(ape)
library(stats)
library(grid)
library(BiocFileCache)

## ----eval=TRUE----------------------------------------------------------------
bfc <- BiocFileCache::BiocFileCache(ask = FALSE)
tempVcfUrl <-
    paste0("https://ftp.1000genomes.ebi.ac.uk/vol1/ftp/data_collections/",
        "1000_genomes_project/release/20190312_biallelic_SNV_and_INDEL/",
        "supporting/related_samples/",
        "ALL.chrX.shapeit2_integrated_snvindels_v2a_related_samples_27022019.",
        "GRCh38.phased.vcf.gz")
tempVcf <- BiocFileCache::bfcquery(bfc,field = "rname", "tempVcf")$rpath[1]
if(is.na(tempVcf)) {
    tryCatch(
    { tempVcf <- BiocFileCache::bfcadd(bfc,"tempVcf",fpath=tempVcfUrl)[[1]]
    },
    error=function(cond) {
        tempVcf <- system.file("extdata", "samples.vcf.gz", package="fastreeR")
    },
    warning=function(cond) {
        tempVcf <- system.file("extdata", "samples.vcf.gz", package="fastreeR")
    }
    )
}
if(file.size(tempVcf) == 0L) {
    tempVcf <- system.file("extdata", "samples.vcf.gz", package="fastreeR")
}

## ----eval=TRUE----------------------------------------------------------------
tempFastasUrls <- c(
    #Mycobacterium liflandii
    paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/",
        "Mycobacterium_liflandii/latest_assembly_versions/",
        "GCF_000026445.2_ASM2644v2/GCF_000026445.2_ASM2644v2_genomic.fna.gz"),
    #Pelobacter propionicus
    paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/",
        "Pelobacter_propionicus/latest_assembly_versions/",
        "GCF_000015045.1_ASM1504v1/GCF_000015045.1_ASM1504v1_genomic.fna.gz"),
    #Rickettsia prowazekii
    paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/",
        "Rickettsia_prowazekii/latest_assembly_versions/",
        "GCF_000022785.1_ASM2278v1/GCF_000022785.1_ASM2278v1_genomic.fna.gz"),
    #Salmonella enterica
    paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/",
        "Salmonella_enterica/reference/",
        "GCF_000006945.2_ASM694v2/GCF_000006945.2_ASM694v2_genomic.fna.gz"),
    #Staphylococcus aureus
    paste0("https://ftp.ncbi.nih.gov/genomes/refseq/bacteria/",
        "Staphylococcus_aureus/reference/",
        "GCF_000013425.1_ASM1342v1/GCF_000013425.1_ASM1342v1_genomic.fna.gz")
)
tempFastas <- list()
for (i in seq(1,5)) {
    tempFastas[[i]] <- BiocFileCache::bfcquery(bfc,field = "rname", 
                                                paste0("temp_fasta",i))$rpath[1]
    if(is.na(tempFastas[[i]])) {
        tryCatch(
        { tempFastas[[i]] <- 
            BiocFileCache::bfcadd(bfc, paste0("temp_fasta",i), 
                                                fpath=tempFastasUrls[i])[[1]]
        },
        error=function(cond) {
            tempFastas <- system.file("extdata", "samples.fasta.gz", 
                                                        package="fastreeR")
            break
        },
        warning=function(cond) {
            tempFastas <- system.file("extdata", "samples.fasta.gz", 
                                                        package="fastreeR")
            break
        }
        )
    }
    if(!file.exists(tempFastas[[i]])) {
        tempFastas <- system.file("extdata", "samples.fasta.gz", 
                                                        package="fastreeR")
        break
    }
    if(file.size(tempFastas[[i]]) == 0L) {
        tempFastas <- system.file("extdata", "samples.fasta.gz", 
                                                        package="fastreeR")
        break
    }
}

## ----echo=TRUE, fig.cap="Sample statistics from vcf file", fig.wide=TRUE------
myVcfIstats <- fastreeR::vcf2istats(inputFile = tempVcf)
plot(myVcfIstats[,7:9])

## ----eval=TRUE----------------------------------------------------------------
myVcfDist <- fastreeR::vcf2dist(inputFile = tempVcf, threads = 2)

## ----echo=TRUE, fig.cap="Histogram of distances from vcf file", fig.wide=TRUE----
graphics::hist(myVcfDist, breaks = 100, main=NULL, 
                                xlab = "Distance", xlim = c(0,max(myVcfDist)))

## ----echo=TRUE, fig.cap="Tree from vcf with fastreeR", fig.wide=TRUE----------
myVcfTree <- fastreeR::dist2tree(inputDist = myVcfDist)
plot(ape::read.tree(text = myVcfTree), direction = "down", cex = 0.3)
ape::add.scale.bar()
ape::axisPhylo(side = 2)

## ----echo=TRUE, fig.cap="Tree from vcf with fastreeR", fig.wide=TRUE----------
myVcfTree <- fastreeR::vcf2tree(inputFile = tempVcf, threads = 2)
plot(ape::read.tree(text = myVcfTree), direction = "down", cex = 0.3)
ape::add.scale.bar()
ape::axisPhylo(side = 2)

## ----echo=TRUE, fig.cap="Tree from vcf with stats::hclust", fig.wide=TRUE-----
myVcfTreeStats <- stats::hclust(myVcfDist)
plot(myVcfTreeStats, ann = FALSE, cex = 0.3)

## ----eval=TRUE----------------------------------------------------------------
myVcfClust <- fastreeR::dist2clusters(inputDist = myVcfDist, cutHeight = 0.067)
if (length(myVcfClust) > 1) {
    tree <- myVcfClust[[1]]
    clusters <- myVcfClust[[2]]
    tree
    clusters
}

## ----eval=TRUE----------------------------------------------------------------
myFastaDist <- fastreeR::fasta2dist(tempFastas, kmer = 6)

## ----eval=FALSE---------------------------------------------------------------
# myFastaDist <- fastreeR::fasta2dist(
#     system.file("extdata", "samples.fasta.gz", package="fastreeR"), kmer = 6)

## ----echo=TRUE, fig.cap="Histogram of distances from fasta file",fig.wide=TRUE----
graphics::hist(myFastaDist, breaks = 100, main=NULL, 
                                xlab="Distance", xlim = c(0,max(myFastaDist)))

## ----echo=TRUE, fig.cap="Tree from fasta with fastreeR", fig.wide=TRUE--------
myFastaTree <- fastreeR::dist2tree(inputDist = myFastaDist)
plot(ape::read.tree(text = myFastaTree), direction = "down", cex = 0.3)
ape::add.scale.bar()
ape::axisPhylo(side = 2)

## ----echo=TRUE, fig.cap="Tree from fasta with stats::hclust", fig.wide=TRUE----
myFastaTreeStats <- stats::hclust(myFastaDist)
plot(myFastaTreeStats, ann = FALSE, cex = 0.3)

## ----setup--------------------------------------------------------------------
utils::sessionInfo()