## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----setup-------------------------------------------------------------------- library(MetaboDynamics) library(SummarizedExperiment) # storing and manipulating simulated metabolomics data library(ggplot2) # visualization library(dplyr) # data handling library(tidyr) # data handling ## ----------------------------------------------------------------------------- data("longitudinalMetabolomics", package = "MetaboDynamics") ## ----fig.wide=TRUE------------------------------------------------------------ # convert to dataframe abundances <- as.data.frame(cbind( as.data.frame(t(assays(longitudinalMetabolomics)$concentrations)), as.data.frame(colData(longitudinalMetabolomics)) )) abundances <- abundances %>% pivot_longer( cols = seq_len(4), names_to = "time", values_to = "abundance" ) ggplot(abundances, aes(x = abundance)) + geom_freqpoly() + theme_bw() + facet_grid(cols = vars(time), rows = vars(condition)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + ggtitle("raw data", "raw measurements") ## ----fig.wide=TRUE------------------------------------------------------------ ggplot(abundances, aes(x = abundance)) + geom_density() + scale_x_log10() + theme_bw() + facet_grid(cols = vars(time), rows = vars(condition)) + ggtitle("data", "log-transformed values") ## ----fig.wide=TRUE------------------------------------------------------------ ggplot(abundances) + geom_line(aes( x = time, y = abundance, col = metabolite, group = interaction(metabolite, replicate) )) + scale_y_log10() + theme_bw() + xlab("timepoint") + scale_color_viridis_d() + theme(legend.position = "none") + facet_grid(rows = vars(condition)) + ggtitle("raw metabolite dynamics", "color=metabolite") ## ----fig.wide=TRUE------------------------------------------------------------ data("longitudinalMetabolomics") # convert to dataframe abundances <- as.data.frame(cbind( as.data.frame(t(assays(longitudinalMetabolomics)$scaled_log)), as.data.frame(colData(longitudinalMetabolomics)) )) abundances <- abundances %>% pivot_longer( cols = seq_len(4), names_to = "time", values_to = "abundance" ) ggplot(abundances) + geom_line(aes( x = time, y = abundance, col = metabolite, group = interaction(metabolite, replicate) )) + theme_bw() + scale_color_viridis_d() + xlab("timepoint") + ylab("deviation from mean abundance") + facet_grid(rows = vars(condition)) + theme(legend.position = "none") + ggtitle("standardized dynamics", "color=metabolite") ## ----fig.wide=TRUE------------------------------------------------------------ # we can hand a SummarizedExperiment object to the function data("longitudinalMetabolomics") # we only use a subsection of the simulated data (1 condition and subsample of # the whole dataset) for demonstration purposes samples <- c( "UMP", "PEP", "2-Aminomuconate","ATP" ) # only one condition data <- longitudinalMetabolomics[, longitudinalMetabolomics$condition %in% c("A", "B") & longitudinalMetabolomics$metabolite %in% samples] # fit model data <- fit_dynamics_model( model = "scaled_log", data = data, scaled_measurement = "m_scaled", assay = "scaled_log", adapt_delta = 0.95, # default 0.95 iter = 2000, cores = 1, chains = 1 # only set to 1 for vignette, use 4 chains ) ## ----------------------------------------------------------------------------- # extract diagnostics data <- diagnostics_dynamics( data = data, iter = 2000, # number of iterations used for model fitting # the dynamic model chains = 1 # number of chains used for model fitting ) plot_diagnostics( data = data ) ## ----------------------------------------------------------------------------- # PPCs can be accessed with plot_PPC( data = data ) ## ----fig.wide=TRUE------------------------------------------------------------ # #extract estimates data <- estimates_dynamics( data = data ) ## ----fig.wide=TRUE------------------------------------------------------------ # 1) the differences between two timepoints plots <- plot_estimates( data = data, delta_t = TRUE, dynamics = FALSE, distance_conditions = FALSE ) plots$delta_t$`A_time_point_2-time_point_1` plots$delta_t$`A_time_point_3-time_point_2` ## ----fig.wide=TRUE------------------------------------------------------------ # 1) the differences between two timepoints plot_estimates( data = data, delta_t = FALSE, dynamics = FALSE, distance_conditions = TRUE ) ## ----fig.wide=TRUE------------------------------------------------------------ # 2) dynamic profiles plot_estimates( data = data, dynamics = TRUE, distance_conditions = FALSE, delta_t = FALSE ) ## ----------------------------------------------------------------------------- # get estimates estimates <- metadata(data)[["estimates_dynamics"]] # only show first rows of condition A estimates$euclidean_distances ## ----fig.wide=TRUE------------------------------------------------------------ data <- cluster_dynamics(data, deepSplit = 2, B = 10 # number of bootstrapps ) # low clustering sensitivity ## ----------------------------------------------------------------------------- plots <- plot_cluster(data) ## ----------------------------------------------------------------------------- plots$trees$B ## ----------------------------------------------------------------------------- plots$lineplots$B ## ----------------------------------------------------------------------------- plots$patchwork$B ## ----------------------------------------------------------------------------- data("metabolite_modules") help("metabolite_modules") head(metabolite_modules) data("modules_compounds") help("modules_compounds") head(modules_compounds) ## ----------------------------------------------------------------------------- data("IDs") head(IDs) ## ----fig.wide=TRUE------------------------------------------------------------ data <- ORA_hypergeometric( background = modules_compounds, annotations = metabolite_modules, IDs = IDs, data = data, tested_column = "middle_hierarchy" ) plot_ORA(data = data) ## ----------------------------------------------------------------------------- # get cluster plots cluster <- plot_cluster(data = data) # set patchwork to TRUE and provide cluster plots ora <- plot_ORA(data = data, tested_column = "middle_hierarchy", patchwork = TRUE, plot_cluster = cluster) ## ----fig.width=12,fig.height=6------------------------------------------------ # patch plots together library(patchwork) p <- cluster$patchwork$A|ora$ora_patchwork$A p+plot_layout(ncol=4, widths = c(2,1,2,2)) ## ----fig.aling='center',fig.dpi=150------------------------------------------- data("longitudinalMetabolomics") longitudinalMetabolomics <- compare_dynamics( data = longitudinalMetabolomics, cores = 1 # just set to 1 for vignette, set to 4 if possible ) ## ----fig.aling='center',fig.width=8,fig.height=6------------------------------ heatmap_dynamics(data = longitudinalMetabolomics) ## ----fig.aling='center',fig.width=8,fig.height=6------------------------------ longitudinalMetabolomics <- compare_metabolites( data = longitudinalMetabolomics ) heatmap_metabolites(data = longitudinalMetabolomics) ## ----fig.aling='center',fig.height=6,fig.width=8------------------------------ dynamics <- metadata(longitudinalMetabolomics)[["comparison_dynamics"]][["estimates"]] metabolites <- metadata(longitudinalMetabolomics)[["comparison_metabolites"]] temp <- left_join(dynamics, metabolites, join_by("cluster_a", "cluster_b")) x <- unique(c(temp[, "cluster_a"], temp[, "cluster_b"])) temp <- temp %>% mutate(scale_Jaccard = scale(Jaccard)) ggplot(temp, aes(x = cluster_b, y = cluster_a)) + geom_point(aes(size = Jaccard, col = mu_mean)) + theme_bw() + scale_color_viridis_c(option = "magma") + scale_x_discrete(limits = x) + xlab("") + ylab("") + scale_y_discrete(limits = x) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + labs(col = "dynamics distance", size = "metabolite similarity") + ggtitle("comparison of clusters", "label = condition + cluster ID") ## ----------------------------------------------------------------------------- sessionInfo()