## ----style, echo = FALSE, results = 'asis'------------------------------------ BiocStyle::markdown() ## ----setup-------------------------------------------------------------------- library(MetaboDynamics) library(SummarizedExperiment) # storing and manipulating simulated metabolomics data library(ggplot2) # visualization library(dplyr) # data handling library(tidyr) # data handling ## ----------------------------------------------------------------------------- data("longitudinalMetabolomics_df", package = "MetaboDynamics") ## ----fig.wide=TRUE------------------------------------------------------------ data("longitudinalMetabolomics_df") # take a sample (of five metabolites and subset data samples <- c( "UMP", "PEP", "2-Aminomuconate","ATP" ) data <- longitudinalMetabolomics_df %>% filter(metabolite %in% samples) head(data) ## ----------------------------------------------------------------------------- # fit dynamics model fit <- # when using a data frame as input fits have to stored in a separate object fit_dynamics_model( model = "scaled_log", data = data, scaled_measurement = "m_scaled", # in which column are scaled measurments stored? max_treedepth = 10, adapt_delta = 0.99, # default 0.95 iter = 2000, warmup = 2000 / 4, # default is 1/4 of iterations chains = 1, # only set to 1 in vignette, recommended default is 4! cores = 1 # only set to 1 in vignette, can be same as chains if machine allows for parallelization ) ## ----------------------------------------------------------------------------- # Extract diagnostics diagnostics <- # when using a data frame as input diagnostics have to be stored in a separate object diagnostics_dynamics( data = data, # data frame that was used to fit the dynamics model, fit = fit, # list of fits from dynamics model, result of fit_dynamics_mode function iter = 2000, # how many iterations were used to fit the dynamics model chains = 1, # how many chains were used to fit the dynamics model ) ## ----------------------------------------------------------------------------- plot_diagnostics( data = data, # data frame used to fit the dynamics model diagnostics = diagnostics[["model_diagnostics"]] # summary of diagnostics ) plot_PPC( data = data, posterior = diagnostics[["posterior"]], scaled_measurement = "m_scaled" ) ## ----------------------------------------------------------------------------- estimates <- # estimates have to be stored in a separate object when using data frames estimates_dynamics( data = data, fit = fit) ## ----------------------------------------------------------------------------- # only visualize differences between time points plot_estimates( # does not need data as input estimates = estimates, delta_t = TRUE, # choose to visualize differences between time points dynamics = FALSE, distance_conditions = FALSE ) # only visualize dynamics plot_estimates( estimates = estimates, delta_t = FALSE, distance_conditions = FALSE, dynamics = TRUE ) # choose to visualize the dynamics # only visualize euclidean distance between dynamics vectors # only visualize dynamics plot_estimates( estimates = estimates, delta_t = FALSE, distance_conditions = TRUE, dynamics = FALSE ) # choose to visualize the dynamics ## ----------------------------------------------------------------------------- head(estimates) ## ----------------------------------------------------------------------------- cluster <- # clustering results have to be stored in separate object when using data frame as input cluster_dynamics( estimates = estimates, # data is now the estimates or a data frame ob similar structure fit = fit, distance = "euclidean", # which distance method should be used agglomeration = "ward.D2", # which agglomeration method for hierarchical clustering should be used deepSplit = 2, # sensitivity of cluster analysis, minClusterSize = 1, # minimum number of metabolites in one cluster B = 10, # number of bootstrapps ) ## ----------------------------------------------------------------------------- plots <- plot_cluster(cluster) plots$trees$A plots$clusterplots$A plots$lineplots$A plots$patchwork$A ## ----------------------------------------------------------------------------- # load background dataset data("modules_compounds") data("metabolite_modules") # get KEGGs from data data("IDs") ## ----------------------------------------------------------------------------- ora <- # store ORA result to separate object when using data frames as input ORA_hypergeometric( background = modules_compounds, annotations = metabolite_modules, data = cluster, # dataframe format of clustering output tested_column = "middle_hierarchy", IDs = IDs ) # Visualization plot_ORA( data = ora, tested_column = "middle_hierarchy" ) ## ----------------------------------------------------------------------------- comparison_dynamics <- # result needs to be stored in a separate object when using data frames compare_dynamics( data = cluster, # clustering result cores = 1 # only set to 1 for vignette, can be increased to 4 for parallelization ) ## ----------------------------------------------------------------------------- # Visualize comparison results heatmap_dynamics( estimates = comparison_dynamics[["estimates"]], data = cluster ) ## ----------------------------------------------------------------------------- # compare metabolite composition compare_metabolites <- compare_metabolites( data = cluster ) # Visualization heatmap_metabolites( distances = compare_metabolites, data = cluster ) ## ----------------------------------------------------------------------------- # combine comparison results temp <- left_join( comparison_dynamics[["estimates"]], # dynamics comparison compare_metabolites, join_by("cluster_a", "cluster_b") # join by cluster comparisons ) # get unique clusters x <- unique(c(temp[, "cluster_a"], temp[, "cluster_b"])) # draw plot ggplot(temp, aes(x = cluster_b, y = cluster_a)) + geom_point(aes(size = Jaccard, col = mu_mean)) + theme_bw() + scale_color_viridis_c(option = "magma") + scale_x_discrete(limits = x) + xlab("") + ylab("") + scale_y_discrete(limits = x) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) + labs(col = "dynamics distance", size = "metabolite similarity") + ggtitle("comparison of clusters", "label = condition + cluster ID") ## ----------------------------------------------------------------------------- sessionInfo()