Minimal gut microbiome

Dense samples of the minimal gut microbiome. In the initial hours, MDb-MM was grown under batch condition and 24 h onwards, continuous feeding of media with pulse feeding cycles. This information is stored in the colData.

library(miaTime)
data("minimalgut")
tse <- minimalgut

# quick check of number of samples 
kable(table(colData(tse)$StudyIdentifier,colData(tse)$condition_1))
batch_carbs DoS pulse Overnight
Bioreactor A 4 38 19
Bioreactor B 4 38 19
Bioreactor C 4 38 19

Visualize samples available for each of the bioreactors. This allows to identify if there are any missing samples for specific times.

## 
## Attaching package: 'tidySummarizedExperiment'
## The following objects are masked from 'package:dplyr':
## 
##     bind_cols, bind_rows, count
## The following objects are masked from 'package:mia':
## 
##     full_join, inner_join, left_join, right_join
## The following object is masked from 'package:XVector':
## 
##     slice
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:matrixStats':
## 
##     count
## The following object is masked from 'package:stats':
## 
##     filter
tse |> 
    ggplot(aes(as.factor(Time.hr), StudyIdentifier)) +
    geom_tile(aes(fill=condition_1), color="white") +
    scale_fill_manual("Condition Sampled", 
                      values = c("#ff006e", "#e07a5f", "#457b9d")) +
    theme_minimal() +
    theme(axis.text.x = element_text(size=8, angle = 90),
          legend.position = "top") +
    labs(x="Time (h)", y="")

Community dynamics

The minimalgut dataset, mucus-diet based minimal microbiome (MDbMM-16), consists of 16 species assembled in three bioreactors. We can investigate the succession of mdbMM16 from the start of experiment here hour zero until the end of the experiment.

## Divergence from baseline i.e from hour zero.
tse <- mia::relAbundanceCounts(minimalgut) # get relative abundance
tse <- getBaselineDivergence(tse,
                             group = "StudyIdentifier",
                             time_field = "Time.hr",
                             name_divergence = "divergence_from_baseline",
                             name_timedifference = "time_from_baseline",
                             assay_name="relabundance",
                             FUN = vegan::vegdist,
                             method="bray")

Visualize the divergence

# First define nice colors for bioreactors
bioreac_cols <- c(`Bioreactor A`= "#b2182b",
                  `Bioreactor B`= "#2166ac",
                  `Bioreactor C` = "#35978f")

tse |>
    ggplot(aes(x=Time.hr, y=divergence_from_baseline))+
    geom_point(aes(color=StudyIdentifier), size=2, alpha=0.5) +
    geom_line(aes(color=StudyIdentifier)) +
    theme_minimal() +
    scale_color_manual(values = bioreac_cols) +
    labs(x="Time (h)", y="Divergence \nfrom baseline") +
    # highlight specific timepoints
    geom_vline(xintercept = 152, lty=2, color="#991720") + 
    geom_vline(xintercept = 248, lty=2, color= "#0963bd")+
    annotate("text",x=c(152, 248),y=c(0.8, 0.8),
             label=c("Addition of\nB.hydrogenotrophica","Acetate Discontinued"),
             hjust=c(1.05,-0.05))

Visualizing selected taxa

Now visualize abundance of Blautia hydrogenotrophica using the miaViz::plotSeries function.

library(miaViz)
## Loading required package: ggraph
plotSeries(mia::relAbundanceCounts(minimalgut),
           x = "Time.hr",
           y = "Blautia_hydrogenotrophica",
           colour_by = "Species",
           assay_name = "relabundance")+
    geom_vline(xintercept = 152, lty=2, color="#991720") + 
    geom_vline(xintercept = 248, lty=2, color= "#0963bd")+
    annotate("text",x=c(152, 248),y=c(0.2, 0.15),
             label=c("Addition of\nB.hydrogenotrophica","Acetate Discontinued"),
             hjust=c(1.05,-0.05))+
    labs(x="Time (h)", y="B.hydrogenotrophica\nRelative Abundance") +
    theme(legend.position = "none") 

Visualize the rate (slope) of divergence

Sample dissimilarity between consecutive time steps(step size n >= 1) within a group(subject, age, reaction chamber, etc.) can be calculated by getStepwiseDivergence. If we normalize this by the time interval, this gives an approximate slope for the change.

# Load libraries
library(miaTime)
library(dplyr)

# Sort samples by time (necessary for getStepwiseDivergence)
tse <- tse[, order(colData(tse)$Time_hr_num)]

# Divergence between consecutive time points
tse <- getStepwiseDivergence(tse, group = "StudyIdentifier",
                               time_interval = 1,
                               time_field = "Time_hr_num",
                               name_divergence = "divergence_from_previous_step",
                               name_timedifference = "time_from_previous_step",
                               assay_name="relabundance",
                               FUN = vegan::vegdist,
                               method="bray")

# We have now new fields added in the colData:
# time_from_previous_step, divergence_from_previous_step
# print(colData(tse))

# Visualize the slope of dissimilarity between consecutive time points as a function of time (from baseline)
library(ggplot2)
theme_set(theme_bw(10))
p <- tse |> ggplot(aes(x=time_from_baseline,
                   y=divergence_from_previous_step/time_from_previous_step,
                   color=StudyIdentifier)) +
       geom_point() +
       geom_line() +
       labs(x="Time (hours)", y="Slope of dissimilarity (Bray-Curtis)") +
       geom_hline(aes(yintercept=0), linetype=2)

print(p)

Moving average of the slope

This shows how to calculate and plot moving average for the variable of interest (here: slope).

# Add slope explicitly in colData
colData(tse)$slope <- colData(tse)$divergence_from_previous_step / colData(tse)$time_from_previous_step

# Split by group and perform operation
tselist <- mia::splitOn(tse, "StudyIdentifier")

# colData(tse)$divergence_from_previous_step
addmean <- function (x, k, field, field_name) {
  # Calculate rolling mean
  m <- zoo::rollmean(x[[field]], k = k)
  # Initialize empty field
  colData(x)[[field_name]] <- rep(NA, ncol(x))
  # Fill in the rolling mean (length does not match with original data in rolling mean)
  colData(x)[1:length(m), field_name] <- m
  # Return the object with a new field added
  x
}

# Calculate sliding average for the field "divergence_from_previous_step"
# and store the result in a new field with the name "sliding_average"
tselist2 <- lapply(tselist, function (x) {addmean(x, k=3, field = "slope", field_name = "sliding_average")})

# Merge back
tse <- SEtools::mergeSEs(tselist2)

# Visualize
theme_set(theme_bw(10))
p <- tse |> ggplot(aes(x = time_from_baseline,
                   y = sliding_average,
                   color=StudyIdentifier)) +
     geom_point() +
     geom_line() +
     labs(x="Time (hours)", y="Mean slope of dissimilarity (Bray-Curtis)") +
     geom_hline(aes(yintercept=0), linetype=2)
print(p)

Error bars

This shows how to visualize error bars on top of time series. In this example we create artificial replicates and variation for a brief example.

## Source: https://www.geeksforgeeks.org/adding-error-bars-to-a-line-graph-with-ggplot2-in-r/
## Gives count, mean, standard deviation, standard error of the mean,
## and confidence interval (default 95%).
## data: a data frame.
## measurevar: the name of a column that contains the variable to be summariezed
## groupvars: a vector containing names of columns that contain grouping variables
## na.rm: a boolean that indicates whether to ignore NA's
## conf.interval: the percent range of the confidence interval (default is 95%)
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE,
                      conf.interval=.95, .drop=TRUE) {
    library(plyr)

    # New version of length which can handle NA's: if na.rm==T, don't count them
    length2 <- function (x, na.rm=FALSE) {
        if (na.rm) sum(!is.na(x))
        else       length(x)
    }

    # This does the summary. For each group's data frame, return a vector with
    # N, mean, and sd
    datac <- ddply(data, groupvars, .drop=.drop,
      .fun = function(xx, col) {
        c(N    = length2(xx[[col]], na.rm=na.rm),
          mean = mean   (xx[[col]], na.rm=na.rm),
          sd   = sd     (xx[[col]], na.rm=na.rm)
        )
      },
      measurevar
    )

    # Rename the "mean" column    
    datac <- rename(datac, c("mean" = measurevar))

    datac$se <- datac$sd / sqrt(datac$N)  # Calculate standard error of the mean

    # Confidence interval multiplier for standard error
    # Calculate t-statistic for confidence interval: 
    # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
    ciMult <- qt(conf.interval/2 + .5, datac$N-1)
    datac$ci <- datac$se * ciMult

    return(datac)
}

# Create artificial example on replicates because we don't have any in this demo data
set.seed(3422)
df <- as.data.frame(colData(tse))
sdlevel <- 0.1*mean(df$sliding_average, na.rm=TRUE)
df1 <- df
df2 <- df; df2$sliding_average <- rnorm(mean=df1$sliding_average, sd = sdlevel, n=nrow(df1)) 
df3 <- df; df3$sliding_average <- rnorm(mean=df1$sliding_average, sd = sdlevel, n=nrow(df1))
df <- bind_rows(list(df1, df2, df3))

# Calculate deviations
df <- summarySE(df, measurevar="sliding_average", groupvars=c("StudyIdentifier", "time_from_baseline"))

# Visualize
theme_set(theme_bw(10))
p <- ggplot(df,
         aes(x = time_from_baseline, y = sliding_average, color=StudyIdentifier)) +
     geom_point() +
     geom_line() +
     geom_errorbar(aes(ymin=sliding_average-sd, ymax=sliding_average+sd), width=.2,
                 position=position_dodge(0.05)) +    
     labs(x="Time (hours)", y="Mean slope of dissimilarity (Bray-Curtis)") +
     geom_hline(aes(yintercept=0), linetype=2)
print(p)

Session info

## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] plyr_1.8.7                     miaViz_1.5.4                  
##  [3] ggraph_2.1.0                   tidySummarizedExperiment_1.7.0
##  [5] ggplot2_3.3.6                  knitr_1.40                    
##  [7] lubridate_1.8.0                dplyr_1.0.10                  
##  [9] miaTime_0.1.15                 mia_1.5.17                    
## [11] MultiAssayExperiment_1.23.9    TreeSummarizedExperiment_2.1.4
## [13] Biostrings_2.65.6              XVector_0.37.1                
## [15] SingleCellExperiment_1.19.1    SummarizedExperiment_1.27.3   
## [17] Biobase_2.57.1                 GenomicRanges_1.49.1          
## [19] GenomeInfoDb_1.33.8            IRanges_2.31.2                
## [21] S4Vectors_0.35.4               BiocGenerics_0.43.4           
## [23] MatrixGenerics_1.9.1           matrixStats_0.62.0            
## [25] BiocStyle_2.25.0              
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                  tidyselect_1.2.0           
##   [3] RSQLite_2.2.18              AnnotationDbi_1.59.1       
##   [5] htmlwidgets_1.5.4           grid_4.2.1                 
##   [7] TSP_1.2-1                   BiocParallel_1.31.13       
##   [9] Rtsne_0.16                  munsell_0.5.0              
##  [11] ScaledMatrix_1.5.1          codetools_0.2-18           
##  [13] ragg_1.2.3                  withr_2.5.0                
##  [15] colorspace_2.0-3            highr_0.9                  
##  [17] labeling_0.4.2              GenomeInfoDbData_1.2.9     
##  [19] polyclip_1.10-0             bit64_4.0.5                
##  [21] farver_2.1.1                pheatmap_1.0.12            
##  [23] rprojroot_2.0.3             vctrs_0.4.2                
##  [25] treeio_1.21.2               generics_0.1.3             
##  [27] xfun_0.33                   R6_2.5.1                   
##  [29] doParallel_1.0.17           graphlayouts_0.8.2         
##  [31] ggbeeswarm_0.6.0            clue_0.3-61                
##  [33] rsvd_1.0.5                  seriation_1.3.6            
##  [35] locfit_1.5-9.6              gridGraphics_0.5-1         
##  [37] bitops_1.0-7                cachem_1.0.6               
##  [39] DelayedArray_0.23.2         scales_1.2.1               
##  [41] SEtools_1.11.1              beeswarm_0.4.0             
##  [43] gtable_0.3.1                beachmat_2.13.4            
##  [45] sva_3.45.0                  tidygraph_1.2.2            
##  [47] rlang_1.0.6                 genefilter_1.79.0          
##  [49] systemfonts_1.0.4           GlobalOptions_0.1.2        
##  [51] splines_4.2.1               lazyeval_0.2.2             
##  [53] BiocManager_1.30.18         yaml_2.3.5                 
##  [55] reshape2_1.4.4              tools_4.2.1                
##  [57] bookdown_0.29               ggplotify_0.1.0            
##  [59] ellipsis_0.3.2              decontam_1.17.0            
##  [61] jquerylib_0.1.4             RColorBrewer_1.1-3         
##  [63] Rcpp_1.0.9                  sparseMatrixStats_1.9.0    
##  [65] zlibbioc_1.43.0             purrr_0.3.5                
##  [67] RCurl_1.98-1.9              GetoptLong_1.0.5           
##  [69] viridis_0.6.2               zoo_1.8-11                 
##  [71] ggrepel_0.9.1               cluster_2.1.4              
##  [73] fs_1.5.2                    DECIPHER_2.25.3            
##  [75] magrittr_2.0.3              data.table_1.14.2          
##  [77] openxlsx_4.2.5              circlize_0.4.15            
##  [79] ggnewscale_0.4.8            randomcoloR_1.1.0.1        
##  [81] patchwork_1.1.2             evaluate_0.17              
##  [83] xtable_1.8-4                XML_3.99-0.11              
##  [85] gridExtra_2.3               shape_1.4.6                
##  [87] compiler_4.2.1              scater_1.25.7              
##  [89] tibble_3.1.8                V8_4.2.1                   
##  [91] crayon_1.5.2                htmltools_0.5.3            
##  [93] ggfun_0.0.7                 mgcv_1.8-40                
##  [95] aplot_0.1.8                 tidyr_1.2.1                
##  [97] geneplotter_1.75.0          DBI_1.1.3                  
##  [99] tweenr_2.0.2                ComplexHeatmap_2.13.1      
## [101] MASS_7.3-58.1               Matrix_1.5-1               
## [103] permute_0.9-7               cli_3.4.1                  
## [105] parallel_4.2.1              igraph_1.3.5               
## [107] pkgconfig_2.0.3             pkgdown_2.0.6              
## [109] registry_0.5-1              plotly_4.10.0              
## [111] scuttle_1.7.4               foreach_1.5.2              
## [113] ggtree_3.5.3                annotate_1.75.0            
## [115] vipor_0.4.5                 bslib_0.4.0                
## [117] DirichletMultinomial_1.39.0 yulab.utils_0.0.5          
## [119] stringr_1.4.1               digest_0.6.29              
## [121] vegan_2.6-5                 rmarkdown_2.17             
## [123] tidytree_0.4.1              edgeR_3.39.6               
## [125] DelayedMatrixStats_1.19.2   curl_4.3.3                 
## [127] rjson_0.2.21                lifecycle_1.0.3            
## [129] nlme_3.1-159                jsonlite_1.8.2             
## [131] BiocNeighbors_1.15.1        desc_1.4.2                 
## [133] viridisLite_0.4.1           limma_3.53.10              
## [135] fansi_1.0.3                 pillar_1.8.1               
## [137] lattice_0.20-45             KEGGREST_1.37.3            
## [139] fastmap_1.1.0               httr_1.4.4                 
## [141] survival_3.4-0              glue_1.6.2                 
## [143] zip_2.2.1                   sechm_1.5.1                
## [145] png_0.1-7                   iterators_1.0.14           
## [147] bit_4.0.4                   ggforce_0.4.1              
## [149] stringi_1.7.8               sass_0.4.2                 
## [151] blob_1.2.3                  textshaping_0.3.6          
## [153] DESeq2_1.37.6               BiocSingular_1.13.1        
## [155] memoise_2.0.1               irlba_2.3.5.1              
## [157] ape_5.6-2