Dense samples of the minimal gut microbiome. During the initial hours,
MDb-MM was grown under batch conditions; from 24 h onwards, it was fed
continuously with media, with pulse feeding cycles. This information is
stored in the colData.
library(miaTime)
# Load the example dataset shipped with miaTime and keep a working copy
data("minimalgut", package = "miaTime")
tse <- minimalgut
# Cross-tabulate samples: bioreactor (rows) versus condition (columns)
table(tse$StudyIdentifier, tse$condition_1)
#>
#> batch_carbs DoS pulse Overnight
#> Bioreactor A 4 38 19
#> Bioreactor B 4 38 19
#> Bioreactor C 4 38 19
Visualize the samples available for each of the bioreactors. This makes it possible to identify whether any samples are missing at specific time points.
library(ggplot2)
# Tile plot of sample availability: one tile per (time point, bioreactor)
# pair, filled by `condition_1`. A gap in a row marks a missing sample.
colData(tse) |>
    # colData() returns an S4 DataFrame; convert it explicitly so that
    # ggplot() receives a plain data.frame instead of relying on fallback
    # coercion
    as.data.frame() |>
    ggplot() +
    geom_tile(
        aes(x = as.factor(Time.hr), y = StudyIdentifier, fill = condition_1)
    )
The minimalgut dataset, a mucus-diet based minimal microbiome
(MDbMM-16), consists of 16 species assembled in three bioreactors. We
can investigate the succession of MDbMM-16 from the start of the
experiment (hour zero) until the end of the experiment.
# Transform the data to relative abundances
tse <- transformAssay(tse, method = "relabundance")
# Bray-Curtis divergence of every sample from the baseline (hour zero),
# computed separately within each bioreactor (`group`)
tse <- addBaselineDivergence(
    tse,
    assay.type = "relabundance",
    method = "bray",
    group = "StudyIdentifier",
    # No trailing comma here: it would pass an empty extra argument
    time.col = "Time.hr"
)
Let’s then visualize the divergence.
library(scater)
# Scatter plot of baseline divergence against time, coloured by bioreactor
p <- plotColData(
    object = tse,
    y = "divergence",
    x = "Time.hr",
    colour_by = "StudyIdentifier"
)
# Join the points of each bioreactor; plotColData() exposes the colouring
# variable under the column name "colour_by"
p <- p +
    geom_line(
        aes(colour = .data[["colour_by"]], group = .data[["colour_by"]])
    )
p
Now visualize the abundance of Blautia hydrogenotrophica using
the miaViz::plotSeries() function.
library(miaViz)
# Relative abundance of a single feature followed over time
p <- plotSeries(
    tse,
    assay.type = "relabundance",
    x = "Time.hr",
    y = "Blautia_hydrogenotrophica",
    colour_by = "Species"
)
p
Sample dissimilarity between consecutive time steps (step size n >= 1)
within a group (subject, age, reaction chamber, etc.) can be calculated
with addStepwiseDivergence().
# Bray-Curtis divergence of each sample from the sample one time step
# earlier, evaluated within each bioreactor
tse <- addStepwiseDivergence(
    tse,
    time.col = "Time.hr",
    group = "StudyIdentifier",
    time.interval = 1,
    assay.type = "relabundance",
    method = "bray",
    # Names under which the results are stored
    name = c(
        "divergence_from_previous_step",
        "time_from_previous_step",
        "reference_samples"
    )
)
The results are again stored in colData. We calculate the speed of
divergence change by dividing each stepwise divergence by the
corresponding change in time. Then we use similar plotting methods as
previously.
# Rate of change: stepwise divergence divided by the time elapsed since the
# previous step
tse$divergence_change <-
    tse$divergence_from_previous_step / tse$time_from_previous_step
# Time series of the rate of change, coloured by bioreactor
p <- plotColData(
    object = tse,
    y = "divergence_change",
    x = "Time.hr",
    colour_by = "StudyIdentifier"
)
# Join the points of each bioreactor
p <- p +
    geom_line(
        aes(colour = .data[["colour_by"]], group = .data[["colour_by"]])
    )
p
This shows how to calculate and plot a moving average for the variable of interest (here: the slope).
library(dplyr)
# Moving average of `divergence_change` over a window of 3 time points (the
# current sample and the two preceding ones), computed per bioreactor.
# Samples that lack two preceding values get NA.
tse[["sliding_divergence"]] <- colData(tse) |>
    as.data.frame() |>
    # One window per reactor, so averages never span two bioreactors
    group_by(StudyIdentifier) |>
    mutate(sliding_avg = (
        # `order_by` defines "previous" by sampling time rather than by row
        # position, so the result stays correct even if the samples are not
        # stored in chronological order
        lag(divergence_change, 2, order_by = Time.hr) +
            lag(divergence_change, 1, order_by = Time.hr) +
            # And the current sample
            divergence_change
    ) / 3) |>
    ungroup() |>
    # Keep only the values; mutate() preserves the original row order, so the
    # vector lines up with the samples of `tse`
    pull(sliding_avg)
After calculating the moving average of divergences, we can visualize the result in a similar way to our previous approach.
# Time series of the moving-average slope, coloured by bioreactor
p <- plotColData(
    object = tse,
    y = "sliding_divergence",
    x = "Time.hr",
    colour_by = "StudyIdentifier"
)
# Join the points of each bioreactor
p <- p +
    geom_line(
        aes(colour = .data[["colour_by"]], group = .data[["colour_by"]])
    )
p
# Print the R version, platform and package versions of this session
sessionInfo()
#> R version 4.5.0 (2025-04-11)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.2 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices utils datasets methods
#> [8] base
#>
#> other attached packages:
#> [1] dplyr_1.1.4 miaViz_1.17.0
#> [3] ggraph_2.2.1 scater_1.37.0
#> [5] scuttle_1.19.0 ggplot2_3.5.2
#> [7] miaTime_0.99.9 mia_1.17.0
#> [9] TreeSummarizedExperiment_2.17.0 Biostrings_2.77.0
#> [11] XVector_0.49.0 SingleCellExperiment_1.31.0
#> [13] MultiAssayExperiment_1.35.1 SummarizedExperiment_1.39.0
#> [15] Biobase_2.69.0 GenomicRanges_1.61.0
#> [17] GenomeInfoDb_1.45.3 IRanges_2.43.0
#> [19] S4Vectors_0.47.0 BiocGenerics_0.55.0
#> [21] generics_0.1.3 MatrixGenerics_1.21.0
#> [23] matrixStats_1.5.0 knitr_1.50
#> [25] BiocStyle_2.37.0
#>
#> loaded via a namespace (and not attached):
#> [1] splines_4.5.0 ggplotify_0.1.2
#> [3] tibble_3.2.1 cellranger_1.1.0
#> [5] polyclip_1.10-7 pROC_1.18.5
#> [7] DirichletMultinomial_1.51.0 lifecycle_1.0.4
#> [9] rstatix_0.7.2 doParallel_1.0.17
#> [11] lattice_0.22-7 MASS_7.3-65
#> [13] SnowballC_0.7.1 backports_1.5.0
#> [15] magrittr_2.0.3 sass_0.4.10
#> [17] rmarkdown_2.29 jquerylib_0.1.4
#> [19] yaml_2.3.10 cowplot_1.1.3
#> [21] DBI_1.2.3 RColorBrewer_1.1-3
#> [23] abind_1.4-8 purrr_1.0.4
#> [25] fillpattern_1.0.2 yulab.utils_0.2.0
#> [27] tweenr_2.0.3 circlize_0.4.16
#> [29] KMsurv_0.1-5 ggrepel_0.9.6
#> [31] tokenizers_0.3.0 irlba_2.3.5.1
#> [33] tidytree_0.4.6 vegan_2.6-10
#> [35] rbiom_2.2.0 parallelly_1.43.0
#> [37] pkgdown_2.1.2 permute_0.9-7
#> [39] DelayedMatrixStats_1.31.0 codetools_0.2-20
#> [41] DelayedArray_0.35.1 ggforce_0.4.2
#> [43] ggtext_0.1.2 xml2_1.3.8
#> [45] tidyselect_1.2.1 shape_1.4.6.1
#> [47] aplot_0.2.5 UCSC.utils_1.5.0
#> [49] farver_2.1.2 ScaledMatrix_1.17.0
#> [51] viridis_0.6.5 jsonlite_2.0.0
#> [53] GetoptLong_1.0.5 BiocNeighbors_2.3.0
#> [55] tidygraph_1.3.1 decontam_1.29.0
#> [57] Formula_1.2-5 survival_3.8-3
#> [59] iterators_1.0.14 emmeans_1.11.1
#> [61] systemfonts_1.2.3 foreach_1.5.2
#> [63] tools_4.5.0 ggnewscale_0.5.1
#> [65] treeio_1.33.0 ragg_1.4.0
#> [67] Rcpp_1.0.14 glue_1.8.0
#> [69] gridExtra_2.3 SparseArray_1.9.0
#> [71] BiocBaseUtils_1.11.0 xfun_0.52
#> [73] mgcv_1.9-3 withr_3.0.2
#> [75] BiocManager_1.30.25 fastmap_1.2.0
#> [77] bluster_1.19.0 digest_0.6.37
#> [79] rsvd_1.0.5 gridGraphics_0.5-1
#> [81] R6_2.6.1 estimability_1.5.1
#> [83] textshaping_1.0.1 colorspace_2.1-1
#> [85] tidyr_1.3.1 data.table_1.17.0
#> [87] DECIPHER_3.5.0 graphlayouts_1.2.2
#> [89] httr_1.4.7 htmlwidgets_1.6.4
#> [91] S4Arrays_1.9.0 pkgconfig_2.0.3
#> [93] gtable_0.3.6 ComplexHeatmap_2.25.0
#> [95] survMisc_0.5.6 janeaustenr_1.0.0
#> [97] htmltools_0.5.8.1 carData_3.0-5
#> [99] bookdown_0.43 clue_0.3-66
#> [101] scales_1.4.0 png_0.1-8
#> [103] coda4microbiome_0.2.4 corrplot_0.95
#> [105] ggfun_0.1.8 km.ci_0.5-6
#> [107] tzdb_0.5.0 reshape2_1.4.4
#> [109] rjson_0.2.23 nlme_3.1-168
#> [111] cachem_1.1.0 zoo_1.8-14
#> [113] GlobalOptions_0.1.2 stringr_1.5.1
#> [115] parallel_4.5.0 vipor_0.4.7
#> [117] desc_1.4.3 pillar_1.10.2
#> [119] grid_4.5.0 vctrs_0.6.5
#> [121] slam_0.1-55 ggpubr_0.6.0
#> [123] BiocSingular_1.25.0 car_3.1-3
#> [125] beachmat_2.25.0 xtable_1.8-4
#> [127] cluster_2.1.8.1 beeswarm_0.4.0
#> [129] evaluate_1.0.3 readr_2.1.5
#> [131] mvtnorm_1.3-3 cli_3.6.5
#> [133] compiler_4.5.0 rlang_1.1.6
#> [135] crayon_1.5.3 tidytext_0.4.2
#> [137] ggsignif_0.6.4 labeling_0.4.3
#> [139] survminer_0.5.0 plyr_1.8.9
#> [141] fs_1.6.6 ggbeeswarm_0.7.2
#> [143] stringi_1.8.7 viridisLite_0.4.2
#> [145] BiocParallel_1.43.0 lazyeval_0.2.2
#> [147] glmnet_4.1-8 Matrix_1.7-3
#> [149] hms_1.1.3 patchwork_1.3.0
#> [151] sparseMatrixStats_1.21.0 gridtext_0.1.5
#> [153] memoise_2.0.1 igraph_2.1.4
#> [155] broom_1.0.8 bslib_0.9.0
#> [157] ggtree_3.17.0 readxl_1.4.5
#> [159] ape_5.8-1