Microbiome Landscapes

Microbiome landscaping refers to the analysis and illustration of population frequencies. Typically, the landscaping functions are wrappers based on standard ordination methods (for more examples, see the ordination examples).

Two-dimensional microbiome landscape

Load example data:

library(microbiome)
library(phyloseq)
library(ggplot2)

# Load the dietswap example data set
data(dietswap)
pseq <- dietswap

# Relative (compositional) abundances
pseq.rel <- microbiome::transform(pseq, "compositional")

# Core taxa: the same 5/100 cutoff is used for detection and prevalence
core.cutoff <- 5 / 100
pseq.core <- core(pseq.rel, detection = core.cutoff, prevalence = core.cutoff)

# Keep only the samples that are both overweight and female
pseq.core <- subset_samples(
  pseq.core,
  bmi_group == "overweight" & sex == "female"
)

Landscape figure

Visualize the microbiome landscape (sample similarities on a two-dimensional projection). When using these tools, please cite Shetty et al., FEMS Microbiology Reviews, 41(2):182–199, 2017, doi:10.1093/femsre/fuw045.

PCA

# Landscape from a PCA (Euclidean distance) of CLR-transformed abundances
p <- plot_landscape(pseq, method = "PCA", transformation = "clr")
p <- p + labs(title = "PCA / CLR")
print(p)

PCoA / MDS

# Landscape from a PCoA of compositional data with Bray-Curtis distances
pseq.core.rel <- microbiome::transform(pseq.core, "compositional")
p <- plot_landscape(pseq.core.rel, method = "PCoA", distance = "bray")
p <- p + labs(title = "PCoA / Compositional / Bray-Curtis")
print(p)

t-SNE

# Landscape from t-SNE with Hellinger transformation and Euclidean distances
p <- plot_landscape(
  pseq,
  method = "t-SNE",
  distance = "euclidean",
  transformation = "hellinger"
)
p <- p + labs(title = "t-SNE / Hellinger / Euclidean")
print(p)

NMDS

# Landscape plot directly from a phyloseq object, using NMDS with
# Bray-Curtis dissimilarities and colouring the samples by nationality
p <- plot_landscape(pseq.core, method = "NMDS", distance = "bray",
                    col = "nationality") +
       labs(title = "NMDS / Bray-Curtis")
# An assignment alone does not draw the figure; print it explicitly
print(p)

For direct access to the ordination coordinates, use the following:

# Project the samples with the given method and dissimilarity measure.
# Some ordinations are sensitive to the random seed, so fix it first to make
# the projection reproducible.
set.seed(423542)
x <- pseq.core
# quiet() is used to suppress the intermediate outputs of the ordination
quiet(x.ord <- ordinate(x, "NMDS", "bray"))
# Pick the projected data (first two columns + metadata)
proj <- phyloseq::plot_ordination(x, x.ord, justDF = TRUE)
# Rename the projection axes
names(proj)[1:2] <- paste("Comp", 1:2, sep = ".")

# Same landscape from a generic data.frame of coordinates
# (note that the random seed will affect the exact ordination)
p <- plot_landscape(proj[, 1:2], col = proj$nationality, legend = TRUE)
print(p)

# Visualize sample names:
ax1 <- names(proj)[[1]]
ax2 <- names(proj)[[2]]
# aes_string() is deprecated in ggplot2, so the columns are looked up by name
# through the .data pronoun instead. The axis titles are set explicitly so
# that they still show the column names.
p <- ggplot(proj, aes(x = .data[[ax1]], y = .data[[ax2]],
                      label = .data[["sample"]])) +
       geom_text(size = 2) +
       labs(x = ax1, y = ax2)
print(p)

Abundance histograms (one-dimensional landscapes)

Population densities for Dialister:

# Load libraries
library(microbiome)
library(phyloseq)

# Load the example data explicitly, as in the first example, so that this
# section also runs on its own
data(dietswap)
pseq <- dietswap

# Visualize population densities for specific taxa
plot_density(pseq, "Dialister") + ggtitle("Absolute abundance")

# Same with log10 compositional abundances
x <- microbiome::transform(pseq, "compositional")
tax <- "Dialister"
# NOTE(review): the compositional transform yields proportions (the core
# thresholds above are written as 5/100 for 5 %), so the axis is labelled as
# a proportion rather than a percentage.
plot_density(x, tax, log10 = TRUE) +
  ggtitle("Relative abundance") +
  xlab("Relative abundance (proportion)")