Heatmaps for microbiome analysis

See Composition page for further microbiota composition heatmaps, as well as the phyloseq tutorial and Neatmaps. Moreover, the aheatmap function of the NMF package provides further high quality heatmap plotting capabilities with row and column annotation color bars, clustering trees and other useful features that are often missing from standard heatmap tools in R.

Load some example data:

library(microbiome) # Load libraries
library(phyloseq)
library(dplyr)
library(reshape2)
library(knitr)


data(peerj32)
pseq <- peerj32$phyloseq    # Rename data


# Pick data subset (DI samples from Phylum Bacteroidetes)
pseq2 <- pseq %>%
         subset_taxa(Phylum == "Bacteroidetes") %>%
         subset_samples(group == "LGG")


# Z transformed abundance data
pseqz <- microbiome::transform(pseq2, "Z")

Matrix heatmaps

Visualize the Z-transformed abundance matrix

# Plot the abundances heatmap, round values to 2 decimals
dfm <- melt(round(abundances(pseqz), 1))

colnames(dfm) <- c("Taxa", "Sample", "value")

heat(dfm, "Taxa", "Sample", "value") +
  theme(text=element_text(size=10), 
        axis.text.x = element_text(angle = 90, hjust = 1),
        legend.key.size = unit(1.2, "cm"))

Find visually appealing order for rows and columns with the Neatmap approach:

# Sort the matrix rows and cols directly
xo <- neat(abundances(pseqz), method = "NMDS", distance = "euclidean")

# Heatmap visualization, round to two decimals
dfm <- melt(round(xo, 1))

colnames(dfm) <- c("Taxa", "Sample", "value")

heat(dfm, "Taxa", "Sample", "value") +
  theme(text=element_text(size=10), 
        axis.text.x = element_text(angle = 90, hjust = 1), 
        legend.key.size = unit(1.3, "cm"))

# or use a shortcut to sorting rows (or columns) if just the order was needed
sorted.rows <- neatsort(abundances(pseqz), "rows", method = "NMDS", distance = "euclidean") 

Cross-correlating data sets

Cross-correlate columns of two data sets from related to microbiome and blood serum lipids associations (PeerJ 1:e32).

The function returns correlations, raw p-values, and fdr estimates (not strictly proper as the comparisons are not independent). Keep only those elements that have at least only one significant correlation (n.signif):

# Load example data 
otu <- peerj32$microbes 
lipids <- peerj32$lipids 

# Define data sets to cross-correlate
x <- log10(otu) # OTU Log10 (44 samples x 130 genera)
y <- as.matrix(lipids) # Lipids (44 samples x 389 lipids)

# Cross correlate data sets
correlations <- associate(x, y, method = "spearman", mode = "matrix", p.adj.threshold = 0.05, n.signif = 1)

# Or, alternatively, the same output is also available in a handy table format
correlation.table <- associate(x, y, method = "spearman", mode = "table", p.adj.threshold = 0.05, n.signif = 1)

kable(head(correlation.table))
X1 X2 Correlation p.adj
1648 Ruminococcus gnavus et rel. TG(54:5).2 0.7164958 0.0022842
384 Moraxellaceae PC(40:3e) -0.6941863 0.0029225
1829 Uncultured Bacteroidetes TG(56:2).1 -0.6987375 0.0029225
349 Lactobacillus plantarum et rel. PC(40:3) -0.6877976 0.0031520
1198 Ruminococcus gnavus et rel. TG(52:5) 0.6806216 0.0037518
264 Moraxellaceae PC(38:4).1 -0.6700504 0.0038414

Association heatmaps

Rearrange the data and plot the heatmap and mark significant correlations with stars to reproduce microbiota-lipidome heatmap from Lahti et al. PeerJ (2013) (the ordering of rows and columns may be different):

p <- heat(correlation.table, "X1", "X2", 
          fill = "Correlation", 
          star = "p.adj", 
          p.adj.threshold = 0.05) 
p + theme(text=element_text(size=10), 
          axis.text.x = element_text(angle = 90, hjust = 1), 
          legend.key.size = unit(1.3, "cm"))

Heatmaps with ggplot2

The above examples provide handy shortcuts for heatmap visualization. You can also directly modify the ggplot2 routines. This time, let us set q-value threshold also for cell coloring:

# Order the rows and columns with levels argument if needed:
correlation.table$X1 <- factor(correlation.table$X1, levels = unique(as.character(correlation.table$X1)))
correlation.table$X2 <- factor(correlation.table$X2, levels = unique(as.character(correlation.table$X2)))

# Set black-and-white theme
library(ggplot2)
theme_set(theme_bw())

# Pick only the correlations with q<0.05
# Note: this will leave other cells empty
library(dplyr)
subtable <- filter(correlation.table, p.adj < 0.05)

# Arrange the figure
p <- ggplot(subtable, aes(x = X1, y = X2, fill = Correlation))
p <- p + geom_tile() 
p <- p + scale_fill_gradientn("Correlation", 
                       breaks = seq(from = -1, to = 1, by = 0.2), 
                   colours = c("darkblue", "blue", "white", "red", "darkred"), 
                   limits = c(-1,1)) 

# Polish texts
p <- p + theme(axis.text.x=element_text(angle = 90, hjust=1, face = "italic"),
               axis.text.y=element_text(size = 8))
p <- p + xlab("") + ylab("")

# Mark the most significant cells with stars
p <- p + geom_text(data = subset(correlation.table, p.adj < 0.02), 
               aes(x = X1, y = X2, label = "+"), col = "white", size = 5)

# Plot
print(p)

Heatmap with text

For detailed information, might be handy to print the actual values on top of the heatmap:

theme_set(theme_bw(20))
df <- correlation.table
p <- ggplot(df, aes(X1, X2, group=X2)) 
p <- p + geom_tile(aes(fill = Correlation)) 
p <- p + geom_text(aes(fill = df$Correlation, label = round(df$Correlation, 1)), size = 2) 
p <- p + scale_fill_gradientn("Correlation", 
                      breaks = seq(from = -1, to = 1,  by = 0.25), 
                      colours = c("blue", "white", "red"), 
                  limits = c(-1, 1)) +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, face="italic")) +
        labs(x = "", y = "")
print(p)

ggcorr

An alternative way to visualize correlation matrices is provided by the ggcorr package. Note: this toy example does not consider the compositionality effect in microbial abundance correlations. See the package site for more detailed examples and many more options.

library(GGally)
ggcorr(x[, 1:10], method = c("pairwise", "spearman"), nbreaks = 20, hjust = 0.75)
ggcorr(x[, 1:10], method = c("pairwise", "spearman"), nbreaks = 20, geom = "circle")
ggcorr(x[, 1:10], method = c("pairwise", "spearman"), nbreaks = 20, label = TRUE, label_alpha = TRUE)
ggcorr(data = NULL, cor_matrix = cor(x[, 1:10], use = "everything"), low = "steelblue", mid = "white", high = "darkred", midpoint = 0)