See Composition page for further microbiota composition heatmaps, as well as the phyloseq tutorial and Neatmaps. Moreover, the aheatmap function of the NMF package provides further high quality heatmap plotting capabilities with row and column annotation color bars, clustering trees and other useful features that are often missing from standard heatmap tools in R.
Load some example data:
library(microbiome) # Load libraries
library(phyloseq)
library(dplyr)
library(reshape2)
library(knitr)
data(peerj32)
pseq <- peerj32$phyloseq # Rename data
# Pick data subset (DI samples from Phylum Bacteroidetes)
pseq2 <- pseq %>%
subset_taxa(Phylum == "Bacteroidetes") %>%
subset_samples(group == "LGG")
# Z transformed abundance data
pseqz <- microbiome::transform(pseq2, "Z")
Visualize the Z-transformed abundance matrix
# Plot the abundances heatmap, round values to 2 decimals
dfm <- melt(round(abundances(pseqz), 1))
colnames(dfm) <- c("Taxa", "Sample", "value")
heat(dfm, "Taxa", "Sample", "value") +
theme(text=element_text(size=10),
axis.text.x = element_text(angle = 90, hjust = 1),
legend.key.size = unit(1.2, "cm"))
Find visually appealing order for rows and columns with the Neatmap approach:
# Sort the matrix rows and cols directly
xo <- neat(abundances(pseqz), method = "NMDS", distance = "euclidean")
# Heatmap visualization, round to two decimals
dfm <- melt(round(xo, 1))
colnames(dfm) <- c("Taxa", "Sample", "value")
heat(dfm, "Taxa", "Sample", "value") +
theme(text=element_text(size=10),
axis.text.x = element_text(angle = 90, hjust = 1),
legend.key.size = unit(1.3, "cm"))
# or use a shortcut to sorting rows (or columns) if just the order was needed
sorted.rows <- neatsort(abundances(pseqz), "rows", method = "NMDS", distance = "euclidean")
Cross-correlate columns of two data sets from related to microbiome and blood serum lipids associations (PeerJ 1:e32).
The function returns correlations, raw p-values, and fdr estimates (not strictly proper as the comparisons are not independent). Keep only those elements that have at least only one significant correlation (n.signif):
# Load example data
otu <- peerj32$microbes
lipids <- peerj32$lipids
# Define data sets to cross-correlate
x <- log10(otu) # OTU Log10 (44 samples x 130 genera)
y <- as.matrix(lipids) # Lipids (44 samples x 389 lipids)
# Cross correlate data sets
correlations <- associate(x, y, method = "spearman", mode = "matrix", p.adj.threshold = 0.05, n.signif = 1)
# Or, alternatively, the same output is also available in a handy table format
correlation.table <- associate(x, y, method = "spearman", mode = "table", p.adj.threshold = 0.05, n.signif = 1)
kable(head(correlation.table))
X1 | X2 | Correlation | p.adj | |
---|---|---|---|---|
1648 | Ruminococcus gnavus et rel. | TG(54:5).2 | 0.7164958 | 0.0022842 |
384 | Moraxellaceae | PC(40:3e) | -0.6941863 | 0.0029225 |
1829 | Uncultured Bacteroidetes | TG(56:2).1 | -0.6987375 | 0.0029225 |
349 | Lactobacillus plantarum et rel. | PC(40:3) | -0.6877976 | 0.0031520 |
1198 | Ruminococcus gnavus et rel. | TG(52:5) | 0.6806216 | 0.0037518 |
264 | Moraxellaceae | PC(38:4).1 | -0.6700504 | 0.0038414 |
Rearrange the data and plot the heatmap and mark significant correlations with stars to reproduce microbiota-lipidome heatmap from Lahti et al. PeerJ (2013) (the ordering of rows and columns may be different):
p <- heat(correlation.table, "X1", "X2",
fill = "Correlation",
star = "p.adj",
p.adj.threshold = 0.05)
p + theme(text=element_text(size=10),
axis.text.x = element_text(angle = 90, hjust = 1),
legend.key.size = unit(1.3, "cm"))
The above examples provide handy shortcuts for heatmap visualization. You can also directly modify the ggplot2 routines. This time, let us set q-value threshold also for cell coloring:
# Order the rows and columns with levels argument if needed:
correlation.table$X1 <- factor(correlation.table$X1, levels = unique(as.character(correlation.table$X1)))
correlation.table$X2 <- factor(correlation.table$X2, levels = unique(as.character(correlation.table$X2)))
# Set black-and-white theme
library(ggplot2)
theme_set(theme_bw())
# Pick only the correlations with q<0.05
# Note: this will leave other cells empty
library(dplyr)
subtable <- filter(correlation.table, p.adj < 0.05)
# Arrange the figure
p <- ggplot(subtable, aes(x = X1, y = X2, fill = Correlation))
p <- p + geom_tile()
p <- p + scale_fill_gradientn("Correlation",
breaks = seq(from = -1, to = 1, by = 0.2),
colours = c("darkblue", "blue", "white", "red", "darkred"),
limits = c(-1,1))
# Polish texts
p <- p + theme(axis.text.x=element_text(angle = 90, hjust=1, face = "italic"),
axis.text.y=element_text(size = 8))
p <- p + xlab("") + ylab("")
# Mark the most significant cells with stars
p <- p + geom_text(data = subset(correlation.table, p.adj < 0.02),
aes(x = X1, y = X2, label = "+"), col = "white", size = 5)
# Plot
print(p)
For detailed information, might be handy to print the actual values on top of the heatmap:
theme_set(theme_bw(20))
df <- correlation.table
p <- ggplot(df, aes(X1, X2, group=X2))
p <- p + geom_tile(aes(fill = Correlation))
p <- p + geom_text(aes(fill = df$Correlation, label = round(df$Correlation, 1)), size = 2)
p <- p + scale_fill_gradientn("Correlation",
breaks = seq(from = -1, to = 1, by = 0.25),
colours = c("blue", "white", "red"),
limits = c(-1, 1)) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1, face="italic")) +
labs(x = "", y = "")
print(p)
An alternative way to visualize correlation matrices is provided by the ggcorr package. Note: this toy example does not consider the compositionality effect in microbial abundance correlations. See the package site for more detailed examples and many more options.
library(GGally)
ggcorr(x[, 1:10], method = c("pairwise", "spearman"), nbreaks = 20, hjust = 0.75)
ggcorr(x[, 1:10], method = c("pairwise", "spearman"), nbreaks = 20, geom = "circle")
ggcorr(x[, 1:10], method = c("pairwise", "spearman"), nbreaks = 20, label = TRUE, label_alpha = TRUE)
ggcorr(data = NULL, cor_matrix = cor(x[, 1:10], use = "everything"), low = "steelblue", mid = "white", high = "darkred", midpoint = 0)