8 Beta diversity demo
8.1 Visualizations
Lets generate ordination plots with different methods and transformations.
#### calculating Bray Curtis dissimilarity and PCoA
tse <- transformSamples(tse, method = "relabundance")
tse <- runMDS(tse, FUN = vegan::vegdist, method = "bray", name = "PCoA_BC", exprs_values = "relabundance")
p <- plotReducedDim(tse, "PCoA_BC", colour_by = "patient_status")
# Add explained variance for each axis
e <- attr(reducedDim(tse, "PCoA_BC"), "eig");
rel_eig <- e/sum(e[e>0])
p <- p + labs(x = paste("PCoA 1 (", round(100 * rel_eig[[1]],1), "%", ")", sep = ""),
y = paste("PCoA 2 (", round(100 * rel_eig[[2]],1), "%", ")", sep = ""))
print(p)
#### Aitchinson distances and PCA
tse <- transformSamples(tse, method = "clr", pseudocount = 1)## Warning: All the total abundances of samples do not sum-up to a fixed constant.
## Please consider to apply, e.g., relative transformation in prior to CLR
## transformation.
tse <- runMDS(tse, FUN = vegan::vegdist, name = "MDS_euclidean",
method = "euclidean", exprs_values = "clr")
p <- plotReducedDim(tse, "MDS_euclidean", colour_by = "patient_status_vs_cohort")
# Add explained variance for each axis
e <- attr(reducedDim(tse, "MDS_euclidean"), "eig");
rel_eig <- e/sum(e[e>0])
p <- p + labs(x = paste("Axis 1 (", round(100 * rel_eig[[1]],1), "%", ")", sep = ""),
y = paste("Axis 2 (", round(100 * rel_eig[[2]],1), "%", ")", sep = ""))
print(p)
PCA is a subtype of MDS with Euclidean distances, below is a different alternative for running the same analysis.
# alternative method
tse <- runPCA(tse, name = "PCA", exprs_values = "clr", ncomponents = 10)
plotReducedDim(tse, "PCA", colour_by = "patient_status_vs_cohort")
One can use also ggplot for ordination plots for the flexible adaptablity.
dis <- vegan::vegdist(t(assays(tse)$counts), method = "jaccard")
# principal coordinate analysis
jaccard_pcoa <- ecodist::pco(dis)
# a data frame from principal coordinates and groupng variable
jaccard_pcoa_df <- data.frame(pcoa1 = jaccard_pcoa$vectors[,1],
pcoa2 = jaccard_pcoa$vectors[,2],
patient_status_vs_cohort = colData(tse)$patient_status_vs_cohort)
# plot
jaccard_plot <- ggplot(data = jaccard_pcoa_df, aes(x=pcoa1, y=pcoa2, color = patient_status_vs_cohort)) +
geom_point() +
labs(x = paste("Axis 1 (", round(100 * jaccard_pcoa$values[[1]] / sum(jaccard_pcoa$values), 1), "%", ")", sep = ""),
y = paste("Axis 2 (", round(100 * jaccard_pcoa$values[[2]] / sum(jaccard_pcoa$values), 1), "%", ")", sep = ""),
title = "Jaccard PCoA") +
theme(title = element_text(size = 12)) +
theme_light()
jaccard_plot
8.2 Hypothesis testing
PERMANOVA with the function adonis is most commonly used to detect differences in multivariate data. adonis function was recently updated with slightly different functionality. Now the adonis2 allows independent analysis of terms.
variable_names <- c("patient_status", "cohort")
tse_genus <- agglomerateByRank(tse, "Genus")## Warning: 'clr' includes negative values.
## Agglomeration of it might lead to meaningless values.
## Check the assay, and consider doing transformation again manually with agglomerated data.
# Apply relative transform
tse_genus <- transformSamples(tse_genus, method = "relabundance")
set.seed(12346)
# We choose 99 random permutations for speed. Consider applying more (999 or 9999)
assay <- t(assay(tse_genus,"relabundance"))
mod <- paste("assay ~", paste(variable_names, collapse="+")) %>% as.formula()
permanova2 <- vegan::adonis2(mod,
by = "margin", # each term analyzed individually
data = colData(tse),
method = "bray",
permutations = 99)
print(permanova2)## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 99
##
## vegan::adonis2(formula = mod, data = colData(tse), permutations = 99, method = "bray", by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## patient_status 1 0.1885 0.05817 1.490 0.23
## cohort 2 0.1450 0.04474 0.573 0.75
## Residual 23 2.9104 0.89787
## Total 26 3.2414 1.00000
# older adonis for reference
permanova <- vegan::adonis(mod,
#by = "margin", # each term analyzed sequentially
data = colData(tse),
method = "bray",
permutations = 99)## 'adonis' will be deprecated: use 'adonis2' instead
permanova$aov.tab## Permutation: free
## Number of permutations: 99
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## patient_status 1 0.1860 0.186024 1.47011 0.05739 0.22
## cohort 2 0.1450 0.072503 0.57298 0.04474 0.79
## Residuals 23 2.9104 0.126537 0.89787
## Total 26 3.2414 1.00000
With older adonis version one cam calculate top coefficients driving the differences between groups.
# older adonis supplies the coefficients
coef <- coefficients(permanova)["cohort1",]
top.coef <- sort(head(coef[rev(order(abs(coef)))],20))
# plot
top_taxa_coeffient_plot <- ggplot(data.frame(x = top.coef,
y = factor(names(top.coef),
unique(names(top.coef)))),
aes(x = x, y = y)) +
geom_bar(stat="identity") +
labs(x="", y="", title="Top Taxa") +
theme_bw()
top_taxa_coeffient_plot
8.2.1 Testing the differences in dispersion
PEMRANOVA doesn’t differentiate between different within-group variation, i.e. dispersion, or the mean differences between groups, i.e. the location of the centroid. Follow-up testing can be done with PERMDISP2 implemented in the vegan package.
dis <- vegan::vegdist(t(assays(tse)$counts), method = "bray")
b <- vegan::betadisper(dis, colData(tse)$cohort)
print(anova(b))## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.000375 0.0001875 0.0166 0.9835
## Residuals 24 0.270795 0.0112831
# boxplor for distances to centroid
p <- cbind(distance = as.numeric(b$distances),
cohort = colData(tse)$cohort) %>%
as_tibble() %>%
mutate(distance = as.numeric(distance)) %>%
ggplot(aes(cohort, distance)) +
geom_boxplot() +
theme_light()
print(p)
End of the demo.
8.3 Exercises
Do “Beta diversity” from the exercises.
## deldir (NA -> 1.0-6 ) [CRAN]
## interp (NA -> 1.1-2 ) [CRAN]
## htmlTable (2.4.0 -> 2.4.1 ) [CRAN]
## latticeExtra (0.6-29 -> 0.6-30) [CRAN]
## * checking for file ‘/tmp/RtmpzBk5ZT/remotes5b840995f80/FrederickHuangLin-ANCOMBC-093ce61/DESCRIPTION’ ... OK
## * preparing ‘ANCOMBC’:
## * checking DESCRIPTION meta-information ... OK
## * installing the package to process help pages
## Loading required namespace: ANCOMBC
## Warning: replacing previous import ‘microbiome::coverage’ by ‘SummarizedExperiment::coverage’ when loading ‘ANCOMBC’
## Warning: replacing previous import ‘SummarizedExperiment::distance’ by ‘phyloseq::distance’ when loading ‘ANCOMBC’
## Warning: replacing previous import ‘SummarizedExperiment::start’ by ‘stats::start’ when loading ‘ANCOMBC’
## Warning: replacing previous import ‘SummarizedExperiment::end’ by ‘stats::end’ when loading ‘ANCOMBC’
## * saving partial Rd database
## * checking for LF line-endings in source and make files and shell scripts
## * checking for empty or unneeded directories
## * building ‘ANCOMBC_1.99.1.tar.gz’
library(mia)
library(patchwork)
library(tidySummarizedExperiment)
library(ANCOMBC)
library(ALDEx2)
library(Maaslin2)
library(knitr)
library(tidyverse)
# LinDA is a very new package that we therefore install separately
# directly from Github:
if (!"LinDA" %in% installed.packages()[, "Package"]) {
devtools::install_github("zhouhj1994/LinDA")
}## * checking for file ‘/tmp/RtmpzBk5ZT/remotes5b84ee47b90/zhouhj1994-LinDA-bf65791/DESCRIPTION’ ... OK
## * preparing ‘LinDA’:
## * checking DESCRIPTION meta-information ... OK
## * checking for LF line-endings in source and make files and shell scripts
## * checking for empty or unneeded directories
## * building ‘LinDA_0.1.0.tar.gz’
library(LinDA)# import our dataset
tse <- read_rds("data/Tengeler2020/tse.rds")