8 Beta diversity demo
8.1 Visualizations
Lets generate ordination plots with different methods and transformations.
#### calculating Bray Curtis dissimilarity and PCoA
<- transformSamples(tse, method = "relabundance")
tse <- runMDS(tse, FUN = vegan::vegdist, method = "bray", name = "PCoA_BC", exprs_values = "relabundance")
tse
<- plotReducedDim(tse, "PCoA_BC", colour_by = "patient_status")
p
# Add explained variance for each axis
<- attr(reducedDim(tse, "PCoA_BC"), "eig");
e <- e/sum(e[e>0])
rel_eig <- p + labs(x = paste("PCoA 1 (", round(100 * rel_eig[[1]],1), "%", ")", sep = ""),
p y = paste("PCoA 2 (", round(100 * rel_eig[[2]],1), "%", ")", sep = ""))
print(p)
#### Aitchinson distances and PCA
<- transformSamples(tse, method = "clr", pseudocount = 1) tse
## Warning: All the total abundances of samples do not sum-up to a fixed constant.
## Please consider to apply, e.g., relative transformation in prior to CLR
## transformation.
<- runMDS(tse, FUN = vegan::vegdist, name = "MDS_euclidean",
tse method = "euclidean", exprs_values = "clr")
<- plotReducedDim(tse, "MDS_euclidean", colour_by = "patient_status_vs_cohort")
p
# Add explained variance for each axis
<- attr(reducedDim(tse, "MDS_euclidean"), "eig");
e <- e/sum(e[e>0])
rel_eig <- p + labs(x = paste("Axis 1 (", round(100 * rel_eig[[1]],1), "%", ")", sep = ""),
p y = paste("Axis 2 (", round(100 * rel_eig[[2]],1), "%", ")", sep = ""))
print(p)
PCA is a subtype of MDS with Euclidean distances, below is a different alternative for running the same analysis.
# alternative method
<- runPCA(tse, name = "PCA", exprs_values = "clr", ncomponents = 10)
tse plotReducedDim(tse, "PCA", colour_by = "patient_status_vs_cohort")
One can use also ggplot for ordination plots for the flexible adaptablity.
<- vegan::vegdist(t(assays(tse)$counts), method = "jaccard")
dis
# principal coordinate analysis
<- ecodist::pco(dis)
jaccard_pcoa
# a data frame from principal coordinates and groupng variable
<- data.frame(pcoa1 = jaccard_pcoa$vectors[,1],
jaccard_pcoa_df pcoa2 = jaccard_pcoa$vectors[,2],
patient_status_vs_cohort = colData(tse)$patient_status_vs_cohort)
# plot
<- ggplot(data = jaccard_pcoa_df, aes(x=pcoa1, y=pcoa2, color = patient_status_vs_cohort)) +
jaccard_plot geom_point() +
labs(x = paste("Axis 1 (", round(100 * jaccard_pcoa$values[[1]] / sum(jaccard_pcoa$values), 1), "%", ")", sep = ""),
y = paste("Axis 2 (", round(100 * jaccard_pcoa$values[[2]] / sum(jaccard_pcoa$values), 1), "%", ")", sep = ""),
title = "Jaccard PCoA") +
theme(title = element_text(size = 12)) +
theme_light()
jaccard_plot
8.2 Hypothesis testing
PERMANOVA with the function adonis is most commonly used to detect differences in multivariate data. adonis function was recently updated with slightly different functionality. Now the adonis2 allows independent analysis of terms.
<- c("patient_status", "cohort")
variable_names
<- agglomerateByRank(tse, "Genus") tse_genus
## Warning: 'clr' includes negative values.
## Agglomeration of it might lead to meaningless values.
## Check the assay, and consider doing transformation again manually with agglomerated data.
# Apply relative transform
<- transformSamples(tse_genus, method = "relabundance")
tse_genus
set.seed(12346)
# We choose 99 random permutations for speed. Consider applying more (999 or 9999)
<- t(assay(tse_genus,"relabundance"))
assay
<- paste("assay ~", paste(variable_names, collapse="+")) %>% as.formula()
mod
<- vegan::adonis2(mod,
permanova2 by = "margin", # each term analyzed individually
data = colData(tse),
method = "bray",
permutations = 99)
print(permanova2)
## Permutation test for adonis under reduced model
## Marginal effects of terms
## Permutation: free
## Number of permutations: 99
##
## vegan::adonis2(formula = mod, data = colData(tse), permutations = 99, method = "bray", by = "margin")
## Df SumOfSqs R2 F Pr(>F)
## patient_status 1 0.1885 0.05817 1.490 0.23
## cohort 2 0.1450 0.04474 0.573 0.75
## Residual 23 2.9104 0.89787
## Total 26 3.2414 1.00000
# older adonis for reference
<- vegan::adonis(mod,
permanova #by = "margin", # each term analyzed sequentially
data = colData(tse),
method = "bray",
permutations = 99)
## 'adonis' will be deprecated: use 'adonis2' instead
$aov.tab permanova
## Permutation: free
## Number of permutations: 99
##
## Terms added sequentially (first to last)
##
## Df SumsOfSqs MeanSqs F.Model R2 Pr(>F)
## patient_status 1 0.1860 0.186024 1.47011 0.05739 0.22
## cohort 2 0.1450 0.072503 0.57298 0.04474 0.79
## Residuals 23 2.9104 0.126537 0.89787
## Total 26 3.2414 1.00000
With older adonis version one cam calculate top coefficients driving the differences between groups.
# older adonis supplies the coefficients
<- coefficients(permanova)["cohort1",]
coef <- sort(head(coef[rev(order(abs(coef)))],20))
top.coef
# plot
<- ggplot(data.frame(x = top.coef,
top_taxa_coeffient_plot y = factor(names(top.coef),
unique(names(top.coef)))),
aes(x = x, y = y)) +
geom_bar(stat="identity") +
labs(x="", y="", title="Top Taxa") +
theme_bw()
top_taxa_coeffient_plot
8.2.1 Testing the differences in dispersion
PEMRANOVA doesn’t differentiate between different within-group variation, i.e. dispersion, or the mean differences between groups, i.e. the location of the centroid. Follow-up testing can be done with PERMDISP2 implemented in the vegan package.
<- vegan::vegdist(t(assays(tse)$counts), method = "bray")
dis <- vegan::betadisper(dis, colData(tse)$cohort)
b print(anova(b))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 2 0.000375 0.0001875 0.0166 0.9835
## Residuals 24 0.270795 0.0112831
# boxplor for distances to centroid
<- cbind(distance = as.numeric(b$distances),
p cohort = colData(tse)$cohort) %>%
as_tibble() %>%
mutate(distance = as.numeric(distance)) %>%
ggplot(aes(cohort, distance)) +
geom_boxplot() +
theme_light()
print(p)
End of the demo.
8.3 Exercises
Do “Beta diversity” from the exercises.
## deldir (NA -> 1.0-6 ) [CRAN]
## interp (NA -> 1.1-2 ) [CRAN]
## htmlTable (2.4.0 -> 2.4.1 ) [CRAN]
## latticeExtra (0.6-29 -> 0.6-30) [CRAN]
## * checking for file ‘/tmp/RtmpzBk5ZT/remotes5b840995f80/FrederickHuangLin-ANCOMBC-093ce61/DESCRIPTION’ ... OK
## * preparing ‘ANCOMBC’:
## * checking DESCRIPTION meta-information ... OK
## * installing the package to process help pages
## Loading required namespace: ANCOMBC
## Warning: replacing previous import ‘microbiome::coverage’ by ‘SummarizedExperiment::coverage’ when loading ‘ANCOMBC’
## Warning: replacing previous import ‘SummarizedExperiment::distance’ by ‘phyloseq::distance’ when loading ‘ANCOMBC’
## Warning: replacing previous import ‘SummarizedExperiment::start’ by ‘stats::start’ when loading ‘ANCOMBC’
## Warning: replacing previous import ‘SummarizedExperiment::end’ by ‘stats::end’ when loading ‘ANCOMBC’
## * saving partial Rd database
## * checking for LF line-endings in source and make files and shell scripts
## * checking for empty or unneeded directories
## * building ‘ANCOMBC_1.99.1.tar.gz’
library(mia)
library(patchwork)
library(tidySummarizedExperiment)
library(ANCOMBC)
library(ALDEx2)
library(Maaslin2)
library(knitr)
library(tidyverse)
# LinDA is a very new package that we therefore install separately
# directly from Github:
if (!"LinDA" %in% installed.packages()[, "Package"]) {
::install_github("zhouhj1994/LinDA")
devtools }
## * checking for file ‘/tmp/RtmpzBk5ZT/remotes5b84ee47b90/zhouhj1994-LinDA-bf65791/DESCRIPTION’ ... OK
## * preparing ‘LinDA’:
## * checking DESCRIPTION meta-information ... OK
## * checking for LF line-endings in source and make files and shell scripts
## * checking for empty or unneeded directories
## * building ‘LinDA_0.1.0.tar.gz’
library(LinDA)
# import our dataset
<- read_rds("data/Tengeler2020/tse.rds") tse