Examples adapted from Callahan et al. F1000 (2017).
Load example data:
# Load libraries
library(microbiome)
library(ggplot2)
library(magrittr)
library(dplyr)
# Probiotics intervention example data
data(dietswap)
# Only check the core taxa to speed up examples
pseq <- core(dietswap, detection = 50, prevalence = 80/100)
Toy example, to be polished:
library(phyloseq)
library(reshape2)
library(DESeq2)
library(knitr)
library(magrittr)
# Running the DESeq2 analysis
ds2 <- phyloseq_to_deseq2(pseq, ~ nationality)
dds <- DESeq(ds2)
res <- results(dds)
df <- as.data.frame(res)
df$taxon <- rownames(df)
df <- df %>% arrange(log2FoldChange, padj)
library(knitr)
print(head(kable((df))))
## [1] "| | baseMean| log2FoldChange| lfcSE| stat| pvalue| padj|taxon |"
## [2] "|:------------------------------------|---------:|--------------:|---------:|----------:|---------:|---------:|:------------------------------------|"
## [3] "|Bacteroides vulgatus et rel. | 1433.9869| -3.1720779| 0.2058346| -15.410809| 0.0000000| 0.0000000|Bacteroides vulgatus et rel. |"
## [4] "|Subdoligranulum variable at rel. | 203.1597| -0.6287847| 0.1318531| -4.768828| 0.0000019| 0.0000051|Subdoligranulum variable at rel. |"
## [5] "|Butyrivibrio crossotus et rel. | 186.4217| -0.4881398| 0.1519530| -3.212440| 0.0013161| 0.0018097|Butyrivibrio crossotus et rel. |"
## [6] "|Clostridium symbiosum et rel. | 236.2309| -0.4867323| 0.1304717| -3.730559| 0.0001911| 0.0003002|Clostridium symbiosum et rel. |"
# Identify top taxa based on standard ANOVA
source(system.file("extdata/check_anova.R", package = "microbiome"))
ano <- check_anova(pseq, "nationality");
ano$log2FC <- log2(ano$ave.AFR) - log2(ano$ave.AAM)
taxa.anova <- as.character(subset(ano, padj < 0.01 & abs(log2FC) > log2(2))$taxa)
# lowPick the top taxa based on DESEq2
taxa.deseq <- subset(res, padj < 0.01 & abs(log2FoldChange) > log2(2))$taxon
# Check overlap
# Most DESEq2 taxa are confirmed with ANOVA
library(gplots)
# Also the est p-values are well correlated (higher not so)
mf <- data.frame(df$padj, ano$padj)
p <- ggplot(mf, aes(x = log10(df$padj), y = log10(ano$padj))) +
labs(x = 'DESeq2 adjusted p-value', y = 'ANOVA adjusted p-value') +
geom_point()
print(p)
library(venn) # Check UpSet plot instead
venn( list(ANOVA = taxa.anova,DESeq2 = taxa.deseq))