Normalization and group-wise comparisons with DESeq2

Examples adapted from Callahan et al. F1000 (2017).

Load example data:

# Load libraries
library(microbiome)
library(ggplot2)
library(magrittr)
library(dplyr)
# Probiotics intervention example data 
data(dietswap) 

# Only check the core taxa to speed up examples
pseq <- core(dietswap, detection = 50, prevalence = 80/100)

Toy example, to be polished:

library(phyloseq)
library(reshape2)
library(DESeq2)
library(knitr)
library(magrittr)
# Running the DESeq2 analysis
ds2 <- phyloseq_to_deseq2(pseq, ~ nationality)
dds <- DESeq(ds2)
res <- results(dds)
df <- as.data.frame(res)
df$taxon <- rownames(df)
df <- df %>% arrange(log2FoldChange, padj)

library(knitr)
print(head(kable((df))))
## [1] "|                                     |  baseMean| log2FoldChange|     lfcSE|       stat|    pvalue|      padj|taxon                                |"
## [2] "|:------------------------------------|---------:|--------------:|---------:|----------:|---------:|---------:|:------------------------------------|"
## [3] "|Bacteroides vulgatus et rel.         | 1433.9869|     -3.1720779| 0.2058346| -15.410809| 0.0000000| 0.0000000|Bacteroides vulgatus et rel.         |"
## [4] "|Subdoligranulum variable at rel.     |  203.1597|     -0.6287847| 0.1318531|  -4.768828| 0.0000019| 0.0000051|Subdoligranulum variable at rel.     |"
## [5] "|Butyrivibrio crossotus et rel.       |  186.4217|     -0.4881398| 0.1519530|  -3.212440| 0.0013161| 0.0018097|Butyrivibrio crossotus et rel.       |"
## [6] "|Clostridium symbiosum et rel.        |  236.2309|     -0.4867323| 0.1304717|  -3.730559| 0.0001911| 0.0003002|Clostridium symbiosum et rel.        |"

Validating DESeq2 results

# Identify top taxa based on standard ANOVA
source(system.file("extdata/check_anova.R", package = "microbiome"))
ano <- check_anova(pseq, "nationality");
ano$log2FC <- log2(ano$ave.AFR) - log2(ano$ave.AAM)
taxa.anova <- as.character(subset(ano, padj < 0.01 & abs(log2FC) > log2(2))$taxa)

# lowPick the top taxa based on DESEq2
taxa.deseq <- subset(res, padj < 0.01 & abs(log2FoldChange) > log2(2))$taxon

# Check overlap
# Most DESEq2 taxa are confirmed with ANOVA
library(gplots)

# Also the est p-values are well correlated (higher not so)
mf <- data.frame(df$padj, ano$padj)
p <- ggplot(mf, aes(x = log10(df$padj), y = log10(ano$padj))) +
       labs(x = 'DESeq2 adjusted p-value', y = 'ANOVA adjusted p-value') +
       geom_point()
print(p)

library(venn) # Check UpSet plot instead
venn( list(ANOVA = taxa.anova,DESeq2 = taxa.deseq))