Appendix D — Example solutions for exercises

D.1 Introduction

These example solutions are related to exercises in Chapter 1 and Chapter 2.

D.2 Data containers

These example solutions are related to exercises in Chapter 3.

D.3 Import

These example solutions are related to exercises in Chapter 4.

D.4 Convert & export

These example solutions are related to exercises in Chapter 5.

Show the solution

# Attach mia and pull in the GlobalPatterns example data
library(mia)
data("GlobalPatterns", package = "mia")
tse <- GlobalPatterns

# Round trip: TreeSE -> phyloseq -> TreeSE
pseq <- convertToPhyloseq(tse)
tse_new <- convertFromPhyloseq(pseq)

# Store the round-tripped object as an RDS file, then load it back from disk
rds_path <- "tse.rds"
saveRDS(tse_new, file = rds_path)
tse_read <- readRDS(file = rds_path)

# Melt the reloaded TreeSE into long format and export it as a CSV file
csv_path <- "long_format_tse.csv"
long_df <- meltSE(tse_read)
write.csv(long_df, file = csv_path)

D.5 Taxonomic information

These example solutions are related to exercises in Chapter 6.

Show the solution

# Attach the packages and load the Tengeler2020 example data
library(mia)
library(miaViz)
data("Tengeler2020", package = "mia")
tse <- Tengeler2020

# List the taxonomic ranks present in the data
taxonomyRanks(tse)

# List the unique taxa found at the Order rank
getUnique(tse, rank = "Order")

# Keep the first 15 features and print Nnode of the (not yet aligned) row tree
first_rows <- seq_len(15)
tse_sub <- tse[first_rows, ]
rowTree(tse_sub)$Nnode

# Align the row tree with the remaining features and print Nnode again
tse_sub <- subsetByLeaf(tse_sub, rowLeaf = rownames(tse_sub))
rowTree(tse_sub)$Nnode

# Plot the phylogeny of the subset
phylo_plot <- plotRowTree(tse_sub)

# Build a taxonomic (hierarchy) tree on a separate copy of the subset
tse_sub_hierarchy <- addHierarchyTree(tse_sub)

# Plot the taxonomic tree
taxa_plot <- plotRowTree(tse_sub_hierarchy)

# Show both trees side by side
library(patchwork)
phylo_plot + taxa_plot

D.6 Data wrangling

These example solutions are related to exercises in Chapter 7.

Show the solution

# Load packages and the first example dataset
library(mia)
library(miaViz)
data("enterotype", package = "mia")
tse <- enterotype

# Inspect the first rows of the column metadata
colData(tse) |> head()

# Fix the RNG seed so that the random grouping below is reproducible
set.seed(42)

# Create variable 'group' containing one of 3 random classifications
# for each column of the object
group_labels <- c("group_1", "group_2", "group_3")
tse[["group"]] <- sample(group_labels, ncol(tse), replace = TRUE)

# Plot how groups are distributed
plotBarplot(tse, col.var = "group")

# Split the data by 'group'; the output is a list with one tse object per group
tse_list <- splitOn(tse, by = 2, group = "group")

# Count the number of entries in every element of the list
# (no hard-coded list positions, so this works for any number of groups)
lapply(tse_list, function(x) table(x[["group"]]))

# Sum the abundances of each group, store the sum in the 'summed_value'
# variable, and merge the groups back into a single tse object
tse_list <- lapply(tse_list, function(x) {
    x[["summed_value"]] <- sum(assay(x, "counts"))
    x
})
tse <- unsplitOn(tse_list)

# Load the second example dataset (package stated explicitly, as for the
# first dataset) and merge the two objects
data("GlobalPatterns", package = "mia")
tse <- mergeSEs(tse, GlobalPatterns)
colData(tse)

D.7 Exploration & quality control

These example solutions are related to exercises in Chapter 9.

Show the solution

# Load packages and the example dataset
# (the package is stated explicitly so that the mia copy of the data is used)
library(mia)
library(miaViz)
library(scuttle)
data("GlobalPatterns", package = "mia")
tse <- GlobalPatterns

# Plot counts with a histogram
plotHistogram(tse, assay.type = "counts")

# Add prevalence to rowData
tse <- addPrevalence(tse)

# Plot the distribution of prevalence values
plotHistogram(tse, row.var = "prevalence")

# Add QC metrics to colData; the library size is read from "total" below
tse <- addPerCellQCMetrics(tse)
# And visualize the library sizes
plotHistogram(tse, col.var = "total")

# Plot categorical values with a barplot
plotBarplot(tse, col.var = "SampleType")

# Available taxonomic ranks
taxonomyRanks(tse)

# Check the dominance of genera
summarizeDominance(tse, rank = "Genus")

# Get the prevalent genera (detection threshold 1, prevalence threshold 0.2)
getPrevalent(tse, rank = "Genus", prevalence = 0.2, detection = 1)

# Get the most abundant taxa based on their median abundance
getTop(tse, method = "median")

# Visualize the abundance density of the most abundant features
plotAbundanceDensity(tse)

D.8 Subsetting

These example solutions are related to exercises in Chapter 10.

Show the solution

# Attach the packages and load one of the available example datasets
library(mia)
library(scuttle)
library(miaViz)
data("GlobalPatterns", package = "mia")
tse <- GlobalPatterns

# Add QC metrics to colData; the library size is read from "total" below
tse <- addPerCellQCMetrics(tse)

# Peek at colData and plot how the library sizes are distributed
head(colData(tse))
plotHistogram(tse, col.var = "total")

# First keep only the samples whose library size exceeds the threshold ...
min_library_size <- 500000
large_library <- tse$total > min_library_size
tse_library <- tse[, large_library]

# ... then subset the remaining data based on feature prevalence
tse_subset <- subsetByPrevalent(
    tse_library,
    detection = 1,
    prevalence = 2 / 10
)

D.9 Agglomeration

These example solutions are related to exercises in Chapter 11.

Show the solution

# Load packages and dataset
# (the package is stated explicitly so that the mia copy of the data is used)
library(mia)
data("GlobalPatterns", package = "mia")
tse <- GlobalPatterns

# Show taxonomic ranks
taxonomyRanks(tse)

# Show the unique values for a rank
getUnique(tse, rank = "Phylum")

# Agglomerate by rank and show the number of rows
tse_agglomerated <- agglomerateByRank(tse, rank = "Phylum")
nrow(tse_agglomerated)

# Show rowData
rowData(tse_agglomerated)

# Agglomerate by all ranks; the results are stored as alternative experiments,
# one of which is shown here
tse_all_ranks <- agglomerateByRanks(tse)
altExpNames(tse_all_ranks)
altExp(tse_all_ranks, "Phylum")

# Agglomerate by prevalence once and store the same result as an alternative
# experiment, instead of running the identical agglomeration a second time
tse_prevalence <- agglomerateByPrevalence(tse)
altExp(tse, "prevalent") <- tse_prevalence

# Show column variables and merge the data based on one of them
colData(tse) |> colnames()
tse_variable <- agglomerateByVariable(tse, group = "SampleType", by = "cols")

D.10 Transformation

These example solutions are related to exercises in Chapter 12.

Show the solution

# Load packages and the example dataset
# (the package is stated explicitly so that the mia copy of the data is used)
library(mia)
library(miaViz)
data("GlobalPatterns", package = "mia")
tse <- GlobalPatterns

# Visualize counts
plotHistogram(tse, assay.type = "counts")

# Transformations on the counts assay: relative abundance and clr
tse <- transformAssay(tse, assay.type = "counts", method = "relabundance")
tse <- transformAssay(
    tse,
    assay.type = "counts",
    method = "clr",
    pseudocount = 1
)

# List the assays that are now available
assayNames(tse)

# Plot the counts and clr assays side by side
library(patchwork)
p1 <- plotHistogram(tse, assay.type = "counts")
p2 <- plotHistogram(tse, assay.type = "clr")

p1 + p2

# Extract the clr abundance table and show its first 100 rows and 10 columns
clr_matrix <- assay(tse, "clr")
clr_matrix[1:100, 1:10]

# Agglomerate data by ranks
tse <- agglomerateByRanks(tse)

# Apply the same transformations across all alternative experiments
tse <- transformAssay(
    tse,
    altexp = altExpNames(tse),
    assay.type = "counts",
    method = "relabundance"
)
tse <- transformAssay(
    tse,
    altexp = altExpNames(tse),
    assay.type = "counts",
    method = "clr",
    pseudocount = 1
)

# phiLR transformation on family level
tse <- agglomerateByRank(tse, rank = "Family")
tse <- transformAssay(
    tse,
    assay.type = "counts",
    method = "philr",
    MARGIN = 1L,
    pseudocount = TRUE
)

# The philr result is read from the "philr" alternative experiment
assay(altExp(tse, "philr"))

D.11 Community composition

These example solutions are related to exercises in Chapter 13.

D.12 Alpha Diversity

These example solutions are related to exercises in Chapter 14.

D.13 Community similarity

These example solutions are related to exercises in Chapter 15.

D.14 Community typing

These example solutions are related to exercises in Chapter 16.

D.15 Differential abundance

These example solutions are related to exercises in Chapter 17.

D.16 Correlation

These example solutions are related to exercises in Chapter 18.

D.17 Mediation

These example solutions are related to exercises in Chapter 19.

D.18 Network learning & analysis

These example solutions are related to exercises in Chapter 20.

D.19 Network comparison

These example solutions are related to exercises in Chapter 21.

D.20 Cross-association

These example solutions are related to exercises in Chapter 23.

D.21 Ordination-based multiassay analysis

These example solutions are related to exercises in Chapter 24.

D.22 Multi-omics prediction and classification

These example solutions are related to exercises in Chapter 25.

D.23 Machine learning

These example solutions are related to exercises in Chapter 26.