This section explores the structure of the GlobalPatterns microbiome dataset, including the number of samples and how they are distributed across different environments.
# Load required libraries
library(phyloseq)
library(ggplot2)
library(patchwork)
library(vegan)
library(knitr)
library(kableExtra)
# Load dataset
data("GlobalPatterns")
# Assign to a working variable
ps <- GlobalPatterns
# Create the table
sample_counts <- table(sample_data(ps)$SampleType)
# Display with kable and white background
kable(sample_counts,
col.names = c("Sample Type", "Count"),
caption = "Number of Samples per Environment Type",
format = "html") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "center") %>%
row_spec(0, bold = TRUE, background = "#2E86AB", color = "white") %>%
column_spec(1:2, background = "white", color = "black")
| Sample Type | Count |
|---|---|
| Feces | 4 |
| Freshwater | 2 |
| Freshwater (creek) | 3 |
| Mock | 3 |
| Ocean | 3 |
| Sediment (estuary) | 3 |
| Skin | 3 |
| Soil | 3 |
| Tongue | 2 |
To improve analysis quality, we remove low-abundance taxa that may introduce noise.
# Remove low-abundance taxa (total count <= 3)
ps_filtered <- prune_taxa(taxa_sums(ps) > 3, ps)
# Check how many taxa remain
ntaxa(ps_filtered)
## [1] 14797
This analysis uses Principal Coordinates Analysis (PCoA) with Bray-Curtis distance to visualize differences in microbial community composition across sample types.
# Perform ordination using Bray-Curtis distance
ordu <- ordinate(ps_filtered, method = "PCoA", distance = "bray")
# Create PCoA plot
p_A <- plot_ordination(ps_filtered, ordu, color = "SampleType") +
geom_point(size = 4, alpha = 0.8) +
theme_minimal() +
labs(title = "A: Microbial Community Structure (PCoA)")
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the phyloseq package.
## Please report the issue at <https://github.com/joey711/phyloseq/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Display plot
p_A
# Save plot
ggsave("../results/A_pcoa.png", p_A,
width = 7, height = 5, dpi = 300)
This analysis measures within-sample diversity using the Shannon diversity index, which accounts for both richness and evenness of microbial species.
# Calculate Shannon diversity
div <- estimate_richness(ps_filtered, measures = "Shannon")
# Add sample type information
div$SampleType <- sample_data(ps_filtered)$SampleType
# Create boxplot
p_B <- ggplot(div, aes(x = SampleType, y = Shannon, fill = SampleType)) +
geom_boxplot() +
theme_minimal() +
labs(title = "B: Alpha Diversity (Shannon Index)",
x = "Sample Type",
y = "Shannon Diversity Index") +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none")
# Display plot
p_B
# Save plot
ggsave("../results/B_diversity.png", p_B, width = 6, height = 5, dpi = 300)
This visualization shows the relative abundance of microbial taxa at the phylum level across different sample types.
# Transform counts to relative abundance
ps_rel <- transform_sample_counts(ps_filtered, function(x) x / sum(x))
# Optional: keep only top taxa for cleaner plot
ps_top <- prune_taxa(names(sort(taxa_sums(ps_rel), decreasing = TRUE)[1:10]), ps_rel)
# Create bar plot
p_C <- plot_bar(ps_top, fill = "Phylum") +
facet_wrap(~SampleType, scales = "free_x") +
theme_minimal() +
labs(title = "C: Taxonomic Composition (Top 10 Phyla)",
x = "Samples",
y = "Relative Abundance") +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank())
# Display plot
p_C
# Save plot
ggsave("../results/C_taxa_barplot.png", p_C, width = 10, height = 6, dpi = 300)
This visualization represents the evolutionary relationships between microbial taxa in the dataset.
library(phyloseq)
# Subset to Chlamydiae only (use your filtered dataset)
ps_chl <- subset_taxa(ps_filtered, Phylum == "Chlamydiae")
# Remove taxa with zero counts after subsetting
ps_chl <- prune_taxa(taxa_sums(ps_chl) > 0, ps_chl)
# Create phylogenetic tree plot
p_D <- plot_tree(ps_chl,
color = "SampleType",
shape = "Family",
label.tips = "Genus",
size = "abundance",
plot.margin = 0.6) +
ggtitle("D: Phylogenetic Tree (Chlamydiae)")
# Display plot
p_D
# Save plot
ggsave("../results/D_phylogenetic_tree_chlamydiae.png", p_D,
width = 14, height = 10, dpi = 300)