UniFrac analysis

2_UniFrac.utf8
library(phyloseq)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.5-6
library(pheatmap)
library(ape)
library(stats)
library(RColorBrewer)
library(tidyverse)
## ─ Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.1
## ✓ tidyr   1.1.1     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ─ Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Load otu table

load("1_BacterialComposition.RData")
otutab = cbind(otutab.c[,colnames(otutab.c) != "201508_Mi3"], otutab.sw)
meta_16S.sw = meta_16S.sw %>% mutate(Colony = Site)
meta = rbind(meta_16S.c, meta_16S.sw[,colnames(meta_16S.c)])
rownames(meta) = meta$ID

read tree & rooting

Reference: https://groups.google.com/forum/#!topic/qiime-forum/rGXLnfnHbFA

tree = read.tree("otus.outgroup.mafft.FastTree.tre")
tree = phy_tree(tree)
outgroup = "Archaea"
tree = root(tree,outgroup,r=T) # outgroupをrootに指定
tree = drop.tip(tree,outgroup) # outgroupを除外

Weighted UniFrac

# Make phyloseq object
otutab = otu_table(otutab, taxa_are_rows=T)
physeq = phyloseq(otutab)
physeq = merge_phyloseq(physeq, tree)

# Calculate weighted UniFrac
ud = UniFrac(physeq, weighted=T, normalized=T, fast=T)
dist_mat = as.matrix(ud)

# Heatmap
figmat = dist_mat[grep("SW", rownames(dist_mat), invert=T), grep("SW", colnames(dist_mat), invert=T)]
meta_16S.c = meta_16S.c[rownames(figmat), ]
meta_16S.c = meta_16S.c %>% arrange(Colony,Month)
figmat = figmat[meta_16S.c$ID, meta_16S.c$ID]
annotation = data.frame(Colony=meta_16S.c$Colony, row.names = rownames(figmat))
color.colony = c(rev(brewer.pal(3, "Blues")), brewer.pal(3, "Greens")[3], rev(brewer.pal(3, "Reds")))
names(color.colony) = c("Is1", "Is2", "Is3", "Ky1", "Mi1", "Mi2", "Mi3")
annotation_colors = list(Colony = color.colony)
pheatmap(figmat,
         cluster_rows = F,
         cluster_cols = F,
         annotation_row = annotation,
         annotation_col = annotation,
         labels_row = meta_16S.c$Month,
         labels_col = meta_16S.c$Month,
         annotation_colors = annotation_colors,
         color = rev(colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)),
         filename = "2_Weighted_UniFrac.Heatmap.Coral.pdf",
         width = 7.5, 
         height = 6.5)

PCoA

library(stats)
cmds = cmdscale(dist_mat, eig=T)
coordinates = cmds$points; colnames(coordinates) = c("PCo1", "PCo2")
eigs = cmds$eig
comps = eigs / sum(eigs) * 100

# Figmat
figmat = coordinates %>% as.data.frame() %>% rownames_to_column("ID") %>%
           mutate(Type=ifelse(grepl("SW", ID), "SW", "Coral"),
                  Colony = meta[ID, "Colony"],
                  Month = meta[ID, "Month"])

color.colony = c(rev(brewer.pal(3, "Blues")), brewer.pal(3, "Greens")[3], rev(brewer.pal(3, "Reds")), "blue", "red")
names(color.colony) = c("Is1", "Is2", "Is3", "Ky1", "Mi1", "Mi2", "Mi3", "Is", "Mi")
shape = c(SW=1, Coral=16)

ggplot(figmat) + 
  geom_point(aes(x=PCo1, y=PCo2, color=Colony, shape=Type), size=2.) + 
  scale_color_manual(values=color.colony) +
  scale_shape_manual(values = shape) +
  theme_bw() +
  xlab(paste0("PCo 1 (", round(comps[1]), "%)")) + 
  ylab(paste0("PCo 2 (", round(comps[2]), "%)")) + 
  ggsave("2_Weighted_UniFrac_PCoA.pdf", width=4, height=3)

PERMANOVA

Reference: vegan tutorial page 32

dmat = as.matrix(ud)
dmat = dmat[grep("SW", rownames(dmat), invert=T), grep("SW", colnames(dmat), invert=T)]
ids = rownames(dmat)
d = as.dist(dmat)

meta_16S.c = meta_16S.c %>% mutate(tmp=ID) %>% column_to_rownames("tmp")
d_colony = adonis(dmat~Colony, data=meta_16S.c[ids,], permutations=10000)
d_site = adonis(d~Site, data=meta_16S.c[ids,], permutations=10000)
d_time = adonis(d~Month, data=meta_16S.c[ids,], permutations=10000)
d_pvalues = c(colony = d_colony[[1]]$`Pr(>F)`[1],
                site = d_site[[1]]$`Pr(>F)`[1],
                time = d_time[[1]]$`Pr(>F)`[1])
d_pvalues
##     colony       site       time 
## 0.00009999 0.00009999 0.36546345

Boxplot

dmat = as.matrix(ud)
dmat = dmat[grep("SW", rownames(dmat), invert=T), grep("SW", colnames(dmat), invert=T)]
figmat = matrix(nrow=0, ncol=3)

# Colony
rownames(meta_16S.c) = meta_16S.c$ID
colony = meta_16S.c[rownames(dmat), "Colony"]
for(g in unique(colony)){
  idx = which(colony == g)
  tmp1 = data.frame(Category="Colony", Group="Intra-colony", Value=as.numeric(as.dist(dmat[idx,idx])))
  tmp2 = data.frame(Category="Colony", Group="Inter-colony", Value=as.numeric(dmat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling time
time = meta_16S.c[rownames(dmat), "Month"]
for(g in unique(time)){
  idx = which(time == g)
  tmp1 = data.frame(Category="Time", Group="Intra-time", Value=as.numeric(as.dist(dmat[idx,idx])))
  tmp2 = data.frame(Category="Time", Group="Inter-time", Value=as.numeric(dmat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling site
site = meta_16S.c[rownames(dmat), "Site"]
for(g in unique(site)){
  idx = which(site == g)
  tmp1 = data.frame(Category="Site", Group="Intra-site", Value=as.numeric(as.dist(dmat[idx,idx])))
  tmp2 = data.frame(Category="Site", Group="Inter-site", Value=as.numeric(dmat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

fig = ggplot(figmat) +
  geom_boxplot(aes(x=Group, y=Value), coef=Inf) +
  facet_grid(~Category, scales="free_x") +
  xlab(NULL) + ylab("Weighted UniFrac") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, size=12)) + 
  scale_y_continuous(limits=c(0, 0.5))
fig + ggsave("2_Weighted_UniFrac.Boxplot.Coral.pdf", width=2.5, height=3)

Unweighted UniFrac

# Make phyloseq object
otutab = otu_table(otutab, taxa_are_rows=T)
physeq = phyloseq(otutab)
physeq = merge_phyloseq(physeq, tree)

# Calculate weighted UniFrac
ud = UniFrac(physeq, weighted=F, normalized=T, fast=T)
dist_mat = as.matrix(ud)

# Heatmap
figmat = dist_mat[grep("SW", rownames(dist_mat), invert=T), grep("SW", colnames(dist_mat), invert=T)]
meta_16S.c = meta_16S.c[rownames(figmat), ]
meta_16S.c = meta_16S.c %>% arrange(Colony,Month)
figmat = figmat[meta_16S.c$ID, meta_16S.c$ID]
annotation = data.frame(Colony=meta_16S.c$Colony, row.names = rownames(figmat))
color.colony = c(rev(brewer.pal(3, "Blues")), brewer.pal(3, "Greens")[3], rev(brewer.pal(3, "Reds")))
names(color.colony) = c("Is1", "Is2", "Is3", "Ky1", "Mi1", "Mi2", "Mi3")
annotation_colors = list(Colony = color.colony)
pheatmap(figmat,
         cluster_rows = F,
         cluster_cols = F,
         annotation_row = annotation,
         annotation_col = annotation,
         labels_row = meta_16S.c$Month,
         labels_col = meta_16S.c$Month,
         annotation_colors = annotation_colors,
         breaks = seq(0, 1, length.out=100),
         color = rev(colorRampPalette(rev(brewer.pal(n = 7, name="RdYlBu")))(100)))

PCoA

library(stats)
cmds = cmdscale(dist_mat, eig=T)
coordinates = cmds$points; colnames(coordinates) = c("PCo1", "PCo2")
eigs = cmds$eig
comps = eigs / sum(eigs) * 100

# Figmat
figmat = coordinates %>% as.data.frame() %>% rownames_to_column("ID") %>%
           mutate(Type=ifelse(grepl("SW", ID), "SW", "Coral"),
                  Colony = meta[ID, "Colony"],
                  Month = meta[ID, "Month"])

color.colony = c(rev(brewer.pal(3, "Blues")), brewer.pal(3, "Greens")[3], rev(brewer.pal(3, "Reds")), "blue", "red")
names(color.colony) = c("Is1", "Is2", "Is3", "Ky1", "Mi1", "Mi2", "Mi3", "Is", "Mi")
shape = c(SW=1, Coral=16)

ggplot(figmat) + 
  geom_point(aes(x=PCo1, y=PCo2, color=Colony, shape=Type), size=2.) + 
  scale_color_manual(values=color.colony) +
  scale_shape_manual(values = shape) +
  theme_bw() +
  theme(axis.title.x = element_text(size=14), axis.title.y = element_text(size=14)) +
  xlab(paste0("PCo 1 (", round(comps[1]), "%)")) + 
  ylab(paste0("PCo 2 (", round(comps[2]), "%)")) + 
  ggsave("2_Unweighted_UniFrac_PCoA.pdf", width=5, height=3)

PERMANOVA

Reference: vegan tutorial page 32

dmat = as.matrix(ud)
dmat = dmat[grep("SW", rownames(dmat), invert=T), grep("SW", colnames(dmat), invert=T)]
ids = rownames(dmat)
d = as.dist(dmat)

meta_16S.c = meta_16S.c %>% mutate(tmp=ID) %>% column_to_rownames("tmp")
d_colony = adonis(dmat~Colony, data=meta_16S.c[ids,], permutations=10000)
d_site = adonis(d~Site, data=meta_16S.c[ids,], permutations=10000)
d_time = adonis(d~Month, data=meta_16S.c[ids,], permutations=10000)
d_pvalues = c(colony = d_colony[[1]]$`Pr(>F)`[1],
                site = d_site[[1]]$`Pr(>F)`[1],
                time = d_time[[1]]$`Pr(>F)`[1])
d_pvalues
##     colony       site       time 
## 0.00009999 0.00009999 0.00269973

Boxplot

dmat = as.matrix(ud)
dmat = dmat[grep("SW", rownames(dmat), invert=T), grep("SW", colnames(dmat), invert=T)]
figmat = matrix(nrow=0, ncol=3)

# Colony
rownames(meta_16S.c) = meta_16S.c$ID
colony = meta_16S.c[rownames(dmat), "Colony"]
for(g in unique(colony)){
  idx = which(colony == g)
  tmp1 = data.frame(Category="Colony", Group="Intra-colony", Value=as.numeric(as.dist(dmat[idx,idx])))
  tmp2 = data.frame(Category="Colony", Group="Inter-colony", Value=as.numeric(dmat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling time
time = meta_16S.c[rownames(dmat), "Month"]
for(g in unique(time)){
  idx = which(time == g)
  tmp1 = data.frame(Category="Time", Group="Intra-time", Value=as.numeric(as.dist(dmat[idx,idx])))
  tmp2 = data.frame(Category="Time", Group="Inter-time", Value=as.numeric(dmat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling site
site = meta_16S.c[rownames(dmat), "Site"]
for(g in unique(site)){
  idx = which(site == g)
  tmp1 = data.frame(Category="Site", Group="Intra-site", Value=as.numeric(as.dist(dmat[idx,idx])))
  tmp2 = data.frame(Category="Site", Group="Inter-site", Value=as.numeric(dmat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

fig = ggplot(figmat) +
  geom_boxplot(aes(x=Group, y=Value), coef=Inf) +
  facet_grid(~Category, scales="free_x") +
  xlab(NULL) + ylab("Weighted UniFrac") +
  theme_bw() + 
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, size=12)) + 
  scale_y_continuous(limits=c(0, 0.8))
fig + ggsave("2_Unweighted_UniFrac.Boxplot.Coral.pdf", width=2.5, height=3)