Bacterial compositionΒΆ
library(tidyverse)
## β Attaching packages ββββββββββββββββββββββββββββββββββββββββββββββ tidyverse 1.3.0 β
## β ggplot2 3.3.2 β purrr 0.3.4
## β tibble 3.0.3 β dplyr 1.0.1
## β tidyr 1.1.1 β stringr 1.4.0
## β readr 1.3.1 β forcats 0.5.0
## β Conflicts βββββββββββββββββββββββββββββββββββββββββββββββ tidyverse_conflicts() β
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(pheatmap)
library(ggsci)
OTU table
otutab = read.table("otutab.txt", sep="\t", header=T, row.names=1, comment.char="", check.names=F)
otutab = otutab[order(as.numeric(gsub("OTU", "", rownames(otutab)))),]
otutab.c = otutab[ , grep("SW", colnames(otutab), invert=T)]
otutab.sw = otutab[ , grep("SW", colnames(otutab))]
otutab.sw = otutab.sw[, grep("Out", colnames(otutab.sw), invert=T)]
Metadata
meta_16S.c = data.frame(ID=colnames(otutab.c), stringsAsFactors=F) %>%
mutate(Month = str_extract(ID, "^[0-9]+"),
Colony = sapply(ID, function(x) strsplit(x, "_")[[1]][2]),
Site = ifelse(grepl("Is", ID), "Is",
ifelse(grepl("Mi", ID), "Mi", "Ky")))
rownames(meta_16S.c) = meta_16S.c$ID
meta_16S.c$Colony[grep("201411_Is", meta_16S.c$ID)] = "Is1"
meta_16S.c$Colony[grep("201502_Is", meta_16S.c$ID)] = "Is1"
meta_16S.c$Colony[grep("201411_Mi", meta_16S.c$ID)] = "Mi1"
meta_16S.c$Colony[grep("201502_Mi", meta_16S.c$ID)] = "Mi1"
head(meta_16S.c)
## ID Month Colony Site
## 201411_Is1 201411_Is1 201411 Is1 Is
## 201411_Is2 201411_Is2 201411 Is1 Is
## 201411_Is3 201411_Is3 201411 Is1 Is
## 201411_Mi1 201411_Mi1 201411 Mi1 Mi
## 201411_Mi2 201411_Mi2 201411 Mi1 Mi
## 201411_Mi3 201411_Mi3 201411 Mi1 Mi
meta_16S.sw = data.frame(ID=colnames(otutab.sw), stringsAsFactors=F) %>%
mutate(Month = str_extract(ID, "^[0-9]+"),
Site = ifelse(grepl("Is", ID), "Is", "Mi"))
rownames(meta_16S.sw) = meta_16S.sw$ID
head(meta_16S.sw)
## ID Month Site
## 201411_Is_SW 201411_Is_SW 201411 Is
## 201411_Mi_SW 201411_Mi_SW 201411 Mi
## 201502_Is_SW 201502_Is_SW 201502 Is
## 201502_Mi_SW 201502_Mi_SW 201502 Mi
## 201505_Is_SW 201505_Is_SW 201505 Is
## 201505_Mi_SW 201505_Mi_SW 201505 Mi
Classification
read_RDP = function(file, class=c("domain", "phylum", "class", "order", "family", "genus")){
lines = scan(file, sep="\n", what="characters")
result = lapply(lines,
function(line){
a = strsplit(line, "\t")[[1]]
idx = which(a %in% class)
tax = rep(NA, length(class)); names(tax) = class
score = rep(NA, length(class)); names(score) = class
tax[a[idx]] = gsub('"', "", a[idx - 1])
score[a[idx]] = a[idx + 1]
names(score) = paste0(names(score), ".score")
c(otu=a[1], tax, score)
})
result = do.call(rbind, result)
result = data.frame(result, stringsAsFactors=F)
for(i in grep("score", colnames(result))) result[,i] = as.numeric(result[,i])
result = result %>% column_to_rownames("otu")
result
}
rdp = read_RDP("otus.RDPclassifier.txt")
head(rdp)
## domain phylum class order
## OTU1 Bacteria Proteobacteria Alphaproteobacteria Rickettsiales
## OTU2 Bacteria Cyanobacteria/Chloroplast Cyanobacteria <NA>
## OTU3 Bacteria Proteobacteria Gammaproteobacteria Oceanospirillales
## OTU4 Bacteria Cyanobacteria/Chloroplast Cyanobacteria <NA>
## OTU5 Bacteria Proteobacteria Gammaproteobacteria Oceanospirillales
## OTU6 Bacteria Cyanobacteria/Chloroplast Cyanobacteria <NA>
## family genus domain.score phylum.score class.score
## OTU1 Anaplasmataceae Anaplasma 1 0.93 0.87
## OTU2 Family VIII GpVIII 1 1.00 1.00
## OTU3 Hahellaceae Endozoicomonas 1 1.00 1.00
## OTU4 Family X GpX 1 0.94 0.92
## OTU5 Hahellaceae Endozoicomonas 1 1.00 0.99
## OTU6 Family II GpIIa 1 1.00 1.00
## order.score family.score genus.score
## OTU1 0.75 0.73 0.71
## OTU2 NA 0.99 0.99
## OTU3 1.00 0.78 0.76
## OTU4 NA 0.35 0.35
## OTU5 0.98 0.68 0.64
## OTU6 NA 1.00 1.00
silva = read.table("otus.silva.txt", sep="\t", stringsAsFactors=F)
silva.tax = lapply(silva[,2],
function(x){
a = gsub(";", "", strsplit(x, "tax=")[[1]][2])
a = strsplit(a, ",")[[1]]
a
})
silva.tax = do.call(rbind, silva.tax)
colnames(silva.tax) = c("family", "genus", "species")
silva = data.frame(otu = silva[,1],
silva.tax,
identity=silva[,3], stringsAsFactors = F)
silva = silva %>% column_to_rownames("otu")
silva = silva[order(as.numeric(gsub("OTU", "", rownames(silva)))),]
head(silva)
## family genus
## OTU1 f:Anaplasmataceae g:Anaplasma
## OTU2 f:Oscillatoriales g:Pantanalinema
## OTU3 f:Hahellaceae g:Endozoicomonas
## OTU4 f:Stigonematales g:Iphinoe
## OTU5 f:Hahellaceae g:Endozoicomonas
## OTU6 f:Prochlorococcaceae g:Prochlorococcus
## species identity
## OTU1 s:Anaplasma_phagocytophilum 84.4
## OTU2 s:Pantanalinema_rosaneae 91.5
## OTU3 s:Endozoicomonas_gorgoniicola 91.7
## OTU4 s:Iphinoe_spelaeobios 87.9
## OTU5 s:Endozoicomonas_gorgoniicola 88.9
## OTU6 s:Prochlorococcus_marinus_subsp._pastoris 95.2
Genus-level composition
rdp_genus = rdp %>% rownames_to_column("otu") %>%
select(otu, genus, genus.score) %>%
mutate(genus_threshold = ifelse(genus.score > 0.5, genus, "Unclassified")) %>%
column_to_rownames("otu")
genustab.c = otutab.c %>%
mutate(genus = rdp_genus[rownames(otutab.c), "genus_threshold"]) %>%
group_by(genus) %>% summarise_all(sum) %>% as.data.frame() %>% column_to_rownames("genus")
head(genustab.c)
## 201411_Is1 201411_Is2 201411_Is3 201411_Mi1 201411_Mi2
## Acidovorax 0 0 0 3 0
## Acinetobacter 0 0 0 0 0
## Aestuariibacter 0 0 0 0 0
## Afipia 0 38 0 0 0
## Ahrensia 0 0 0 0 0
## Altererythrobacter 0 0 0 0 0
## 201411_Mi3 201502_Is2 201502_Is3 201502_Mi1 201502_Mi2
## Acidovorax 14 23 0 0 0
## Acinetobacter 8 0 0 0 0
## Aestuariibacter 0 0 0 0 0
## Afipia 0 0 0 0 0
## Ahrensia 0 0 0 0 0
## Altererythrobacter 0 0 0 0 0
## 201502_Mi3 201505_Is1 201505_Is2 201505_Is3 201505_Mi1
## Acidovorax 0 0 0 0 0
## Acinetobacter 0 0 0 0 0
## Aestuariibacter 0 0 0 0 0
## Afipia 0 0 0 0 0
## Ahrensia 0 0 0 0 0
## Altererythrobacter 0 0 0 0 5
## 201505_Mi2 201505_Mi3 201508_Is1 201508_Is2 201508_Is3
## Acidovorax 0 0 0 0 0
## Acinetobacter 0 0 0 3 0
## Aestuariibacter 0 0 0 0 0
## Afipia 0 0 0 0 0
## Ahrensia 0 0 0 0 0
## Altererythrobacter 0 0 0 0 0
## 201508_Mi1 201508_Mi2 201508_Mi3 201511_Is1 201511_Is2
## Acidovorax 0 0 0 0 1
## Acinetobacter 0 1 0 0 120
## Aestuariibacter 0 0 0 0 0
## Afipia 0 0 0 0 0
## Ahrensia 0 0 0 0 0
## Altererythrobacter 0 0 1 0 0
## 201511_Is3 201511_Mi1 201511_Mi2 201511_Mi3
## Acidovorax 0 0 0 0
## Acinetobacter 526 0 0 0
## Aestuariibacter 0 0 0 0
## Afipia 0 0 0 0
## Ahrensia 0 2 0 0
## Altererythrobacter 0 0 0 0
genustab.sw = otutab.sw %>%
mutate(genus = rdp_genus[rownames(otutab.sw), "genus_threshold"]) %>%
group_by(genus) %>% summarise_all(sum) %>% as.data.frame() %>% column_to_rownames("genus")
head(genustab.sw)
## 201411_Is_SW 201411_Mi_SW 201502_Is_SW 201502_Mi_SW
## Acidovorax 1 973 0 1
## Acinetobacter 0 0 0 0
## Aestuariibacter 0 4 0 7
## Afipia 0 0 0 0
## Ahrensia 3 17 0 8
## Altererythrobacter 21 3 1 28
## 201505_Is_SW 201505_Mi_SW 201508_Is_SW 201508_Mi_SW
## Acidovorax 1 0 0 0
## Acinetobacter 0 0 0 0
## Aestuariibacter 0 2 0 0
## Afipia 0 0 0 1
## Ahrensia 1 2 0 0
## Altererythrobacter 0 9 0 0
## 201511_Is_SW 201511_Mi_SW
## Acidovorax 1 0
## Acinetobacter 0 0
## Aestuariibacter 0 0
## Afipia 0 0
## Ahrensia 0 0
## Altererythrobacter 0 0
Visualization
make_bar = function(mat, meta, threshold=0.1, colors=pal_d3("category20"), other="Others",
remain=c("Unclassified"), colors.exception=c("Unclassified"="black", "Others"="grey")){
# Normalize
mat = t(apply(mat, 1, function(x, s) x / s * 100, colSums(mat)))
# Make others
max = apply(mat, 1, max)
others = (max < threshold * 100) & (!(names(max) %in% remain))
mat = rbind(mat[!others, ], Others=colSums(mat[others,]))
# Color
tax2color = colors(nrow(mat))
names(tax2color) = rownames(mat)
tax = intersect(names(colors.exception), names(tax2color))
tax2color[tax] = colors.exception[tax]
#
figmat = mat %>% as.data.frame() %>%
rownames_to_column("tax") %>%
gather(key="ID", value="proportion", -tax) %>%
left_join(meta, by="ID")
figmat$tax = factor(figmat$tax, levels=rev(names(tax2color)))
# plot
ggplot(figmat) +
geom_bar(aes(x=ID, y=proportion, fill=tax), position="stack", stat="identity") +
facet_grid(~Colony, scales="free_x", space="free_x") +
theme_bw() +
theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1),
strip.background=element_rect(fill="white")) +
scale_fill_manual(values=tax2color) +
xlab(NULL) + ylab(NULL)
}
201508_Mi3 was composed of OTU15:Propionibacterium_acnes, thus suspected as contaminated
make_bar(otutab.c, meta_16S.c)
# removal of contaminated sample
make_bar(otutab.c[,colnames(otutab.c) != "201508_Mi3"], meta_16S.c) +
ggsave("1_BacterialComposition.OTU.pdf", width=5.5, height=3.5)
# Genus level
make_bar(genustab.c[,colnames(genustab.c) != "201508_Mi3"], meta_16S.c, colors=pal_lancet()) +
ggsave("1_BacterialComposition.Genus.pdf", width=5.5, height=3.5)
comparison = function(mat.c, mat.sw, target, col){
# Normalize
mat.c = t(apply(mat.c, 1, function(x, s) x / s * 100, colSums(mat.c)))
mat.sw = t(apply(mat.sw, 1, function(x, s) x / s * 100, colSums(mat.sw)))
figmat.c = data.frame(Value = mat.c[target, ],
Group = meta_16S.c[colnames(mat.c), "Colony"],
Category = "Coral",
stringsAsFactors = F)
figmat.sw = data.frame(Value = mat.sw[target, ],
Group = meta_16S.sw[colnames(mat.sw), "Site"],
Category = "Seawater",
stringsAsFactors = F)
ymax = max(c(figmat.c$Value, figmat.sw$Value))
if(ymax < 1) ymax = 1
figmat_mean = figmat.c %>% group_by(Group) %>%
summarize(mean=mean(Value), median=median(Value), q75=quantile(Value, probs=0.75), q25=quantile(Value, probs=0.25))
fig1 = ggplot(figmat.c) +
geom_errorbar(aes(x=Group, y=median, ymin=median, ymax=median), data=figmat_mean, width=1, color="grey") +
geom_errorbar(aes(x=Group, y=median, ymin=q25, ymax=q75), data=figmat_mean, width=0.2, color="grey") +
geom_jitter(aes(x=Group, y=Value), height=0, width=0.2, color=col, alpha=0.7) +
theme_bw() + xlab(NULL) + ylab("Proportion [%]") + ggtitle("Coral") +
theme(plot.title = element_text(hjust=0.5, size=10)) +
scale_y_continuous(limits=c(0, ymax))
figmat_mean = figmat.sw %>% group_by(Group) %>%
summarize(mean=mean(Value), median=median(Value), q75=quantile(Value, probs=0.75), q25=quantile(Value, probs=0.25))
fig2 = ggplot(figmat.sw) +
geom_errorbar(aes(x=Group, y=median, ymin=median, ymax=median), data=figmat_mean, width=1, color="grey") +
geom_errorbar(aes(x=Group, y=median, ymin=q25, ymax=q75), data=figmat_mean, width=0.2, color="grey") +
geom_jitter(aes(x=Group, y=Value), height=0, width=0.2, color=col, alpha=0.7) +
theme_bw() + xlab(NULL) + ylab(NULL) + ggtitle("Seawater") +
theme(plot.title = element_text(hjust=0.5, size=10)) +
scale_y_continuous(limits=c(0, ymax))
cowplot::plot_grid(fig1, fig2, align="h", nrow=1, rel_widths=c(1, 0.4))
}
comparison(genustab.c, genustab.sw, "Anaplasma", pal_lancet()(9)[1]) + ggsave("1_BacterialComposition/Anaplasma.pdf", width=3, height=1.8)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
comparison(genustab.c, genustab.sw, "Endozoicomonas", pal_lancet()(9)[2]) + ggsave("1_BacterialComposition/Endozoicomonas.pdf", width=3, height=1.8)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
comparison(genustab.c, genustab.sw, "GpVIII", pal_lancet()(9)[3]) + ggsave("1_BacterialComposition/GpVIII.pdf", width=3, height=1.8)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
otus = c("OTU1", "OTU2", "OTU3", "OTU4", "OTU5", "OTU13", "OTU14", "OTU16", "OTU40", "OTU549")
for(i in seq_len(length(otus))){
otu = otus[i]
comparison(otutab.c, otutab.sw, otu, pal_d3("category20")(20)[i]) +
ggsave(paste0("1_BacterialComposition/", otu, ".pdf"), width=3, height=1.8)
}
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
Save
save(otutab.c, otutab.sw, meta_16S.c, meta_16S.sw, genustab.c, genustab.sw, rdp, silva, file="1_BacterialComposition.RData")