Bacterial compositionΒΆ

1_BacterialComposition.utf8
library(tidyverse)
## ─ Attaching packages ────────────────────────────────────────────── tidyverse 1.3.0 ─
## βœ“ ggplot2 3.3.2     βœ“ purrr   0.3.4
## βœ“ tibble  3.0.3     βœ“ dplyr   1.0.1
## βœ“ tidyr   1.1.1     βœ“ stringr 1.4.0
## βœ“ readr   1.3.1     βœ“ forcats 0.5.0
## ─ Conflicts ─────────────────────────────────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(pheatmap)
library(ggsci)

OTU table

otutab = read.table("otutab.txt", sep="\t", header=T, row.names=1, comment.char="", check.names=F)
otutab = otutab[order(as.numeric(gsub("OTU", "", rownames(otutab)))),]
otutab.c = otutab[ , grep("SW", colnames(otutab), invert=T)]
otutab.sw = otutab[ , grep("SW", colnames(otutab))]
otutab.sw = otutab.sw[, grep("Out", colnames(otutab.sw), invert=T)]

Metadata

meta_16S.c = data.frame(ID=colnames(otutab.c), stringsAsFactors=F) %>% 
               mutate(Month = str_extract(ID, "^[0-9]+"), 
                      Colony = sapply(ID, function(x) strsplit(x, "_")[[1]][2]),
                      Site = ifelse(grepl("Is", ID), "Is",
                                    ifelse(grepl("Mi", ID), "Mi", "Ky")))
rownames(meta_16S.c) = meta_16S.c$ID
meta_16S.c$Colony[grep("201411_Is", meta_16S.c$ID)] = "Is1"
meta_16S.c$Colony[grep("201502_Is", meta_16S.c$ID)] = "Is1"
meta_16S.c$Colony[grep("201411_Mi", meta_16S.c$ID)] = "Mi1"
meta_16S.c$Colony[grep("201502_Mi", meta_16S.c$ID)] = "Mi1"
head(meta_16S.c)
##                    ID  Month Colony Site
## 201411_Is1 201411_Is1 201411    Is1   Is
## 201411_Is2 201411_Is2 201411    Is1   Is
## 201411_Is3 201411_Is3 201411    Is1   Is
## 201411_Mi1 201411_Mi1 201411    Mi1   Mi
## 201411_Mi2 201411_Mi2 201411    Mi1   Mi
## 201411_Mi3 201411_Mi3 201411    Mi1   Mi
meta_16S.sw = data.frame(ID=colnames(otutab.sw), stringsAsFactors=F) %>% 
                mutate(Month = str_extract(ID, "^[0-9]+"), 
                       Site = ifelse(grepl("Is", ID), "Is", "Mi"))
rownames(meta_16S.sw) = meta_16S.sw$ID
head(meta_16S.sw)
##                        ID  Month Site
## 201411_Is_SW 201411_Is_SW 201411   Is
## 201411_Mi_SW 201411_Mi_SW 201411   Mi
## 201502_Is_SW 201502_Is_SW 201502   Is
## 201502_Mi_SW 201502_Mi_SW 201502   Mi
## 201505_Is_SW 201505_Is_SW 201505   Is
## 201505_Mi_SW 201505_Mi_SW 201505   Mi

Classification

read_RDP = function(file, class=c("domain", "phylum", "class", "order", "family", "genus")){
  lines = scan(file, sep="\n", what="characters")
  result = lapply(lines,
                  function(line){
                    a = strsplit(line, "\t")[[1]]
                    idx = which(a %in% class)
                    tax = rep(NA, length(class)); names(tax) = class
                    score = rep(NA, length(class)); names(score) = class
                    tax[a[idx]] = gsub('"', "", a[idx - 1])
                    score[a[idx]] = a[idx + 1]
                    names(score) = paste0(names(score), ".score")
                    c(otu=a[1], tax, score)
                  })
  result = do.call(rbind, result)
  result = data.frame(result, stringsAsFactors=F)
  for(i in grep("score", colnames(result))) result[,i] = as.numeric(result[,i])
  result = result %>% column_to_rownames("otu")
  result
}
rdp = read_RDP("otus.RDPclassifier.txt")
head(rdp)
##        domain                    phylum               class             order
## OTU1 Bacteria            Proteobacteria Alphaproteobacteria     Rickettsiales
## OTU2 Bacteria Cyanobacteria/Chloroplast       Cyanobacteria              <NA>
## OTU3 Bacteria            Proteobacteria Gammaproteobacteria Oceanospirillales
## OTU4 Bacteria Cyanobacteria/Chloroplast       Cyanobacteria              <NA>
## OTU5 Bacteria            Proteobacteria Gammaproteobacteria Oceanospirillales
## OTU6 Bacteria Cyanobacteria/Chloroplast       Cyanobacteria              <NA>
##               family          genus domain.score phylum.score class.score
## OTU1 Anaplasmataceae      Anaplasma            1         0.93        0.87
## OTU2     Family VIII         GpVIII            1         1.00        1.00
## OTU3     Hahellaceae Endozoicomonas            1         1.00        1.00
## OTU4        Family X            GpX            1         0.94        0.92
## OTU5     Hahellaceae Endozoicomonas            1         1.00        0.99
## OTU6       Family II          GpIIa            1         1.00        1.00
##      order.score family.score genus.score
## OTU1        0.75         0.73        0.71
## OTU2          NA         0.99        0.99
## OTU3        1.00         0.78        0.76
## OTU4          NA         0.35        0.35
## OTU5        0.98         0.68        0.64
## OTU6          NA         1.00        1.00
silva = read.table("otus.silva.txt", sep="\t", stringsAsFactors=F)
silva.tax = lapply(silva[,2], 
                   function(x){
                     a = gsub(";", "", strsplit(x, "tax=")[[1]][2])
                     a = strsplit(a, ",")[[1]]
                     a
                   })
silva.tax = do.call(rbind, silva.tax)
colnames(silva.tax) = c("family", "genus", "species")
silva = data.frame(otu = silva[,1],
                   silva.tax,
                   identity=silva[,3], stringsAsFactors = F)
silva = silva %>% column_to_rownames("otu")
silva = silva[order(as.numeric(gsub("OTU", "", rownames(silva)))),]
head(silva)
##                    family             genus
## OTU1    f:Anaplasmataceae       g:Anaplasma
## OTU2    f:Oscillatoriales   g:Pantanalinema
## OTU3        f:Hahellaceae  g:Endozoicomonas
## OTU4     f:Stigonematales         g:Iphinoe
## OTU5        f:Hahellaceae  g:Endozoicomonas
## OTU6 f:Prochlorococcaceae g:Prochlorococcus
##                                        species identity
## OTU1               s:Anaplasma_phagocytophilum     84.4
## OTU2                  s:Pantanalinema_rosaneae     91.5
## OTU3             s:Endozoicomonas_gorgoniicola     91.7
## OTU4                     s:Iphinoe_spelaeobios     87.9
## OTU5             s:Endozoicomonas_gorgoniicola     88.9
## OTU6 s:Prochlorococcus_marinus_subsp._pastoris     95.2

Genus-level composition

rdp_genus = rdp %>% rownames_to_column("otu") %>% 
              select(otu, genus, genus.score)  %>% 
              mutate(genus_threshold = ifelse(genus.score > 0.5, genus, "Unclassified")) %>%
              column_to_rownames("otu")

genustab.c = otutab.c %>% 
                mutate(genus = rdp_genus[rownames(otutab.c), "genus_threshold"]) %>% 
                group_by(genus) %>% summarise_all(sum) %>% as.data.frame() %>% column_to_rownames("genus")
head(genustab.c)
##                    201411_Is1 201411_Is2 201411_Is3 201411_Mi1 201411_Mi2
## Acidovorax                  0          0          0          3          0
## Acinetobacter               0          0          0          0          0
## Aestuariibacter             0          0          0          0          0
## Afipia                      0         38          0          0          0
## Ahrensia                    0          0          0          0          0
## Altererythrobacter          0          0          0          0          0
##                    201411_Mi3 201502_Is2 201502_Is3 201502_Mi1 201502_Mi2
## Acidovorax                 14         23          0          0          0
## Acinetobacter               8          0          0          0          0
## Aestuariibacter             0          0          0          0          0
## Afipia                      0          0          0          0          0
## Ahrensia                    0          0          0          0          0
## Altererythrobacter          0          0          0          0          0
##                    201502_Mi3 201505_Is1 201505_Is2 201505_Is3 201505_Mi1
## Acidovorax                  0          0          0          0          0
## Acinetobacter               0          0          0          0          0
## Aestuariibacter             0          0          0          0          0
## Afipia                      0          0          0          0          0
## Ahrensia                    0          0          0          0          0
## Altererythrobacter          0          0          0          0          5
##                    201505_Mi2 201505_Mi3 201508_Is1 201508_Is2 201508_Is3
## Acidovorax                  0          0          0          0          0
## Acinetobacter               0          0          0          3          0
## Aestuariibacter             0          0          0          0          0
## Afipia                      0          0          0          0          0
## Ahrensia                    0          0          0          0          0
## Altererythrobacter          0          0          0          0          0
##                    201508_Mi1 201508_Mi2 201508_Mi3 201511_Is1 201511_Is2
## Acidovorax                  0          0          0          0          1
## Acinetobacter               0          1          0          0        120
## Aestuariibacter             0          0          0          0          0
## Afipia                      0          0          0          0          0
## Ahrensia                    0          0          0          0          0
## Altererythrobacter          0          0          1          0          0
##                    201511_Is3 201511_Mi1 201511_Mi2 201511_Mi3
## Acidovorax                  0          0          0          0
## Acinetobacter             526          0          0          0
## Aestuariibacter             0          0          0          0
## Afipia                      0          0          0          0
## Ahrensia                    0          2          0          0
## Altererythrobacter          0          0          0          0
genustab.sw = otutab.sw %>% 
                 mutate(genus = rdp_genus[rownames(otutab.sw), "genus_threshold"]) %>% 
                 group_by(genus) %>% summarise_all(sum) %>% as.data.frame() %>% column_to_rownames("genus")
head(genustab.sw)
##                    201411_Is_SW 201411_Mi_SW 201502_Is_SW 201502_Mi_SW
## Acidovorax                    1          973            0            1
## Acinetobacter                 0            0            0            0
## Aestuariibacter               0            4            0            7
## Afipia                        0            0            0            0
## Ahrensia                      3           17            0            8
## Altererythrobacter           21            3            1           28
##                    201505_Is_SW 201505_Mi_SW 201508_Is_SW 201508_Mi_SW
## Acidovorax                    1            0            0            0
## Acinetobacter                 0            0            0            0
## Aestuariibacter               0            2            0            0
## Afipia                        0            0            0            1
## Ahrensia                      1            2            0            0
## Altererythrobacter            0            9            0            0
##                    201511_Is_SW 201511_Mi_SW
## Acidovorax                    1            0
## Acinetobacter                 0            0
## Aestuariibacter               0            0
## Afipia                        0            0
## Ahrensia                      0            0
## Altererythrobacter            0            0

Visualization

make_bar = function(mat, meta, threshold=0.1, colors=pal_d3("category20"), other="Others",
                    remain=c("Unclassified"), colors.exception=c("Unclassified"="black", "Others"="grey")){
  # Normalize
  mat = t(apply(mat, 1, function(x, s) x / s * 100, colSums(mat)))
  
  # Make others
  max = apply(mat, 1, max)
  others = (max < threshold * 100) & (!(names(max) %in% remain))
  mat = rbind(mat[!others, ], Others=colSums(mat[others,]))
  
  # Color
  tax2color = colors(nrow(mat))
  names(tax2color) = rownames(mat)
  tax = intersect(names(colors.exception), names(tax2color))
  tax2color[tax] = colors.exception[tax]
  
  # 
  figmat = mat %>% as.data.frame() %>% 
             rownames_to_column("tax") %>% 
             gather(key="ID", value="proportion", -tax) %>% 
             left_join(meta, by="ID")
  figmat$tax = factor(figmat$tax, levels=rev(names(tax2color)))
  
  # plot
  ggplot(figmat) + 
    geom_bar(aes(x=ID, y=proportion, fill=tax), position="stack", stat="identity") + 
    facet_grid(~Colony, scales="free_x", space="free_x") +
    theme_bw() +
    theme(axis.text.x = element_text(angle=90, vjust=0.5, hjust=1), 
          strip.background=element_rect(fill="white")) +
    scale_fill_manual(values=tax2color) + 
    xlab(NULL) + ylab(NULL)
}

201508_Mi3 was composed of OTU15:Propionibacterium_acnes, thus suspected as contaminated

make_bar(otutab.c, meta_16S.c)

# removal of contaminated sample 
make_bar(otutab.c[,colnames(otutab.c) != "201508_Mi3"], meta_16S.c) +
  ggsave("1_BacterialComposition.OTU.pdf", width=5.5, height=3.5)

# Genus level
make_bar(genustab.c[,colnames(genustab.c) != "201508_Mi3"], meta_16S.c, colors=pal_lancet()) +
  ggsave("1_BacterialComposition.Genus.pdf", width=5.5, height=3.5)

comparison = function(mat.c, mat.sw, target, col){
  # Normalize
  mat.c = t(apply(mat.c, 1, function(x, s) x / s * 100, colSums(mat.c)))
  mat.sw = t(apply(mat.sw, 1, function(x, s) x / s * 100, colSums(mat.sw)))
  
  figmat.c = data.frame(Value = mat.c[target, ],
                        Group = meta_16S.c[colnames(mat.c), "Colony"],
                        Category = "Coral",
                        stringsAsFactors = F)
  figmat.sw = data.frame(Value = mat.sw[target, ],
                         Group = meta_16S.sw[colnames(mat.sw), "Site"],
                         Category = "Seawater",
                         stringsAsFactors = F)
  ymax = max(c(figmat.c$Value, figmat.sw$Value))
  if(ymax < 1) ymax = 1
  
  figmat_mean = figmat.c %>% group_by(Group) %>% 
    summarize(mean=mean(Value), median=median(Value), q75=quantile(Value, probs=0.75), q25=quantile(Value, probs=0.25))
  fig1 = ggplot(figmat.c) + 
    geom_errorbar(aes(x=Group, y=median, ymin=median, ymax=median), data=figmat_mean, width=1, color="grey") +
    geom_errorbar(aes(x=Group, y=median, ymin=q25, ymax=q75), data=figmat_mean, width=0.2, color="grey") +
    geom_jitter(aes(x=Group, y=Value), height=0, width=0.2, color=col, alpha=0.7) +
    theme_bw() + xlab(NULL) + ylab("Proportion [%]") + ggtitle("Coral") +
    theme(plot.title = element_text(hjust=0.5, size=10)) + 
    scale_y_continuous(limits=c(0, ymax))
  
  figmat_mean = figmat.sw %>% group_by(Group) %>% 
    summarize(mean=mean(Value), median=median(Value), q75=quantile(Value, probs=0.75), q25=quantile(Value, probs=0.25))
  fig2 = ggplot(figmat.sw) + 
    geom_errorbar(aes(x=Group, y=median, ymin=median, ymax=median), data=figmat_mean, width=1, color="grey") +
    geom_errorbar(aes(x=Group, y=median, ymin=q25, ymax=q75), data=figmat_mean, width=0.2, color="grey") +
    geom_jitter(aes(x=Group, y=Value), height=0, width=0.2, color=col, alpha=0.7) +
    theme_bw() + xlab(NULL) + ylab(NULL) + ggtitle("Seawater") +
    theme(plot.title = element_text(hjust=0.5, size=10)) + 
    scale_y_continuous(limits=c(0, ymax))
  
  cowplot::plot_grid(fig1, fig2, align="h", nrow=1, rel_widths=c(1, 0.4))
}

comparison(genustab.c, genustab.sw, "Anaplasma", pal_lancet()(9)[1]) + ggsave("1_BacterialComposition/Anaplasma.pdf", width=3, height=1.8)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

comparison(genustab.c, genustab.sw, "Endozoicomonas", pal_lancet()(9)[2]) + ggsave("1_BacterialComposition/Endozoicomonas.pdf", width=3, height=1.8)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

comparison(genustab.c, genustab.sw, "GpVIII", pal_lancet()(9)[3]) + ggsave("1_BacterialComposition/GpVIII.pdf", width=3, height=1.8)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

otus = c("OTU1", "OTU2", "OTU3", "OTU4", "OTU5", "OTU13", "OTU14", "OTU16", "OTU40", "OTU549")
for(i in seq_len(length(otus))){
  otu = otus[i]
  comparison(otutab.c, otutab.sw, otu, pal_d3("category20")(20)[i]) + 
    ggsave(paste0("1_BacterialComposition/", otu, ".pdf"), width=3, height=1.8)
}
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

Save

save(otutab.c, otutab.sw, meta_16S.c, meta_16S.sw, genustab.c, genustab.sw, rdp, silva, file="1_BacterialComposition.RData")