Gene set analysis for the WGCNA modulesΒΆ

2.5_GeneSetAnalysis.UT.v2.utf8
library(clusterProfiler)
library(tidyverse)
library(openxlsx)
barGSA = function(GSA_result, n, font=3, color_bar, font_color="grey20", border_color="#00000000", limits=c(0, 5)){
  mat = GSA_result[1:min(c(n, nrow(GSA_result))), ]
  figmat = data.frame(P=-log10(mat$p.adjust),
                      Term=Hmisc::capitalize(mat$Description),
                      stringsAsFactors = F)
  figmat$Term = gsub("MRNA", "mRNA", figmat$Term)
  figmat$Term = gsub("RRNA", "rRNA", figmat$Term)
  figmat$Term = gsub("^Ko:", "ko:", figmat$Term)
  figmat$Term = factor(figmat$Term, levels=rev(figmat$Term))
  y.lab = max(figmat$P) * 0.02
  fig = ggplot(figmat) +
    geom_bar(aes(x=Term, y=P), stat="identity", fill=color_bar, color=border_color) +
    geom_text(aes(label=Term, x=Term), y=max(limits)*0.01, hjust=0, size=font, color=font_color) +
    coord_flip() + theme_minimal() + xlab(NULL) + ylab(NULL) +
    theme(axis.text.x = element_text(size=10), axis.text.y = element_blank(),
          axis.ticks.y = element_blank(), axis.title.x = element_text(size=10),
          plot.title = element_blank()) +
    scale_y_continuous(limits=limits)
  fig
}

merged_barGSA = function(gsa, modules, limits){
  figs = list()
  for(i in seq_len(nrow(modules))){
    m = modules[i, 1]
    if(!is.null(gsa[[m]])){
      figs[[m]] = barGSA(gsa[[m]], n=as.numeric(modules[i,4]), color_bar=m, font_color=modules[i, 2], 
                         border_color=modules[i,3], limits=limits, font=3) +
          theme(legend.position="none", plot.margin = unit(c(0, 0, 0, 0), "cm"), axis.text.x = element_blank())
    }
  }
  figs[[length(figs)]] = figs[[length(figs)]] + theme(axis.text.x = element_text(size=10))
  cowplot::plot_grid(plotlist=figs, ncol=1, align="h", rel_heights=as.numeric(modules[, 4]))
}

Load files

t = 0.2
load("2.1.c_WGCNA_ThresholdingPower.UT.RData")
load("2.2.c_WGCNA.Modules.UT.RData")
load("2.1.z_WGCNA_ThresholdingPower.UT.RData")
load("2.2.z_WGCNA.Modules.UT.RData")
load("../Annotation/zooxanthellae/rnaSPAdes.1Mreads.UT/Annotation.Zooxanthellae.UT.RData")
load("../Annotation/coral/Annotation.Coral.UT.RData")

Coral

modules = unique(modules.c[[as.character(t)]]$colors)
module2genes.c = list()
for(m in modules) module2genes.c[[m]] = colnames(mat.c)[modules.c[[as.character(t)]]$colors == m]

tmp = data.frame(Module = rep(modules, sapply(module2genes.c,length)),
                 Gene = unlist(module2genes.c),
                 stringsAsFactors = F)
write.xlsx(tmp, file="2.5_Module.GeneList.Coral.UT.xlsx")

TERM2GENE = gene2GO.c[,c("GO", "Gene")]
genes_not_found = tmp %>% filter(!Gene %in% TERM2GENE$Gene) %>% pull(Gene)
TERM2GENE = rbind(TERM2GENE, cbind(GO="No hit", Gene=genes_not_found))
TERM2GENE = subset(TERM2GENE, Gene %in% tmp$Gene)
gsa.GO.c = list()
for(m in modules){
  print(m)
  gsa = enricher(module2genes.c[[m]], 
                 minGSSize=5, maxGSSize=500, 
                 universe=colnames(mat.c), qvalueCutoff=0.1, 
                 TERM2GENE=TERM2GENE)
  gsa = as.data.frame(gsa)
  terms = go2term(gsa$ID) %>% column_to_rownames("go_id")
  gsa$Description = terms[gsa$ID, 1]
  if(nrow(gsa) > 0) gsa = data.frame(Module=m, gsa, row.names=NULL)
  else gsa = NULL
  gsa.GO.c[[m]] = gsa
}
gsa.GO.mat.c = do.call(rbind, gsa.GO.c)
rownames(gsa.GO.mat.c) = NULL
write.xlsx(gsa.GO.mat.c, file="2.5_Module.GSA.GO.Coral.UT.xlsx")
save(gsa.GO.c, file="2.5_Module.GSA.GO.Coral.UT.RData")
load("2.5_Module.GSA.GO.Coral.UT.RData")
load("2.5_Module.GSA.KEGG.Coral.UT.RData")
# Modules correlated with temperature
modules = rbind(c("skyblue", "grey20", "#00000000", 5),
                #c("black", "white", "#00000000", 0),
                c("green", "grey20", "#00000000", 2.5),
                c("floralwhite", "grey20", "grey", 5),
                c("brown4", "grey70", "#00000000", 2.5),
                c("cyan", "grey20", "#00000000", 5))
limits = c(0, 7)
merged_barGSA(gsa.GO.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature.pdf", width=3.8, height=3.8)

modules = rbind(c("skyblue", "grey20", "#00000000", 5),
                c("black", "grey70", "#00000000", 5),
                c("green", "grey20", "#00000000", 3.5),
                c("floralwhite", "grey20", "grey", 3.5),
                c("brown4", "grey70", "#00000000", 5),
                c("cyan", "grey20", "#00000000", 5))
limits = c(0, 8.5)
merged_barGSA(gsa.KEGG.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature.KEGG.pdf", width=3.8, height=5)

# Modules anti-correlated with temperature
modules = rbind(c("white", "grey20", "grey", 5),
                c("lightyellow", "grey20", "grey", 5),
                c("lightcyan1", "grey20", "#00000000", 5),
                c("turquoise", "grey20", "#00000000", 5),
                c("orangered4", "grey70", "#00000000", 5),
                c("blue", "grey70", "#00000000", 5),
                c("sienna3", "grey20", "#00000000", 5))
limits = c(0, 8.5)
merged_barGSA(gsa.GO.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature_nega.pdf", width=3.8, height=6)

modules = rbind(c("white", "grey20", "grey", 5),
                c("lightyellow", "grey20", "grey", 5),
                c("lightcyan1", "grey20", "#00000000", 2),
                c("turquoise", "grey20", "#00000000", 2.5),
                c("orangered4", "grey70", "#00000000", 4),
                c("blue", "grey70", "#00000000", 5),
                c("sienna3", "grey20", "#00000000", 5))
limits = c(0, 12)
merged_barGSA(gsa.KEGG.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature_nega.KEGG.pdf", width=3.8, height=6)

# Colony-dependent modules
modules = rbind(c("royalblue", "black", "#00000000", 5),
                c("purple", "black", "#00000000", 3.5),
                c("ivory", "black", "grey", 5),
                #c("darkgrey", "white", "#00000000", 0),
                #c("darkorange", "white", "#00000000", 0),
                c("saddlebrown", "grey70", "#00000000", 2.5),
                c("darkred", "grey70", "#00000000", 2.5),
                c("yellowgreen", "black", "#00000000", 5),
                c("lightcyan", "black", "#00000000", 2.5),
                c("grey60", "black", "#00000000", 5),
                c("steelblue", "black", "#00000000", 2.5),
                #c("lightgreen", "white", "#00000000", 0),
                c("mediumpurple3", "black", "#00000000", 3.5),
                c("darkturquoise", "black", "#00000000", 5),
                c("midnightblue", "grey70", "#00000000", 3.5))
limits = c(0, 8.5)
merged_barGSA(gsa.GO.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Colony-dependent.pdf", width=3.8, height=8)

# Colony-dependent & DNA modification 
modules = rbind(c("royalblue", "black", "#00000000", 5),
                c("saddlebrown", "grey70", "#00000000", 2.5),
                c("darkred", "grey70", "#00000000", 2.5),
                c("lightcyan", "black", "#00000000", 2.5),
                c("steelblue", "black", "#00000000", 2.5),
                c("darkturquoise", "black", "#00000000", 5))
limits = c(0, 8.5)
merged_barGSA(gsa.GO.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Colony-dependent.DNA_modification.pdf", width=3.8, height=3.5)

# Other modules
modules = rbind(c("purple", "black", "#00000000", 3.5),
                c("ivory", "black", "grey", 5),
                c("yellowgreen", "black", "#00000000", 5),
                c("grey60", "black", "#00000000", 5),
                c("mediumpurple3", "black", "#00000000", 3.5),
                c("midnightblue", "grey70", "#00000000", 3.5))
limits = c(0, 8)
merged_barGSA(gsa.GO.c, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Colony-dependent.Other.pdf", width=3.8, height=4.2)

Zooxanthellae

modules = unique(modules.z[[as.character(t)]]$colors)
module2genes.z = list()
for(m in modules) module2genes.z[[m]] = colnames(mat.z)[modules.z[[as.character(t)]]$colors == m]

tmp = data.frame(Module = rep(modules, sapply(module2genes.z,length)),
                 Gene = unlist(module2genes.z),
                 stringsAsFactors = F)
write.xlsx(tmp, file="2.5_Module.GeneList.Zooxanthellae.UT.xlsx")

TERM2GENE = gene2GO.z[,c("GO", "Gene")]
genes_not_found = tmp %>% filter(!Gene %in% TERM2GENE$Gene) %>% pull(Gene)
TERM2GENE = rbind(TERM2GENE, cbind(GO="No hit", Gene=genes_not_found))
TERM2GENE = subset(TERM2GENE, Gene %in% tmp$Gene)
gsa.GO.z = list()
for(m in modules){
  gsa = enricher(module2genes.z[[m]], 
                 minGSSize=5, maxGSSize=500, 
                 universe=colnames(mat.z), qvalueCutoff=0.1, 
                 TERM2GENE=TERM2GENE)
  gsa = as.data.frame(gsa)
  terms = go2term(gsa$ID) %>% column_to_rownames("go_id")
  gsa$Description = terms[gsa$ID, 1]
  if(nrow(gsa) > 0) gsa = data.frame(Module=m, gsa, row.names=NULL)
  else gsa = NULL
  gsa.GO.z[[m]] = gsa
}
gsa.GO.mat.z = do.call(rbind, gsa.GO.z)
rownames(gsa.GO.mat.z) = NULL
write.xlsx(gsa.GO.mat.z, file="2.5_Module.GSA.GO.Zooxanthellae.UT.xlsx")
save(gsa.GO.z, file="2.5_Module.GSA.GO.Zooxanthellae.UT.RData")
load("2.5_Module.GSA.GO.Zooxanthellae.UT.RData")
load("2.5_Module.GSA.KEGG.Zooxanthellae.UT.RData")
# Modules correlated with temperature
modules = rbind(c("steelblue", "grey20", "#00000000", 2),
                c("plum2", "grey20", "#00000000", 5),
                c("thistle2", "grey20", "#00000000", 5),
                c("skyblue", "grey20", "#00000000", 5),
                c("skyblue3", "grey20", "#00000000", 5),
                c("purple", "grey70", "#00000000", 5),
                c("floralwhite", "grey20", "grey", 5))
limits = c(0, 12)
merged_barGSA(gsa.GO.z, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Zooxanthellae.Temperature.pdf", width=3.8, height=5)

modules = rbind(c("steelblue", "grey20", "#00000000", 2),
                c("plum2", "grey20", "#00000000", 3),
                c("thistle2", "grey20", "#00000000", 5),
                c("skyblue", "grey20", "#00000000", 5),
                c("skyblue3", "grey20", "#00000000", 3.5),
                c("purple", "grey70", "#00000000", 2))
                #c("floralwhite", "grey20", "grey", 0))
limits = c(0, 10)
merged_barGSA(gsa.KEGG.z, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Zooxanthellae.Temperature.KEGG.pdf", width=3.8, height=3.5)

# Modules correlated with temperature
modules = rbind(#c("mediumpurple3", "grey20", "#00000000", 2),
                #c("darkorange", "grey20", "#00000000", 5),
                c("paleturquoise", "grey20", "#00000000", 3.5),
                c("royalblue", "black", "#00000000", 5))
                #c("orangered", "grey20", "#00000000", 5))
limits = c(0, 5)
merged_barGSA(gsa.GO.z, modules, limits) + 
  ggsave("2.5_GeneSetAnalysis.UT.v2/Zooxanthellae.Colony-dependent.pdf", width=3.8, height=1.5)