library(clusterProfiler)
library(tidyverse)
library(openxlsx)
barGSA = function(GSA_result, n, font=3, color_bar, font_color="grey20", border_color="#00000000", limits=c(0, 5)){
mat = GSA_result[1:min(c(n, nrow(GSA_result))), ]
figmat = data.frame(P=-log10(mat$p.adjust),
Term=Hmisc::capitalize(mat$Description),
stringsAsFactors = F)
figmat$Term = gsub("MRNA", "mRNA", figmat$Term)
figmat$Term = gsub("RRNA", "rRNA", figmat$Term)
figmat$Term = gsub("^Ko:", "ko:", figmat$Term)
figmat$Term = factor(figmat$Term, levels=rev(figmat$Term))
y.lab = max(figmat$P) * 0.02
fig = ggplot(figmat) +
geom_bar(aes(x=Term, y=P), stat="identity", fill=color_bar, color=border_color) +
geom_text(aes(label=Term, x=Term), y=max(limits)*0.01, hjust=0, size=font, color=font_color) +
coord_flip() + theme_minimal() + xlab(NULL) + ylab(NULL) +
theme(axis.text.x = element_text(size=10), axis.text.y = element_blank(),
axis.ticks.y = element_blank(), axis.title.x = element_text(size=10),
plot.title = element_blank()) +
scale_y_continuous(limits=limits)
fig
}
merged_barGSA = function(gsa, modules, limits){
figs = list()
for(i in seq_len(nrow(modules))){
m = modules[i, 1]
if(!is.null(gsa[[m]])){
figs[[m]] = barGSA(gsa[[m]], n=as.numeric(modules[i,4]), color_bar=m, font_color=modules[i, 2],
border_color=modules[i,3], limits=limits, font=3) +
theme(legend.position="none", plot.margin = unit(c(0, 0, 0, 0), "cm"), axis.text.x = element_blank())
}
}
figs[[length(figs)]] = figs[[length(figs)]] + theme(axis.text.x = element_text(size=10))
cowplot::plot_grid(plotlist=figs, ncol=1, align="h", rel_heights=as.numeric(modules[, 4]))
}
Load files
t = 0.2
load("2.1.c_WGCNA_ThresholdingPower.UT.RData")
load("2.2.c_WGCNA.Modules.UT.RData")
load("2.1.z_WGCNA_ThresholdingPower.UT.RData")
load("2.2.z_WGCNA.Modules.UT.RData")
load("../Annotation/zooxanthellae/rnaSPAdes.1Mreads.UT/Annotation.Zooxanthellae.UT.RData")
load("../Annotation/coral/Annotation.Coral.UT.RData")
Coral
modules = unique(modules.c[[as.character(t)]]$colors)
module2genes.c = list()
for(m in modules) module2genes.c[[m]] = colnames(mat.c)[modules.c[[as.character(t)]]$colors == m]
tmp = data.frame(Module = rep(modules, sapply(module2genes.c,length)),
Gene = unlist(module2genes.c),
stringsAsFactors = F)
write.xlsx(tmp, file="2.5_Module.GeneList.Coral.UT.xlsx")
TERM2GENE = gene2GO.c[,c("GO", "Gene")]
genes_not_found = tmp %>% filter(!Gene %in% TERM2GENE$Gene) %>% pull(Gene)
TERM2GENE = rbind(TERM2GENE, cbind(GO="No hit", Gene=genes_not_found))
TERM2GENE = subset(TERM2GENE, Gene %in% tmp$Gene)
gsa.GO.c = list()
for(m in modules){
print(m)
gsa = enricher(module2genes.c[[m]],
minGSSize=5, maxGSSize=500,
universe=colnames(mat.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE)
gsa = as.data.frame(gsa)
terms = go2term(gsa$ID) %>% column_to_rownames("go_id")
gsa$Description = terms[gsa$ID, 1]
if(nrow(gsa) > 0) gsa = data.frame(Module=m, gsa, row.names=NULL)
else gsa = NULL
gsa.GO.c[[m]] = gsa
}
gsa.GO.mat.c = do.call(rbind, gsa.GO.c)
rownames(gsa.GO.mat.c) = NULL
write.xlsx(gsa.GO.mat.c, file="2.5_Module.GSA.GO.Coral.UT.xlsx")
save(gsa.GO.c, file="2.5_Module.GSA.GO.Coral.UT.RData")
load("2.5_Module.GSA.GO.Coral.UT.RData")
load("2.5_Module.GSA.KEGG.Coral.UT.RData")
# Modules correlated with temperature
modules = rbind(c("skyblue", "grey20", "#00000000", 5),
#c("black", "white", "#00000000", 0),
c("green", "grey20", "#00000000", 2.5),
c("floralwhite", "grey20", "grey", 5),
c("brown4", "grey70", "#00000000", 2.5),
c("cyan", "grey20", "#00000000", 5))
limits = c(0, 7)
merged_barGSA(gsa.GO.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature.pdf", width=3.8, height=3.8)
modules = rbind(c("skyblue", "grey20", "#00000000", 5),
c("black", "grey70", "#00000000", 5),
c("green", "grey20", "#00000000", 3.5),
c("floralwhite", "grey20", "grey", 3.5),
c("brown4", "grey70", "#00000000", 5),
c("cyan", "grey20", "#00000000", 5))
limits = c(0, 8.5)
merged_barGSA(gsa.KEGG.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature.KEGG.pdf", width=3.8, height=5)
# Modules anti-correlated with temperature
modules = rbind(c("white", "grey20", "grey", 5),
c("lightyellow", "grey20", "grey", 5),
c("lightcyan1", "grey20", "#00000000", 5),
c("turquoise", "grey20", "#00000000", 5),
c("orangered4", "grey70", "#00000000", 5),
c("blue", "grey70", "#00000000", 5),
c("sienna3", "grey20", "#00000000", 5))
limits = c(0, 8.5)
merged_barGSA(gsa.GO.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature_nega.pdf", width=3.8, height=6)
modules = rbind(c("white", "grey20", "grey", 5),
c("lightyellow", "grey20", "grey", 5),
c("lightcyan1", "grey20", "#00000000", 2),
c("turquoise", "grey20", "#00000000", 2.5),
c("orangered4", "grey70", "#00000000", 4),
c("blue", "grey70", "#00000000", 5),
c("sienna3", "grey20", "#00000000", 5))
limits = c(0, 12)
merged_barGSA(gsa.KEGG.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Temperature_nega.KEGG.pdf", width=3.8, height=6)
# Colony-dependent modules
modules = rbind(c("royalblue", "black", "#00000000", 5),
c("purple", "black", "#00000000", 3.5),
c("ivory", "black", "grey", 5),
#c("darkgrey", "white", "#00000000", 0),
#c("darkorange", "white", "#00000000", 0),
c("saddlebrown", "grey70", "#00000000", 2.5),
c("darkred", "grey70", "#00000000", 2.5),
c("yellowgreen", "black", "#00000000", 5),
c("lightcyan", "black", "#00000000", 2.5),
c("grey60", "black", "#00000000", 5),
c("steelblue", "black", "#00000000", 2.5),
#c("lightgreen", "white", "#00000000", 0),
c("mediumpurple3", "black", "#00000000", 3.5),
c("darkturquoise", "black", "#00000000", 5),
c("midnightblue", "grey70", "#00000000", 3.5))
limits = c(0, 8.5)
merged_barGSA(gsa.GO.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Colony-dependent.pdf", width=3.8, height=8)
# Colony-dependent & DNA modification
modules = rbind(c("royalblue", "black", "#00000000", 5),
c("saddlebrown", "grey70", "#00000000", 2.5),
c("darkred", "grey70", "#00000000", 2.5),
c("lightcyan", "black", "#00000000", 2.5),
c("steelblue", "black", "#00000000", 2.5),
c("darkturquoise", "black", "#00000000", 5))
limits = c(0, 8.5)
merged_barGSA(gsa.GO.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Colony-dependent.DNA_modification.pdf", width=3.8, height=3.5)
# Other modules
modules = rbind(c("purple", "black", "#00000000", 3.5),
c("ivory", "black", "grey", 5),
c("yellowgreen", "black", "#00000000", 5),
c("grey60", "black", "#00000000", 5),
c("mediumpurple3", "black", "#00000000", 3.5),
c("midnightblue", "grey70", "#00000000", 3.5))
limits = c(0, 8)
merged_barGSA(gsa.GO.c, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Coral.Colony-dependent.Other.pdf", width=3.8, height=4.2)
Zooxanthellae
modules = unique(modules.z[[as.character(t)]]$colors)
module2genes.z = list()
for(m in modules) module2genes.z[[m]] = colnames(mat.z)[modules.z[[as.character(t)]]$colors == m]
tmp = data.frame(Module = rep(modules, sapply(module2genes.z,length)),
Gene = unlist(module2genes.z),
stringsAsFactors = F)
write.xlsx(tmp, file="2.5_Module.GeneList.Zooxanthellae.UT.xlsx")
TERM2GENE = gene2GO.z[,c("GO", "Gene")]
genes_not_found = tmp %>% filter(!Gene %in% TERM2GENE$Gene) %>% pull(Gene)
TERM2GENE = rbind(TERM2GENE, cbind(GO="No hit", Gene=genes_not_found))
TERM2GENE = subset(TERM2GENE, Gene %in% tmp$Gene)
gsa.GO.z = list()
for(m in modules){
gsa = enricher(module2genes.z[[m]],
minGSSize=5, maxGSSize=500,
universe=colnames(mat.z), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE)
gsa = as.data.frame(gsa)
terms = go2term(gsa$ID) %>% column_to_rownames("go_id")
gsa$Description = terms[gsa$ID, 1]
if(nrow(gsa) > 0) gsa = data.frame(Module=m, gsa, row.names=NULL)
else gsa = NULL
gsa.GO.z[[m]] = gsa
}
gsa.GO.mat.z = do.call(rbind, gsa.GO.z)
rownames(gsa.GO.mat.z) = NULL
write.xlsx(gsa.GO.mat.z, file="2.5_Module.GSA.GO.Zooxanthellae.UT.xlsx")
save(gsa.GO.z, file="2.5_Module.GSA.GO.Zooxanthellae.UT.RData")
load("2.5_Module.GSA.GO.Zooxanthellae.UT.RData")
load("2.5_Module.GSA.KEGG.Zooxanthellae.UT.RData")
# Modules correlated with temperature
modules = rbind(c("steelblue", "grey20", "#00000000", 2),
c("plum2", "grey20", "#00000000", 5),
c("thistle2", "grey20", "#00000000", 5),
c("skyblue", "grey20", "#00000000", 5),
c("skyblue3", "grey20", "#00000000", 5),
c("purple", "grey70", "#00000000", 5),
c("floralwhite", "grey20", "grey", 5))
limits = c(0, 12)
merged_barGSA(gsa.GO.z, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Zooxanthellae.Temperature.pdf", width=3.8, height=5)
modules = rbind(c("steelblue", "grey20", "#00000000", 2),
c("plum2", "grey20", "#00000000", 3),
c("thistle2", "grey20", "#00000000", 5),
c("skyblue", "grey20", "#00000000", 5),
c("skyblue3", "grey20", "#00000000", 3.5),
c("purple", "grey70", "#00000000", 2))
#c("floralwhite", "grey20", "grey", 0))
limits = c(0, 10)
merged_barGSA(gsa.KEGG.z, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Zooxanthellae.Temperature.KEGG.pdf", width=3.8, height=3.5)
# Modules correlated with temperature
modules = rbind(#c("mediumpurple3", "grey20", "#00000000", 2),
#c("darkorange", "grey20", "#00000000", 5),
c("paleturquoise", "grey20", "#00000000", 3.5),
c("royalblue", "black", "#00000000", 5))
#c("orangered", "grey20", "#00000000", 5))
limits = c(0, 5)
merged_barGSA(gsa.GO.z, modules, limits) +
ggsave("2.5_GeneSetAnalysis.UT.v2/Zooxanthellae.Colony-dependent.pdf", width=3.8, height=1.5)