library(tidyverse)
library(igraph)
library(openxlsx)
library(useful)
library(edgeR)
library(clusterProfiler)
library(pheatmap)
Load files
rename = function(mat){
x2 = colnames(mat)
x2[colnames(mat) == "201602_Is1"] = "201602_Mi1"
x2[colnames(mat) == "201602_Mi1"] = "201602_Is1"
colnames(mat) = x2
mat = mat[,sort(colnames(mat))]
mat
}
load("../Microbiome/1_BacterialComposition.RData")
genus = t(apply(genustab.c, 1, function(x, s) x/s*100, colSums(genustab.c)))
corner(genus)
load("Expression.UT.RData")
matCount.c = rename(matCount.c)
matFPKM.c = rename(matFPKM.c)
matCount.c = matCount.c[,grep("Ky", colnames(matCount.c), invert=T)]
matFPKM.c = matFPKM.c[,grep("Ky", colnames(matFPKM.c), invert=T)]
corner(matCount.c)
load("../Annotation/coral/Annotation.Coral.UT.RData")
TERM2GENE.GO = gene2GO.c[,c("GO", "Gene")]
genes_not_found = rownames(matCount.c)[!(rownames(matCount.c) %in% TERM2GENE.GO$Gene)]
TERM2GENE.GO = rbind(TERM2GENE.GO, cbind(GO="No hit", Gene=genes_not_found))
head(TERM2GENE.GO)
TERM2GENE.KO = gene2KO.c %>% left_join(ko2pathway) %>% select(Pathway, Gene)
genes_not_found = rownames(matCount.c)[!(rownames(matCount.c) %in% TERM2GENE.KO$Gene)]
TERM2GENE.KO = rbind(TERM2GENE.KO, cbind(Pathway="No hit", Gene=genes_not_found))
head(TERM2GENE.KO)
Functions
make_heatmap = function(mat, up_genes, down_genes, filename, width, height){
# Sort by colony
colony = meta[colnames(mat), "Colony"]
idx = order(colony)
mat = mat[c(up_genes,down_genes), idx]
#
gaps_col = cumsum(table(colony))
gaps_col = gaps_col[1:(length(gaps_col)-1)]
# Annotation
annotation_col = data.frame(Colony=colony[idx], row.names=colnames(mat))
annotation_colors = list(Colony = color.colony)
# Labels
labels = meta[colnames(mat), "Month"]
# Make figure
pheatmap(mat,
scale="row",
cluster_rows = F,
cluster_cols = F,
annotation_col = annotation_col,
gaps_row = length(up_genes),
gaps_col = gaps_col,
labels_row = rep("", nrow(mat)),
labels_col = rep("", ncol(mat)), #labels,
annotation_colors = annotation_colors,
breaks = seq(-2, 2, length=100),
filename = filename,
width = width,
height = height)
}
barGSA = function(GSA_result, n, font=3, color_bar, font_color="grey20", border_color="#00000000", limits=c(0, 5)){
mat = GSA_result[1:min(c(n, nrow(GSA_result))), ]
figmat = data.frame(P=-log10(mat$p.adjust),
Term=Hmisc::capitalize(mat$Description),
stringsAsFactors = F)
figmat$Term = gsub("MRNA", "mRNA", figmat$Term)
figmat$Term = gsub("RRNA", "rRNA", figmat$Term)
figmat$Term = gsub("^Ko:", "ko:", figmat$Term)
figmat$Term = factor(figmat$Term, levels=rev(figmat$Term))
y.lab = max(figmat$P) * 0.02
fig = ggplot(figmat) +
geom_bar(aes(x=Term, y=P), stat="identity", fill=color_bar, color=border_color) +
geom_text(aes(label=Term, x=Term), y=max(limits)*0.01, hjust=0, size=font, color=font_color) +
coord_flip() + theme_minimal() + xlab(NULL) + ylab(NULL) +
theme(axis.text.x = element_text(size=10), axis.text.y = element_blank(),
axis.ticks.y = element_blank(), axis.title.x = element_text(size=10),
plot.title = element_blank()) +
scale_y_continuous(limits=limits)
fig
}
gene_boxplot = function(mat, gene, title){
figmat = data.frame(Colony = meta[colnames(mat), "Colony"],
Value = as.numeric(mat[gene,]))
fig = ggplot(figmat) +
geom_boxplot(aes(x=Colony, y=Value), coef=10, na.rm=T) +
geom_jitter(aes(x=Colony, y=Value, color=Colony), width=0.2, height=0) +
scale_color_manual(values=color.colony) +
theme_minimal() + xlab(NULL) + ylab("log2(FPKM+1)") +
ggtitle(paste0(gene, "\n", title)) + theme(plot.title = element_text(hjust=0.5, size=12), legend.position="none")
fig
}
Model 2: Endozoicomonas (Is1,Is2 vs Others)
tmp = meta[colnames(matCount.c),] %>% mutate(Bacteria = Colony %in% c("Is1", "Is2"))
group = tmp$Bacteria
design = model.matrix(~Bacteria, data=tmp)
head(design)
d <- DGEList(counts=matCount.c, group=group)
d <- calcNormFactors(d)
d <- estimateGLMCommonDisp(d, design)
d <- estimateGLMTrendedDisp(d, design)
d <- estimateGLMTagwiseDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef = 2)
result.E = topTags(lrt, p.value = 0.01, n=Inf)
load("../Annotation/coral/Acropora_tenuis_UT.StringTie.transcripts.fasta.transdecoder.pep.swissprot_blastp.parse.Uniprot-mapping.RData")
colnames(mapping)[1] = "Uniprot"
mat = result.E %>% as.data.frame() %>%
rownames_to_column("Gene") %>% left_join(gene2Name.c, by="Gene") %>%
filter(abs(logFC) > 1) %>%
left_join(mapping, by="Uniprot") %>% unique()
write.xlsx(mat, file="3.2_Endozoicomonas_DEG.xlsx")
up_genes = result.E %>% as.data.frame() %>% rownames_to_column("Gene") %>%
filter(logFC > log2(2) & FDR < 0.01) %>% pull(Gene)
down_genes = result.E %>% as.data.frame() %>%
rownames_to_column("Gene") %>%
filter(logFC < log2(0.5) & FDR < 0.01) %>%
pull(Gene)
make_heatmap(log2(matFPKM.c+1), up_genes, down_genes, "3.2_Endozoicomonas_DEG.Heatmap.pdf", width=3, height=2.5)
# Upregulated genes
gsa.up = enricher(up_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.GO)
gsa.up = as.data.frame(gsa.up)
terms = go2term(gsa.up$ID) %>% column_to_rownames("go_id")
gsa.up$Description = terms[gsa.up$ID, 1]
if(nrow(gsa.up) > 0) gsa.up = data.frame(Group="up", gsa.up, row.names=NULL)
# Downregulated genes
gsa.down = enricher(down_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.GO)
gsa.down = as.data.frame(gsa.down)
terms = go2term(gsa.down$ID) %>% column_to_rownames("go_id")
gsa.down$Description = terms[gsa.down$ID, 1]
if(nrow(gsa.down) > 0) gsa.down = data.frame(Group="down", gsa.down, row.names=NULL)
gsa = rbind(gsa.up, gsa.down)
write.xlsx(gsa, file="3.2_Endozoicomonas_DEG.GSA.GO.xlsx")
barGSA(gsa.up, n=10, color_bar="coral", limits=c(0,3), font_color="black", font=2.8) +
ggsave("3.2_Endozoicomonas_DEG.GSA.GO.Up.pdf", width=3.5, height=2.3)
barGSA(gsa.down, n=10, color_bar="turquoise", limits=c(0,6), font_color="black") +
ggsave("3.2_Endozoicomonas_DEG.GSA.GO.Down.pdf", width=3.5, height=1)
# Upregulated genes
gsa.up = enricher(up_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.KO)
gsa.up = as.data.frame(gsa.up)
if(nrow(gsa.up) > 0) gsa.up = data.frame(Group="up", gsa.up, row.names=NULL)
# Downregulated genes
gsa.down = enricher(down_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.KO)
gsa.down = as.data.frame(gsa.down)
if(nrow(gsa.down) > 0) gsa.down = data.frame(Group="down", gsa.down, row.names=NULL)
gsa = rbind(gsa.up, gsa.down)
write.xlsx(gsa, file="3.2_Endozoicomonas_DEG.GSA.KEGG.xlsx")
# Top hit gene (Ephrin receptor)
fig1 = gene_boxplot(log2(matFPKM.c+1), "MSTRG.5097", "Epha5")
fig2 = gene_boxplot(log2(matFPKM.c+1), "MSTRG.31636", "Ephb1")
fig3 = gene_boxplot(log2(matFPKM.c+1), "MSTRG.38764", "Epha3")
cowplot::plot_grid(fig1, fig2, fig3, nrow=1) + ggsave("3.2_Endozoicomonas_DEG.Eph_receptor.pdf", width=6, height=2.5)
fig1 = gene_boxplot(log2(matFPKM.c+1), "MSTRG.12568", "P2rx7")
fig2 = gene_boxplot(log2(matFPKM.c+1), "MSTRG.9878", "P2rx7")
fig3 = gene_boxplot(log2(matFPKM.c+1), "MSTRG.9924", "P2rx7")
cowplot::plot_grid(fig1, fig2, fig3, nrow=1) + ggsave("3.2_Endozoicomonas_DEG.P2X7.pdf", width=6, height=2.5)
Model 2: Anaplasma (Is1,Is3,Mi1,Mi2 vs Others)
tmp = meta[colnames(matCount.c),] %>% mutate(Bacteria = Colony %in% c("Is1", "Is3", "Mi1", "Mi2"))
group = tmp$Bacteria
design = model.matrix(~Bacteria, data=tmp)
head(design)
d <- DGEList(counts=matCount.c, group=group)
d <- calcNormFactors(d)
d <- estimateGLMCommonDisp(d, design)
d <- estimateGLMTrendedDisp(d, design)
d <- estimateGLMTagwiseDisp(d, design)
fit <- glmFit(d, design)
lrt <- glmLRT(fit, coef = 2)
result.A = topTags(lrt, p.value = 0.01, n=Inf)
load("../Annotation/coral/Acropora_tenuis_UT.StringTie.transcripts.fasta.transdecoder.pep.swissprot_blastp.parse.Uniprot-mapping.RData")
colnames(mapping)[1] = "Uniprot"
mat = result.A %>% as.data.frame() %>%
rownames_to_column("Gene") %>% left_join(gene2Name.c, by="Gene") %>%
filter(abs(logFC) > 1) %>%
left_join(mapping, by="Uniprot") %>% unique()
write.xlsx(mat, file="3.2_Anaplasma_DEG.xlsx")
up_genes = result.A %>% as.data.frame() %>% rownames_to_column("Gene") %>%
filter(logFC > 1 & FDR < 0.01) %>% pull(Gene)
down_genes = result.A %>% as.data.frame() %>% rownames_to_column("Gene") %>%
filter(logFC < -1 & FDR < 0.01) %>% pull(Gene)
make_heatmap(log2(matFPKM.c+1), up_genes, down_genes, "3.2_Anaplasma_DEG.Heatmap.pdf", width=3, height=2.5)
# Upregulated genes
gsa.up = enricher(up_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.GO)
gsa.up = as.data.frame(gsa.up)
terms = go2term(gsa.up$ID) %>% column_to_rownames("go_id")
gsa.up$Description = terms[gsa.up$ID, 1]
if(nrow(gsa.up) > 0) gsa.up = data.frame(Group="up", gsa.up, row.names=NULL)
# Downregulated genes
gsa.down = enricher(down_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.GO)
gsa.down = as.data.frame(gsa.down)
terms = go2term(gsa.down$ID) %>% column_to_rownames("go_id")
gsa.down$Description = terms[gsa.down$ID, 1]
if(nrow(gsa.down) > 0) gsa.down = data.frame(Group="down", gsa.down, row.names=NULL)
gsa = rbind(gsa.up, gsa.down)
write.xlsx(gsa, file="3.2_Anaplasma_DEG.GSA.GO.xlsx")
barGSA(gsa.up, n=10, color_bar="coral", limits=c(0,3), font_color="black", font=2.7) +
ggsave("3.2_Anaplasma_DEG.GSA.GO.Up.pdf", width=3, height=2.3)
# Upregulated genes
gsa.up = enricher(up_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.KO)
gsa.up = as.data.frame(gsa.up)
if(nrow(gsa.up) > 0) gsa.up = data.frame(Group="up", gsa.up, row.names=NULL)
# Downregulated genes
gsa.down = enricher(down_genes,
minGSSize=10, maxGSSize=500,
universe=rownames(matFPKM.c), qvalueCutoff=0.1,
TERM2GENE=TERM2GENE.KO)
gsa.down = as.data.frame(gsa.down)
if(nrow(gsa.down) > 0) gsa.down = data.frame(Group="down", gsa.down, row.names=NULL)
gsa = rbind(gsa.up, gsa.down)
write.xlsx(gsa, file="3.2_Anaplasma_DEG.GSA.KEGG.xlsx")