Correlation patternsΒΆ
library(tidyverse)
library(useful)
library(pheatmap)
Load files
# matFPKM.c : Gene expression table (FPKM level) of corals
# matTPM.c : Gene expression table (TPM level) of corals
# matFPKM.z : Gene expression table (FPKM level) of zooxanthellae
# matTPM.z : Gene expression table (TPM level) of zooxanthellae
# meta : Metadata
load("Expression.UT.RData")
# Log transformation
matFPKM.c.log = log2(matFPKM.c + 1)
matTPM.c.log = log2(matTPM.c + 1)
matFPKM.z.log = log2(matFPKM.z + 1)
matTPM.z.log = log2(matTPM.z + 1)
rename = function(mat){
x2 = colnames(mat)
x2[colnames(mat) == "201602_Is1"] = "201602_Mi1"
x2[colnames(mat) == "201602_Mi1"] = "201602_Is1"
colnames(mat) = x2
mat = mat[,sort(colnames(mat))]
mat
}
matFPKM.c.log = rename(matFPKM.c.log)
matTPM.c.log = rename(matTPM.c.log)
matFPKM.z.log = rename(matFPKM.z.log)
matTPM.z.log = rename(matTPM.z.log)
Coral
Calculate average profile by colony-time
samples = apply(meta[colnames(matFPKM.c.log), c("Month", "Colony")], 1, paste, collapse="_")
mat = matFPKM.c.log
mat_colony = t(apply(matFPKM.c.log, 1, tapply, samples, mean))
Pick-up highly variable genes
threshold = 20 # Select top 20% of variable genes
vars = apply(mat_colony, 1, var)
genes = names(vars)[rev(order(vars))[1:(length(vars)*threshold/100)]]
Heatmap of correlation coefficients
cormat = cor(mat_colony[genes,])
# Sort by colony
colony = meta[rownames(cormat), "Colony"]
idx = order(colony)
cormat = cormat[idx, idx]
# Annotationa
annotation = data.frame(Colony=colony[idx], row.names = rownames(cormat))
annotation_colors = list(Colony = color.colony)
# Labels
labels = meta[rownames(cormat), "Month"]
# Make figure
cormat[cormat < 0.7] = 0.7
pheatmap(cormat,
cluster_rows = F,
cluster_cols = F,
annotation_row = annotation,
annotation_col = annotation,
labels_row = labels,
labels_col = labels,
annotation_colors = annotation_colors,
breaks = seq(0.7, 1, length=100))
# Sort by time
colony = meta[rownames(cormat), "Month"]
idx = order(colony)
cormat = cormat[idx, idx]
# Make figure
pheatmap(cormat,
cluster_rows = F,
cluster_cols = F,
annotation_row = annotation,
annotation_col = annotation,
labels_row = labels,
labels_col = labels,
annotation_colors = annotation_colors,
breaks = seq(0.7, 1, length=100))
cormat = cor(mat[genes,])
figmat = matrix(nrow=0, ncol=3)
# Colony
colony = meta[rownames(cormat), "Colony"]
for(g in unique(colony)){
idx = which(colony == g)
tmp1 = data.frame(Category="Colony", Group="Intra-colony", Value=as.numeric(as.dist(cormat[idx,idx])))
tmp2 = data.frame(Category="Colony", Group="Inter-colony", Value=as.numeric(cormat[idx,-idx]))
figmat = rbind(figmat, tmp1, tmp2)
}
# Sampling time
time = meta[rownames(cormat), "Month"]
for(g in unique(time)){
idx = which(time == g)
tmp1 = data.frame(Category="Time", Group="Intra-time", Value=as.numeric(as.dist(cormat[idx,idx])))
tmp2 = data.frame(Category="Time", Group="Inter-time", Value=as.numeric(cormat[idx,-idx]))
figmat = rbind(figmat, tmp1, tmp2)
}
# Sampling site
site = meta[rownames(cormat), "Site"]
for(g in unique(site)){
idx = which(site == g)
tmp1 = data.frame(Category="Site", Group="Intra-site", Value=as.numeric(as.dist(cormat[idx,idx])))
tmp2 = data.frame(Category="Site", Group="Inter-site", Value=as.numeric(cormat[idx,-idx]))
figmat = rbind(figmat, tmp1, tmp2)
}
fig = ggplot(figmat) +
geom_boxplot(aes(x=Group, y=Value), coef=Inf) +
facet_grid(~Category, scales="free_x") +
xlab(NULL) + ylab("Correlation coefficient") +
theme_bw() +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, size=12)) +
scale_y_continuous(limits=c(0.6, 1))
fig + ggsave("1_Correlation.Boxplot.Coral.UT.pdf", width=2.5, height=3)
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
Zooxanthellae
Calculate average profile by colony-time
samples = apply(meta[colnames(matFPKM.z.log), c("Month", "Colony")], 1, paste, collapse="_")
mat = matFPKM.z.log
mat_colony = t(apply(matFPKM.z.log, 1, tapply, samples, mean))
Pick-up highly variable genes
threshold = 20 # Select top 20% of variable genes
vars = apply(mat_colony, 1, var)
genes = names(vars)[rev(order(vars))[1:(length(vars)*threshold/100)]]
Heatmap of correlation coefficients
cormat = cor(mat_colony[genes,])
# Sort by colony
colony = meta[rownames(cormat), "Colony"]
idx = order(colony)
cormat = cormat[idx, idx]
# Annotation
annotation = data.frame(Colony=colony[idx], row.names = rownames(cormat))
annotation_colors = list(Colony = color.colony)
# Labels
labels = meta[rownames(cormat), "Month"]
# Make figure
cormat[cormat < 0.7] = 0.7
pheatmap(cormat,
cluster_rows = F,
cluster_cols = F,
annotation_row = annotation,
annotation_col = annotation,
labels_row = labels,
labels_col = labels,
annotation_colors = annotation_colors,
breaks = seq(0.7, 1, length=100))
# Sort by time
time = meta[rownames(cormat), "Month"]
idx = order(time)
cormat = cormat[idx, idx]
# Labels
labels = meta[rownames(cormat), "Colony"]
# Annotation
annotation = data.frame(Time=time[idx], row.names = rownames(cormat))
annotation_colors = ggsci::pal_d3("category10")(10)
names(annotation_colors) = unique(annotation$Time)
annotation_colors = list(Time = annotation_colors)
# Make figure
pheatmap(cormat,
cluster_rows = F,
cluster_cols = F,
annotation_row = annotation,
annotation_col = annotation,
labels_row = labels,
labels_col = labels,
annotation_colors = annotation_colors,
breaks = seq(0.7, 1, length=100))
Heatmap of correlation coefficients
cormat = cor(mat[genes,])
figmat = matrix(nrow=0, ncol=3)
# Colony
colony = meta[rownames(cormat), "Colony"]
for(g in unique(colony)){
idx = which(colony == g)
tmp1 = data.frame(Category="Colony", Group="Intra-colony", Value=as.numeric(as.dist(cormat[idx,idx])))
tmp2 = data.frame(Category="Colony", Group="Inter-colony", Value=as.numeric(cormat[idx,-idx]))
figmat = rbind(figmat, tmp1, tmp2)
}
# Sampling time
time = meta[rownames(cormat), "Month"]
for(g in unique(time)){
idx = which(time == g)
tmp1 = data.frame(Category="Time", Group="Intra-time", Value=as.numeric(as.dist(cormat[idx,idx])))
tmp2 = data.frame(Category="Time", Group="Inter-time", Value=as.numeric(cormat[idx,-idx]))
figmat = rbind(figmat, tmp1, tmp2)
}
# Sampling site
site = meta[rownames(cormat), "Site"]
for(g in unique(site)){
idx = which(site == g)
tmp1 = data.frame(Category="Site", Group="Intra-site", Value=as.numeric(as.dist(cormat[idx,idx])))
tmp2 = data.frame(Category="Site", Group="Inter-site", Value=as.numeric(cormat[idx,-idx]))
figmat = rbind(figmat, tmp1, tmp2)
}
fig = ggplot(figmat) +
geom_boxplot(aes(x=Group, y=Value), coef=Inf) +
facet_grid(~Category, scales="free_x") +
xlab(NULL) + ylab("Correlation coefficient") +
theme_bw() +
theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, size=12)) +
scale_y_continuous(limits=c(0.6, 1))
fig + ggsave("1_Correlation.Boxplot.Zooxanthellae.UT.pdf", width=2.5, height=3)
## Warning: Removed 117 rows containing non-finite values (stat_boxplot).
## Warning: Removed 117 rows containing non-finite values (stat_boxplot).