Correlation patternsΒΆ

1_Correlation.UT.utf8
library(tidyverse)
library(useful)
library(pheatmap)

Load files

# matFPKM.c : Gene expression table (FPKM level) of corals
# matTPM.c : Gene expression table (TPM level) of corals
# matFPKM.z : Gene expression table (FPKM level) of zooxanthellae
# matTPM.z : Gene expression table (TPM level) of zooxanthellae
# meta : Metadata
load("Expression.UT.RData")
# Log transformation
matFPKM.c.log = log2(matFPKM.c + 1)
matTPM.c.log = log2(matTPM.c + 1)
matFPKM.z.log = log2(matFPKM.z + 1)
matTPM.z.log = log2(matTPM.z + 1)
rename = function(mat){
  x2 = colnames(mat)
  x2[colnames(mat) == "201602_Is1"] = "201602_Mi1"
  x2[colnames(mat) == "201602_Mi1"] = "201602_Is1"
  colnames(mat) = x2
  mat = mat[,sort(colnames(mat))]
  mat
}
matFPKM.c.log = rename(matFPKM.c.log)
matTPM.c.log = rename(matTPM.c.log)
matFPKM.z.log = rename(matFPKM.z.log)
matTPM.z.log = rename(matTPM.z.log)

Coral

Calculate average profile by colony-time

samples = apply(meta[colnames(matFPKM.c.log), c("Month", "Colony")], 1, paste, collapse="_")
mat = matFPKM.c.log
mat_colony = t(apply(matFPKM.c.log, 1, tapply, samples, mean))

Pick-up highly variable genes

threshold = 20 # Select top 20% of variable genes
vars = apply(mat_colony, 1, var)
genes = names(vars)[rev(order(vars))[1:(length(vars)*threshold/100)]]

Heatmap of correlation coefficients

cormat = cor(mat_colony[genes,])

# Sort by colony
colony = meta[rownames(cormat), "Colony"]
idx = order(colony)
cormat = cormat[idx, idx]

# Annotationa
annotation = data.frame(Colony=colony[idx], row.names = rownames(cormat))
annotation_colors = list(Colony = color.colony)

# Labels
labels = meta[rownames(cormat), "Month"]

# Make figure
cormat[cormat < 0.7] = 0.7
pheatmap(cormat, 
         cluster_rows = F,
         cluster_cols = F,
         annotation_row = annotation,
         annotation_col = annotation,
         labels_row = labels,
         labels_col = labels,
         annotation_colors = annotation_colors,
         breaks = seq(0.7, 1, length=100))

# Sort by time
colony = meta[rownames(cormat), "Month"]
idx = order(colony)
cormat = cormat[idx, idx]

# Make figure
pheatmap(cormat, 
         cluster_rows = F,
         cluster_cols = F,
         annotation_row = annotation,
         annotation_col = annotation,
         labels_row = labels,
         labels_col = labels,
         annotation_colors = annotation_colors,
         breaks = seq(0.7, 1, length=100))

cormat = cor(mat[genes,])
figmat = matrix(nrow=0, ncol=3)

# Colony
colony = meta[rownames(cormat), "Colony"]
for(g in unique(colony)){
  idx = which(colony == g)
  tmp1 = data.frame(Category="Colony", Group="Intra-colony", Value=as.numeric(as.dist(cormat[idx,idx])))
  tmp2 = data.frame(Category="Colony", Group="Inter-colony", Value=as.numeric(cormat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling time
time = meta[rownames(cormat), "Month"]
for(g in unique(time)){
  idx = which(time == g)
  tmp1 = data.frame(Category="Time", Group="Intra-time", Value=as.numeric(as.dist(cormat[idx,idx])))
  tmp2 = data.frame(Category="Time", Group="Inter-time", Value=as.numeric(cormat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling site
site = meta[rownames(cormat), "Site"]
for(g in unique(site)){
  idx = which(site == g)
  tmp1 = data.frame(Category="Site", Group="Intra-site", Value=as.numeric(as.dist(cormat[idx,idx])))
  tmp2 = data.frame(Category="Site", Group="Inter-site", Value=as.numeric(cormat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

fig = ggplot(figmat) +
  geom_boxplot(aes(x=Group, y=Value), coef=Inf) +
  facet_grid(~Category, scales="free_x") +
  xlab(NULL) + ylab("Correlation coefficient") +
  theme_bw() +
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, size=12)) + 
  scale_y_continuous(limits=c(0.6, 1))
fig + ggsave("1_Correlation.Boxplot.Coral.UT.pdf", width=2.5, height=3)
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).

## Warning: Removed 6 rows containing non-finite values (stat_boxplot).

Zooxanthellae

Calculate average profile by colony-time

samples = apply(meta[colnames(matFPKM.z.log), c("Month", "Colony")], 1, paste, collapse="_")
mat = matFPKM.z.log
mat_colony = t(apply(matFPKM.z.log, 1, tapply, samples, mean))

Pick-up highly variable genes

threshold = 20 # Select top 20% of variable genes
vars = apply(mat_colony, 1, var)
genes = names(vars)[rev(order(vars))[1:(length(vars)*threshold/100)]]

Heatmap of correlation coefficients

cormat = cor(mat_colony[genes,])

# Sort by colony
colony = meta[rownames(cormat), "Colony"]
idx = order(colony)
cormat = cormat[idx, idx]

# Annotation
annotation = data.frame(Colony=colony[idx], row.names = rownames(cormat))
annotation_colors = list(Colony = color.colony)

# Labels
labels = meta[rownames(cormat), "Month"]

# Make figure
cormat[cormat < 0.7] = 0.7
pheatmap(cormat, 
         cluster_rows = F,
         cluster_cols = F,
         annotation_row = annotation,
         annotation_col = annotation,
         labels_row = labels,
         labels_col = labels,
         annotation_colors = annotation_colors,
         breaks = seq(0.7, 1, length=100))

# Sort by time
time = meta[rownames(cormat), "Month"]
idx = order(time)
cormat = cormat[idx, idx]

# Labels
labels = meta[rownames(cormat), "Colony"]

# Annotation
annotation = data.frame(Time=time[idx], row.names = rownames(cormat))
annotation_colors = ggsci::pal_d3("category10")(10)
names(annotation_colors) = unique(annotation$Time)
annotation_colors = list(Time = annotation_colors)

# Make figure
pheatmap(cormat, 
         cluster_rows = F,
         cluster_cols = F,
         annotation_row = annotation,
         annotation_col = annotation,
         labels_row = labels,
         labels_col = labels,
         annotation_colors = annotation_colors,
         breaks = seq(0.7, 1, length=100))

Heatmap of correlation coefficients

cormat = cor(mat[genes,])
figmat = matrix(nrow=0, ncol=3)

# Colony
colony = meta[rownames(cormat), "Colony"]
for(g in unique(colony)){
  idx = which(colony == g)
  tmp1 = data.frame(Category="Colony", Group="Intra-colony", Value=as.numeric(as.dist(cormat[idx,idx])))
  tmp2 = data.frame(Category="Colony", Group="Inter-colony", Value=as.numeric(cormat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling time
time = meta[rownames(cormat), "Month"]
for(g in unique(time)){
  idx = which(time == g)
  tmp1 = data.frame(Category="Time", Group="Intra-time", Value=as.numeric(as.dist(cormat[idx,idx])))
  tmp2 = data.frame(Category="Time", Group="Inter-time", Value=as.numeric(cormat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

# Sampling site
site = meta[rownames(cormat), "Site"]
for(g in unique(site)){
  idx = which(site == g)
  tmp1 = data.frame(Category="Site", Group="Intra-site", Value=as.numeric(as.dist(cormat[idx,idx])))
  tmp2 = data.frame(Category="Site", Group="Inter-site", Value=as.numeric(cormat[idx,-idx]))
  figmat = rbind(figmat, tmp1, tmp2)
}

fig = ggplot(figmat) +
  geom_boxplot(aes(x=Group, y=Value), coef=Inf) +
  facet_grid(~Category, scales="free_x") +
  xlab(NULL) + ylab("Correlation coefficient") +
  theme_bw() +
  theme(axis.text.x = element_text(angle=90, hjust=1, vjust=0.5, size=12)) + 
  scale_y_continuous(limits=c(0.6, 1))
fig + ggsave("1_Correlation.Boxplot.Zooxanthellae.UT.pdf", width=2.5, height=3)
## Warning: Removed 117 rows containing non-finite values (stat_boxplot).

## Warning: Removed 117 rows containing non-finite values (stat_boxplot).