Locally colinear blocks

LCB_Orthogroup_forPaper.knit
library(useful)
library(tidyverse)
library(Biostrings)
library(RColorBrewer)
library(ggplot2)

Load data

annotation = readRDS("../FeatureTable.rds")

Functions

Extract genome regions between two orthogroups

extract_genome_region = function(species, annotation, start_OG, end_OG){
  remove_idx = c() # index to remove
  regions = list()
  LCB.features = list()
  for(s in species){
    mat = annotation %>% filter(Species == s)
    
    idx_start = which(mat$Orthogroup == start_OG)
    idx_end = which(mat$Orthogroup == end_OG)
    
    if(length(idx_start) != 0 & length(idx_end) != 0){
      mat = mat[idx_start:idx_end,]
      if(idx_start > idx_end){
        # Flip strand
        mat$Strand = c("+"="-", "-"="+")[mat$Strand]
        regions[[s]] = c(start=mat$End[1], end=mat$Start[nrow(mat)], contig=mat$Genome[1])
        
        # Set start & end
        start = -(mat$Start - mat$End[1])
        end = -(mat$End - mat$End[1])
        mat$Start = end
        mat$End = start
      }else{
        regions[[s]] = c(start=mat$Start[1], end=mat$End[nrow(mat)], contig=mat$Genome[1])
        
        # Flip start & end
        start = (mat$Start - mat$Start[1])
        end = (mat$End - mat$Start[1])
        mat$Start = start
        mat$End = end
      }
      LCB.features[[s]] = mat
    }
  }
  list(Genes=do.call(rbind,LCB.features), Region=do.call(rbind,regions))
}

Ephrin B2

Select LCB around the orthogroup

target_LCBs = annotation %>% filter(Preferred_name == "EFNB2")

LCB 737

figmat = annotation %>% filter(LCB == 737) %>%
  mutate(Species = ifelse(Species=="Endozoicomonas_culture.decontamination", "OTU5", Species)) %>%
  mutate(Strand = as.character(Strand))

start_OG = "OG0002735"
end_OG = "OG0001589"

levels = c("OTU5", 
           "Endozoicomonas_arenosclerae_ab112",
           "Endozoicomonas_montiporae_CL-33",
           "Endozoicomonas_montiporae_LMG_24815",
           "Endozoicomonas_numazuensis_DSM_25634")
LCB.features = extract_genome_region(levels, figmat, start_OG, end_OG)

# Make matrix to make figure
figmat = LCB.features$Genes[,c("Orthogroup","Strand","Species","Start","End")]
figmat$Orthogroup = as.character(figmat$Orthogroup)
table(figmat$Orthogroup)
## 
## OG0001589 OG0002585 OG0002735 OG0005010 OG0008402 
##         5         4         5         3         2
figmat$Strand = c("+"=1, "-"=-1)[figmat$Strand]

# Region
LCB.length = apply(LCB.features$Region, 1,
                    function(x) abs(as.numeric(x[1]) - as.numeric(x[2])))
LCB.length = data.frame(Species=names(LCB.length), Length=LCB.length)

# Sort
figmat$Species = factor(figmat$Species, levels=levels)
LCB.length$Species = factor( LCB.length$Species, levels=levels)

# Colors
OG2name = c(OG0001589="OG0001589: ATP-dependent helicase HrpA",
            OG0002585="OG0002585: Ephrin B2",
            OG0002735="OG0002735: phosphate-selective porin OprO and OprP", 
            OG0005010="OG0005010: Unknown",
            OG0008402="OG0008402: Unknown")
colors = RColorBrewer::brewer.pal(5, "Pastel1")
colors[2] = "red"
names(colors) = OG2name
figmat = data.frame(figmat, Name=OG2name[figmat$Orthogroup])

# Make fig
a = ggplot(figmat) + 
  geom_rect(aes(xmin=Start, xmax=End,
                ymin=-1+Strand, 
                ymax=+1+Strand,
                fill=Name)) +
  facet_grid(Species~., switch = "both", scales="free_y") +
  xlab("Length [bp]") +
  theme_minimal() +
  theme(axis.title.y=element_blank(), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.x = element_text(size=8), 
        axis.text.x = element_text(size=8), 
        panel.background=element_rect(colour="grey96",fill="grey96"),
        legend.text =  element_text(size = 4.5), # 凡例
        legend.key.size = grid::unit(0.4, "lines"),
        legend.title = element_blank(),
        legend.box.spacing = grid::unit(0, "lines"),
        strip.text.y.left = element_text(angle=0, size=6, hjust=1),
        strip.background = element_rect(fill="white", color="white"),
        strip.placement="outside",
        panel.spacing=unit(0.2, "lines")) + 
  scale_fill_manual(values=colors) + 
  scale_y_continuous(limits=c(-2,2),breaks=NULL, minor_breaks = NULL) + 
  scale_x_continuous(breaks = seq(0,10000,by=5000)) +
  geom_rect(data=LCB.length, aes(xmin=0, xmax=Length, 
                                 ymin=-0.05,
                                 ymax=+0.05), fill="grey40")

pdf("LCB_737.pdf", width=5.5, height=1.1)
plot(a)
dev.off()
## quartz_off_screen 
##                 2
plot(a)

LCB 300

figmat = annotation %>% filter(LCB == 300) %>%
  mutate(Species = ifelse(Species=="Endozoicomonas_culture.decontamination", "OTU5", Species)) %>%
  mutate(Strand = as.character(Strand))
figmat %>% group_by(Orthogroup) %>% summarise(n=length(unique(Species))) %>% arrange(desc(n))
## # A tibble: 52 × 2
##    Orthogroup     n
##    <chr>      <int>
##  1 OG0000098      5
##  2 OG0000365      4
##  3 OG0003180      4
##  4 OG0003181      4
##  5 OG0003954      4
##  6 OG0004580      4
##  7 OG0004581      4
##  8 OG0004583      4
##  9 OG0004584      4
## 10 OG0004585      4
## # … with 42 more rows
start_OG = "OG0004580"
end_OG = "OG0004583"
LCB.features = extract_genome_region(unique(figmat$Species), figmat, start_OG, end_OG)

# Make matrix to make figure
figmat = LCB.features$Genes[,c("Orthogroup","Strand","Species","Start","End")]
figmat$Orthogroup = as.character(figmat$Orthogroup)
table(figmat$Orthogroup)
## 
## OG0000005 OG0000011 OG0000012 OG0000014 OG0000017 OG0000019 OG0000023 OG0000030 
##         3         2         6         3         1         2         2         1 
## OG0000036 OG0000038 OG0000093 OG0000097 OG0000098 OG0000182 OG0000321 OG0000365 
##         2         2         1         1         2         2         1         2 
## OG0000559 OG0000562 OG0002115 OG0002185 OG0002239 OG0002818 OG0002915 OG0003103 
##         2         2         2         2         1         1         2         1 
## OG0003180 OG0003181 OG0003937 OG0003954 OG0004579 OG0004580 OG0004581 OG0004582 
##         2         4         2         2         2         4         4         2 
## OG0004583 OG0004584 OG0004585 OG0004700 OG0004701 OG0005580 OG0005608 OG0008431 
##         4         2         2         1         2         2         2         2 
## OG0008432 OG0008433 OG0008434 OG0008435 OG0008436 OG0008624 
##         2         2         2         2         2         1
figmat$Strand = c("+"=1, "-"=-1)[figmat$Strand]

# Region
LCB.length = apply(LCB.features$Region, 1,
                    function(x) abs(as.numeric(x[1]) - as.numeric(x[2])))
LCB.length = data.frame(Species=names(LCB.length), Length=LCB.length)

# Sort
levels = c("Endozoicomonas_montiporae_CL-33",
           "Endozoicomonas_montiporae_LMG_24815",
           "Endozoicomonas_elysicola_DSM_22380",
           "Endozoicomonas_atrinae_WP70")
figmat$Species = factor(figmat$Species, levels=levels)
LCB.length$Species = factor( LCB.length$Species, levels=levels)

# Colors
OG2name = c(OG0000012="OG0000012: tnp4",
            OG0004583="OG0004583: ygeY",
            OG0004581="OG0004581: Unknown", 
            OG0004580="OG0004580: mocA",
            OG0003181="OG0003181: yqeB",
            OG0000014="OG0000014: transposase",
            OG0000005="OG0000005: Uknown",
            OG0008434="OG0008434: Ephrin B2",
            Others="Others")
colors = c(RColorBrewer::brewer.pal(8, "Pastel1"), "grey60")
colors[8] = "red"
names(colors) = OG2name
figmat = data.frame(figmat, Name=OG2name[figmat$Orthogroup], stringsAsFactors=F) %>%
  mutate(Name = ifelse(is.na(Name), "Othres", Name))

# Make fig
a = ggplot(figmat) + 
  geom_rect(aes(xmin=Start, xmax=End,
                ymin=-1+Strand, 
                ymax=+1+Strand,
                fill=Name)) +
  facet_grid(Species~., switch = "both", scales="free_y") +
  xlab("Length [bp]") +
  theme_minimal() +
  theme(axis.title.y=element_blank(), 
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.title.x = element_text(size=8), 
        axis.text.x = element_text(size=8), 
        panel.background=element_rect(colour="grey96",fill="grey96"),
        legend.text =  element_text(size = 4.5), # 凡例
        legend.key.size = grid::unit(0.4, "lines"),
        legend.title = element_blank(),
        legend.box.spacing = grid::unit(0, "lines"),
        strip.text.y.left = element_text(angle=0, size=6, hjust=1),
        strip.background = element_rect(fill="white", color="white"),
        strip.placement="outside",
        panel.spacing=unit(0.2, "lines")) + 
  scale_fill_manual(values=colors) + 
  scale_y_continuous(limits=c(-2,2),breaks=NULL, minor_breaks = NULL) + 
  scale_x_continuous(breaks = seq(0,50000,by=10000)) +
  geom_rect(data=LCB.length, aes(xmin=0, xmax=Length, 
                                 ymin=-0.05,
                                 ymax=+0.05), fill="grey40")

pdf("LCB_300.pdf", width=5.5, height=1.6)
plot(a)
dev.off()
## quartz_off_screen 
##                 2