library(tidyverse)
# Load the per-sample mapping-rate table: a tab-separated file read without a
# header row, so the four column names are assigned by hand below.
mat <- read.table(
  "HISAT2_splice_exon/map_rate.txt",
  sep = "\t",
  header = FALSE  # spelled out: `F` is an ordinary variable and can be reassigned
)
colnames(mat) <- c("QC", "Sample", "Group", "MapRate")
# Keep only rows whose Group contains "UT", then retain just the sample
# identifier and its mapping rate (still a "NN.NN%" string at this point).
mat <- mat %>%
  filter(grepl("UT", Group)) %>%
  select(Sample, MapRate)
mat
## Sample MapRate
## 1 201411_Is1 48.46%
## 2 201411_Is2 47.71%
## 3 201411_Is3 39.21%
## 4 201411_Ky1 52.22%
## 5 201411_Ky2 48.03%
## 6 201411_Ky3 47.34%
## 7 201411_Mi1 41.90%
## 8 201411_Mi2 44.07%
## 9 201411_Mi3 51.68%
## 10 201502_Is1 64.34%
## 11 201502_Is2 68.98%
## 12 201502_Is3 64.64%
## 13 201502_Ky1 59.74%
## 14 201502_Ky2 57.25%
## 15 201502_Ky3 62.53%
## 16 201502_Mi1 49.81%
## 17 201502_Mi2 49.84%
## 18 201502_Mi3 52.88%
## 19 201505_Is1 61.65%
## 20 201505_Is2 61.24%
## 21 201505_Is3 66.21%
## 22 201505_Mi1 62.41%
## 23 201505_Mi2 59.64%
## 24 201505_Mi3 57.21%
## 25 201508_Is1 52.84%
## 26 201508_Is2 52.56%
## 27 201508_Is3 49.57%
## 28 201508_Mi1 41.65%
## 29 201508_Mi2 39.81%
## 30 201508_Mi3 50.22%
## 31 201511_Is1 50.44%
## 32 201511_Is2 57.19%
## 33 201511_Is3 48.11%
## 34 201511_Mi1 43.43%
## 35 201511_Mi2 46.68%
## 36 201511_Mi3 48.44%
## 37 201602_Is1 54.43%
## 38 201602_Is2 58.04%
## 39 201602_Is3 62.02%
## 40 201602_Mi1 65.11%
## 41 201602_Mi2 53.94%
## 42 201602_Mi3 54.47%
## 43 201603_Is1 49.79%
## 44 201603_Is2 55.70%
## 45 201603_Is3 51.34%
## 46 201603_Mi1 49.07%
## 47 201603_Mi2 52.45%
## 48 201603_Mi3 51.61%
## 49 201605_Is1 53.62%
## 50 201605_Is2 57.66%
## 51 201605_Is3 54.34%
## 52 201605_Mi1 61.27%
## 53 201605_Mi2 61.60%
## 54 201605_Mi3 59.05%
## 55 201608_Is1 51.35%
## 56 201608_Is2 60.69%
## 57 201608_Is3 45.52%
## 58 201608_Mi1 39.61%
## 59 201608_Mi2 43.00%
## 60 201608_Mi3 33.37%
## 61 201609_Is1 34.86%
## 62 201609_Is2 51.61%
## 63 201609_Is3 50.35%
# Plotting copy of `mat`: remove the percent sign from MapRate and convert
# the remaining text to a number.
figmat <- mutate(mat, MapRate = as.numeric(gsub("%", "", MapRate)))
# Print the distribution summary (min, quartiles, median, mean, max).
summary(figmat[["MapRate"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 33.37 48.27 52.22 52.63 58.55 68.98
# Histogram of per-sample mapping rates (20 bins) with the mean marked by a
# red vertical line.
fig1 <- ggplot(figmat) +
  # y is left unmapped on purpose: stat_bin already defaults y to the bin
  # count, which avoids the `..count..` notation deprecated in ggplot2 3.4.0.
  geom_histogram(aes(x = MapRate), bins = 20, fill = "grey80") +
  # na.rm = TRUE keeps the mean line drawable even if a rate failed to parse.
  # NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
  # `size` is kept here so older installs still honour the thickness.
  geom_vline(
    xintercept = mean(figmat$MapRate, na.rm = TRUE),
    color = "red",
    size = 1
  ) +
  # Fixed axis ranges: rates outside 20-80 or bars taller than 14 are dropped
  # by the scale limits (with a warning), so widen these if the data change.
  scale_x_continuous(limits = c(20, 80)) +
  scale_y_continuous(limits = c(0, 14), breaks = seq(0, 14, by = 2)) +
  theme_bw() +
  xlab("Mapping rate [%]") +
  ylab("Count")
fig1