9 Figure 5. Correlation between the size of CH and the number of mutant alleles per individual
9.1 Correlation analysis with scatter plot
# Figure 5A: scatter plot of log10(maxVAF) against the number of mutant
# alleles per individual, with a linear fit and a Spearman correlation label.

# Collapse df_all to one row per (ID, Sex, Age): the largest maxvaf and the
# summed nummut. Rows whose maxvaf equals 0 are removed; log10(0) would be
# -Inf on the x axis.
maxvaf_nummut <- df_all %>%
  group_by(ID, Sex, Age) %>%
  summarize(maxvaf = max(maxvaf), nummut = sum(nummut)) %>%
  ungroup() %>%
  filter(maxvaf != 0)

# plot
# theme_bw() is a complete theme, so it is the only base theme applied here;
# the theme() call after it customises text, ticks, border and grid.
# NOTE(review): recent ggplot2 releases deprecate `size` for line elements in
# favour of `linewidth`; `size` is kept in case an older ggplot2 is installed
# -- confirm the target version before switching.
gp_t1 <- ggplot(maxvaf_nummut, aes(x = log10(maxvaf), y = nummut)) +
  xlab("log10(maxVAF(%))") +
  ylab("Number of mutant alleles") +
  geom_point(size = 2, color = "#EC0101", alpha = 0.5) +
  theme_bw() +
  scale_y_continuous(breaks = seq(1, 6, 1)) +
  scale_x_continuous(breaks = seq(0.0, 1.6, 0.2)) +
  theme(
    axis.title = element_text(size = 16),
    axis.text = element_text(size = 14),
    axis.ticks.length = unit(0.25, "cm"),
    axis.ticks = element_line(size = 1),
    panel.border = element_rect(size = 1.5),
    panel.grid = element_blank()
  ) +
  # add linear regression (with confidence band)
  geom_smooth(
    method = "lm", se = TRUE,
    color = "#F9B208", size = 1.5, fill = "#FEA82F"
  ) +
  # add correlation analysis
  stat_cor(method = "spearman", digits = 3, size = 6) +
  # dashed reference line at log10(maxvaf) = 1, i.e. maxvaf = 10
  geom_vline(xintercept = 1, linetype = "dashed", color = "black", size = 0.4)

# save into a file 'correlation_maxvaf_nummut.pdf'
ggsave(
  file.path(outdir, "correlation_maxvaf_nummut.pdf"),
  gp_t1,
  width = 8, height = 4
)
9.2 Comparison of the number of mutant alleles per individual
# Figure 5B: log10(maxVAF) compared across mutant-allele count groups.

# One row per (ID, Sex): summed nummut and largest maxvaf, keeping only rows
# with a non-zero maxvaf. Counts of 3 or more are pooled into ">=3" and the
# result is stored as a factor ordered "1" < "2" < ">=3" for the x axis.
data2 <- df_all %>%
  select(ID, Sex, nummut, maxvaf) %>%
  group_by(ID, Sex) %>%
  summarize(nummut = sum(nummut), maxvaf = max(maxvaf)) %>%
  ungroup() %>%
  filter(maxvaf != 0) %>%
  arrange(Sex, desc(maxvaf)) %>%
  mutate(
    nummut = ifelse(nummut >= 3, ">=3", nummut),
    nummut = factor(nummut, levels = c("1", "2", ">=3"))
  )

# plot: jittered points under semi-transparent boxplots (outliers hidden in
# the boxplot layer because every point is already drawn by the jitter layer),
# pairwise Wilcoxon tests between the three groups, and a dashed reference
# line at log10(maxvaf) = 1.
box_nummut <- ggplot(
  data2,
  aes(x = nummut, y = log10(maxvaf), color = nummut, fill = nummut)
) +
  geom_jitter(size = 1, alpha = 0.8, width = 0.15) +
  geom_boxplot(size = 0.4, width = 0.4, alpha = 0.2, outlier.shape = NA) +
  theme_classic() +
  labs(x = "Mutant alleles", y = "log10(maxVAF(%))") +
  stat_compare_means(
    comparisons = list(c("1", "2"), c("2", ">=3"), c("1", ">=3")),
    method = "wilcox.test",
    size = 4
  ) +
  scale_y_continuous(expand = c(0.1, 0.1)) +
  geom_hline(yintercept = 1, linetype = "dashed", color = "black", size = 0.4)

# save into a file 'box_nummut.pdf'
ggsave(
  filename = file.path(outdir, "box_nummut.pdf"),
  plot = box_nummut,
  width = 4,
  height = 2
)
9.3 Stacked bar plot depicting CH-positive individuals with different numbers of mutant alleles
# Figure 5C: within each maxVAF bin, the fraction of individuals carrying
# 1, 2 or >=3 mutant alleles, drawn as a stacked bar plot.

# One row per (ID, Sex): summed nummut (pooled to "1", "2", ">=3") and largest
# maxvaf, keeping only rows with a non-zero maxvaf. maxvaf is then binned into
# left-closed intervals.
# The upper break is Inf so the last bin is genuinely open-ended, matching its
# "[10,+)" label; with a finite upper break and right = FALSE, a value equal to
# that break would fall outside every bin and become NA.
data3 <- df_all %>%
  select(ID, Sex, nummut, maxvaf) %>%
  group_by(ID, Sex) %>%
  summarize(nummut = sum(nummut), maxvaf = max(maxvaf)) %>%
  ungroup() %>%
  filter(maxvaf != 0) %>%
  arrange(Sex, -maxvaf) %>%
  mutate(
    nummut = ifelse(nummut >= 3, ">=3", nummut),
    nummut = factor(nummut, levels = c("1", "2", ">=3")),
    vafgrp = cut(
      maxvaf,
      breaks = c(0, 2, 5, 10, Inf),
      labels = c("[0,2)", "[2,5)", "[5,10)", "[10,+)"),
      right = FALSE
    )
  )

# Individuals per (bin, allele group) and per bin.
df1 <- data3 %>% count(vafgrp, nummut)
df2 <- data3 %>% count(vafgrp) %>% rename(total = n)

# Within-bin frequency of each allele group. nummut and vafgrp are already
# factors with the intended level order; bins with no individuals are dropped
# from the vafgrp levels.
df <- df1 %>%
  inner_join(df2, by = "vafgrp") %>%
  mutate(freq = n / total, vafgrp = droplevels(vafgrp))

# plot
gp_2 <- df %>%
  ggplot(aes(x = vafgrp, y = freq, fill = nummut)) +
  geom_bar(width = 0.8, stat = "identity", position = "fill") +
  scale_fill_brewer(palette = "Blues") +
  theme_classic() +
  xlab("maxVAF(%)") +
  ylab("Frequency")

# save into a file 'maxvaf_nummut.pdf'
ggsave(file.path(outdir, "maxvaf_nummut.pdf"), gp_2, width = 4, height = 2)