4 Input data and processing

# read data
df_input <- read_excel("./CH_231.xlsx") %>% as_tibble()

# prepare data
df_input <- df_input %>% select(ID, Sex, Age, DNMT3A:ZRSR2) %>% pivot_longer(cols=DNMT3A:ZRSR2, names_to='gene', values_to='mutation')
# add age group ("agrp")
df_input <- df_input %>% mutate(agrp=cut(Age, breaks=seq(60,100,10), right=F) %>% as.character()) %>% mutate(agrp=ifelse(is.na(agrp),'[100,+)',agrp))

# add longevity group ("lngv")
df_input <- df_input %>% mutate(lngv=ifelse(Age>=90,'Longevous','Others'))
df_input <- df_input %>% select(ID,Sex,Age,agrp,lngv,gene,mutation)

# convert into mutation-level data
df_full <- df_input %>% separate_rows(mutation,sep=';|, ') %>% mutate(vaf=str_replace_all(mutation,'.*\\(|\\%.*','') %>% as.numeric()) %>% mutate(type=ifelse(str_detect(mutation,'^c.'), 'Splicing', mutation)) %>% mutate(type=ifelse(str_detect(type,'del'), 'Deletion', type)) %>% mutate(type=ifelse(str_detect(type,'fs'), 'Frameshift', type)) %>% mutate(type=ifelse(str_detect(type,'\\*'), 'Nonsense', type)) %>% mutate(type=ifelse(str_detect(type,'^p.'), 'Missense', type))

# output into 'CH_summary.xlsx'
df_full %>% openxlsx::write.xlsx(file.path("./",'CH_summary.xlsx'))