1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
|
# Build the list of unique accessions to look up.
# Takes the second column of `df.ncbi.res.filtered` (defined elsewhere),
# removes duplicate rows, names the single remaining column
# "accession.version" and stores the result as a data.table in `id`.
id <- df.ncbi.res.filtered %>%
  dplyr::select(2) %>%
  dplyr::distinct_all() %>%
  magrittr::set_names("accession.version") %>%
  data.table::as.data.table()

# Save the accession list to disk using fwrite's default settings.
data.table::fwrite(id, "./accession.id.txt")
# Build the taxonomy lookup table `df.taxo.res`.
# Reads "./taxo.id.2.taxonomy.txt" without a header (so vroom names the
# columns X1, X2, ...), keeps column 1 as `taxoid` and column 3 (X3) as a
# ";"-separated lineage string, then spreads the first seven lineage fields
# into one column per rank:
#   界 kingdom, 门 phylum, 纲 class, 目 order, 科 family, 属 genus, 种 species.
# A lineage with fewer than seven fields yields NA for the missing ranks.
df.taxo.res <- vroom::vroom("./taxo.id.2.taxonomy.txt", col_names = FALSE) %>%
  dplyr::select(1, 3) %>%
  dplyr::rename(taxoid = X1) %>%
  dplyr::mutate(
    # Split each lineage once and reuse the pieces for every rank instead of
    # re-splitting the same strings seven times.
    lineage = stringr::str_split(X3, ";"),
    # vapply() always returns a character vector, even for zero rows, whereas
    # sapply() would return an empty list in that case.
    界 = vapply(lineage, `[`, character(1), 1),
    门 = vapply(lineage, `[`, character(1), 2),
    纲 = vapply(lineage, `[`, character(1), 3),
    目 = vapply(lineage, `[`, character(1), 4),
    科 = vapply(lineage, `[`, character(1), 5),
    属 = vapply(lineage, `[`, character(1), 6),
    种 = vapply(lineage, `[`, character(1), 7)
  ) %>%
  # Drop the raw lineage string and the temporary list column; only `taxoid`
  # and the seven rank columns remain.
  dplyr::select(-X3, -lineage)
# Attach taxonomy ranks to every accession.
# Reads the header-less two-column file "./accession2taxoid.txt", names its
# columns "accession.id" and "taxoid", left-joins the rank columns from
# `df.taxo.res` on `taxoid`, and removes fully duplicated rows.
# NOTE(review): both `taxoid` columns get their types from vroom's guessing;
# the join presumably relies on them being guessed alike - confirm.
df.acc.taxo.res <- vroom::vroom("./accession2taxoid.txt", col_names = FALSE) %>%
  magrittr::set_names(c("accession.id", "taxoid")) %>%
  dplyr::left_join(df.taxo.res, by = "taxoid") %>%
  dplyr::distinct_all()
# Assemble the final per-sample result table and export it.
# Takes the first three columns of `df.ncbi.res.filtered` (defined elsewhere),
# names them "sample", "accession.id" and "相似度" (similarity), and joins the
# taxonomy ranks from `df.acc.taxo.res` by accession.
# Only the first row for each (sample, 属 = genus) pair is kept.
df.res.final <- df.ncbi.res.filtered %>%
  dplyr::select(1:3) %>%
  magrittr::set_names(c("sample", "accession.id", "相似度")) %>%
  dplyr::left_join(df.acc.taxo.res, by = "accession.id") %>%
  # De-duplicate on the two columns themselves rather than on a pasted key:
  # paste0(sample, genus) has no separator, so different pairs such as
  # ("A1", "2x") and ("A", "12x") would collapse into the same key, and an NA
  # genus would collide with a genus literally named "NA".
  dplyr::distinct(sample, 属, .keep_all = TRUE)

# Write the result to an Excel workbook ("最终结果" = "final result").
writexl::write_xlsx(df.res.final, "./最终结果.xlsx")
|