NCBI爬取单子叶双子叶信息

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import os
import requests
from bs4 import BeautifulSoup
import re

# Classify Latin plant names as eudicots or monocots.
#
# For every name in the input file, query the NCBI Taxonomy search page, read
# the comma-separated lineage shown in the first <div class="supp"> result
# block, and write one "<name>\t<group>" line to the output file for each
# lineage entry that is listed in cot_list.

cot_list = ["eudicots", "monocots"]

NCBI_TAXONOMY_URL = "https://www.ncbi.nlm.nih.gov/taxonomy/"
# Seconds to wait for NCBI; requests waits forever when no timeout is given.
REQUEST_TIMEOUT = 30


def _lineage_groups(name):
    """Return the cot_list entries found in the NCBI lineage of *name*.

    Returns a (possibly empty) list of matching group names, or None when the
    result page has no <div class="supp"><p> lineage block at all (for
    example, when the search produced no hit).

    Raises requests.RequestException on network errors, timeouts, and HTTP
    error statuses.
    """
    # Passing the term through ``params`` lets requests URL-encode it
    # (spaces become "+", other special characters are escaped as well).
    res = requests.get(
        NCBI_TAXONOMY_URL, params={"term": name}, timeout=REQUEST_TIMEOUT
    )
    res.raise_for_status()

    soup = BeautifulSoup(res.text, "html.parser")
    supp = soup.find("div", class_="supp")
    lineage = supp.find("p") if supp is not None else None
    if lineage is None:
        return None

    # Entries are separated by ", "; drop the spaces before comparing.
    entries = (entry.replace(" ", "") for entry in lineage.text.split(","))
    return [entry for entry in entries if entry in cot_list]


# ``with`` closes both files even if an unexpected error aborts the loop.
with open("/home/lixiang/temp_file/latin.name.group.txt", "w") as file_write, \
        open("/home/lixiang/temp_file/latin.name.txt", "r") as file_open:
    for line in file_open:
        # Strip the line terminator (and stray whitespace) so it ends up
        # neither in the query nor in the output.
        name = line.strip()
        if not name:
            continue

        try:
            groups = _lineage_groups(name)
        except requests.RequestException as err:
            # One failing lookup should not discard the rest of the run.
            print("skipping {!r}: request failed ({})".format(name, err))
            continue

        if groups is None:
            print("skipping {!r}: no lineage found on result page".format(name))
            continue

        for group in groups:
            file_write.write(name + "\t" + group + "\n")

NCBI爬取单子叶双子叶信息
https://lixiang117423.github.io/article/monocotdicotncbi/
作者:小蓝哥
发布于:2022年11月15日
许可协议