import requests
from bs4 import BeautifulSoup

file_path = "D:/OneDrive/NAS/科研相关/PhData/data/生信挖掘/水稻多效基因/data/NCBI.BioProject.Rice.txt"
file_out = open("D:/OneDrive/NAS/科研相关/PhData/data/生信挖掘/水稻多效基因/data/NCBI.BioProject.Rice.description.txt", "w", encoding="utf-8")

with open(file_path, "r") as f:
    for line in f:
        # Query the NCBI BioProject page for each accession listed in the input file
        url = "https://www.ncbi.nlm.nih.gov/bioproject/" + line.replace("\n", "")
        response = requests.get(url)
        if response.status_code == 200:
            soup = BeautifulSoup(response.content, "html.parser")
            try:
                # The project description sits in the <div id="DescrAll"> element
                description = soup.find("div", id="DescrAll").get_text(strip=True).replace("\n", " ")
            except AttributeError:
                description = "None"  # page has no description block
            except UnicodeEncodeError:
                continue  # skip entries that raise encoding errors
            file_out.write(line.replace("\n", "") + "\t" + description + "\n")
            print(line.replace("\n", "") + "\t" + description + "\n")
            print("================================================")
        else:
            print(f"BioProject {line.strip()}: Failed to retrieve the webpage. Status code: {response.status_code}")

file_out.close()
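If the accession list is long, it may help to pace the requests. The following is a minimal, optional sketch (not part of the original script) that wraps the per-accession lookup in a helper and pauses between calls; the fetch_description name and the 0.5-second delay are illustrative assumptions, and it relies on the same <div id="DescrAll"> element as the script above.

import time

import requests
from bs4 import BeautifulSoup

def fetch_description(accession):
    """Return the BioProject description for one accession, or "None" if the page has no DescrAll block."""
    response = requests.get("https://www.ncbi.nlm.nih.gov/bioproject/" + accession)
    if response.status_code != 200:
        return None  # caller decides how to report the failed request
    soup = BeautifulSoup(response.content, "html.parser")
    block = soup.find("div", id="DescrAll")
    return block.get_text(strip=True).replace("\n", " ") if block else "None"

# Example usage: pause briefly between requests so repeated queries are gentler on the NCBI server.
# with open(file_path, "r") as f:
#     for line in f:
#         print(line.strip() + "\t" + (fetch_description(line.strip()) or "request failed"))
#         time.sleep(0.5)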