1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
| import os import re import argparse
def _build_parser():
    """Return the command-line parser for the FASTA length filter.

    Option names and help texts match the original script. ``--input`` and
    ``--output`` are required so a missing path is reported by argparse
    instead of failing later inside ``open``. ``--min_length`` defaults to 0
    (no lower bound) so it can be omitted.
    """
    parser = argparse.ArgumentParser(description="You should add those parameters")
    parser.add_argument("-i", "--input", type=str, required=True,
                        help="The input fasta file")
    parser.add_argument("-min", "--min_length", type=int, default=0,
                        help="The minimal length of sequences")
    parser.add_argument("-max", "--max_length", type=int, default=0,
                        help="The maximal length of sequences")
    parser.add_argument("-o", "--output", type=str, required=True,
                        help="The output fasta file")
    return parser


def _read_records(lines):
    """Yield ``(header, sequence_lines)`` for every FASTA record in *lines*.

    *lines* is any iterable of text lines (for example an open file).
    ``header`` is the ``>`` line without its line ending; ``sequence_lines``
    is the list of non-blank sequence lines with surrounding whitespace
    removed. Records are yielded in input order, and records sharing the same
    header are kept as separate records.

    Raises:
        ValueError: if non-blank text appears before the first header line.
    """
    header = None
    chunks = []
    for raw in lines:
        if raw.startswith(">"):
            if header is not None:
                yield header, chunks
            header = raw.rstrip("\r\n")
            chunks = []
            continue
        text = raw.strip()
        if not text:
            # Blank lines carry no residues; skip them wherever they occur.
            continue
        if header is None:
            raise ValueError(
                "sequence data found before the first '>' header line"
            )
        chunks.append(text)
    if header is not None:
        yield header, chunks


def _length_ok(length, min_length, max_length):
    """Return True when *length* is within the requested bounds.

    A *max_length* of 0 means "no upper bound".
    """
    if length < min_length:
        return False
    return max_length == 0 or length <= max_length


def main(argv=None):
    """Filter a FASTA file by sequence length.

    Args:
        argv: list of command-line arguments; ``None`` means ``sys.argv[1:]``.
            Unknown arguments are ignored (``parse_known_args``), as in the
            original script.

    Reads ``--input``, keeps the records whose sequence length (number of
    characters on the sequence lines, line endings excluded) is at least
    ``--min_length`` and, when ``--max_length`` is non-zero, at most
    ``--max_length``, and writes them to ``--output`` with their original
    line wrapping.
    """
    args = _build_parser().parse_known_args(argv)[0]

    # Read and filter everything before opening the output file, so that
    # passing the same path for input and output still works.
    with open(args.input, "r") as source:
        kept = [
            (header, chunks)
            for header, chunks in _read_records(source)
            if _length_ok(sum(map(len, chunks)),
                          args.min_length, args.max_length)
        ]

    with open(args.output, "w") as sink:
        for header, chunks in kept:
            sink.write(header + "\n")
            sink.writelines(chunk + "\n" for chunk in chunks)


if __name__ == "__main__":
    main()
|