1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
| import functools import os import subprocess
files = os.listdir('data/fastq/')
samples = []
for i in files: if str.endswith(i,'fastq'): samples.append(str.split(i,'_')[0])
samples = list(set(samples))
f = open('run-this-code.sh','w')
mkdir = 'mkdir fastqc\n' f.write(mkdir) mkdir = 'mkdir mapping\n' f.write(mkdir) mkdir = 'mkdir counts\n' f.write(mkdir) mkdir = 'mkdir bam\n' f.write(mkdir) mkdir = 'mkdir bam.stat\n' f.write(mkdir)
fastqc = 'fastqc -t 20 data/fastq/*.fastq -o fastqc/\n' f.write(fastqc)
down = 0
for i in samples: down += 1
print_info ='echo 开始运行:' + i + '\n' f.write(print_info)
par_0 = 'hisat2 -p 10 -x data/index/sanqi.index' par_1 = ' -1 data/fastq/' + i + '_1.fastq' par_2 = ' -2 data/fastq/' + i + '_2.fastq' par_3 = ' -S mapping/' + i + '.sam\n'
hisat2 = par_0 + par_1 + par_2 + par_3 f.write(hisat2)
par_4 = 'samtools sort -@ 10 -o bam/' + i + '.sorted.bam mapping/' + i + '.sam\n'
f.write(par_4)
del_sam = 'rm ' + 'mapping/' + i + '.sam\n' f.write(del_sam)
flagstat = 'samtools flagstat -@ 20 ' + 'bam/' + i + '.sorted.bam > bam.stat/' + i + '.bam.stat.txt\n'
f.write(flagstat)
par_8 = 'samtools index bam/' + i + '.sorted.bam\n'
f.write(par_8)
par_1 = 'htseq-count -f bam -r name -i gene_id -s yes -t exon -m union ' par_2 = 'bam/' + i + '.sorted.bam' + ' data/genome/genome.gtf' par_3 = ' > ' + 'counts/' + i + '.count.txt\n' htseq = par_1 + par_2 + par_3 f.write(htseq)
print_info ='echo 已完成:' + i + '\n' f.write(print_info) print_info ='echo 已完成' + str(down) + '/' + str(len(samples)) + '\n' f.write(print_info) print_info ='echo =============================================\n' f.write(print_info)
if down == len(samples): merge_counts = 'python merge.counts.py\n' f.write(merge_counts) print_info ='echo Counts矩阵合并完成!' + '\n' f.write(print_info) getTPMandFPKM = 'Rscript getTPMandFPKM.R\n' f.write(getTPMandFPKM) print_info ='echo TPM与FPKM提取完成!' + '\n' f.write(print_info)
f.close()
subprocess.call('sh run-this-code.sh',shell=True)
|
💌lixiang117423@foxmail.com
💌lixiang117423@gmail.com