forked from ga4gh/ga4gh-server
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgenerate_fasta.py
More file actions
85 lines (69 loc) · 2 KB
/
generate_fasta.py
File metadata and controls
85 lines (69 loc) · 2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
"""
Generate a random FASTA file
"""
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import argparse
import math
import random
import utils
# Header line written at the very start of the generated file, before any
# sequence data.
firstLine = ">Generated file\n"
# Name of the file the random sequence is written to; the compress and
# index steps build their command lines from this name.
fileName = "generated.fa"
def parseArgs(argv=None):
    """
    Parse the command line args

    argv is an optional list of argument strings to parse instead of
    the process command line; when it is None (the default) argparse
    reads sys.argv[1:], so existing no-argument callers are unaffected.

    Returns the argparse namespace; its num_bases attribute is always
    an int, whether it came from the command line or from the default.
    """
    parser = argparse.ArgumentParser(
        description="Generate random FASTA files")
    basesDefault = 1000
    # type=int matters: without it a value supplied on the command line
    # arrives as a string, and the arithmetic done on num_bases when the
    # file is written fails with a TypeError.
    parser.add_argument(
        "--num-bases", "-n", type=int, default=basesDefault,
        help="number of bases to include; default {}".format(basesDefault))
    return parser.parse_args(argv)
def writeFasta(args):
    """
    Write the header line followed by args.num_bases randomly chosen
    bases to the output file, at most 70 bases per line
    """
    totalBases = args.num_bases
    utils.log("writing {} bases to {} ...".format(totalBases, fileName))
    with open(fileName, 'w') as outFile:
        outFile.write(firstLine)
        lineWidth = 70
        lineCount = int(math.ceil(totalBases / lineWidth))
        alphabet = ['A', 'G', 'C', 'T']
        leftToWrite = totalBases
        for _line in range(lineCount):
            # Every line is full width except possibly the last one,
            # which takes whatever is left over.
            chunkSize = min(leftToWrite, lineWidth)
            sequence = ''.join(
                random.choice(alphabet) for _ in range(chunkSize))
            outFile.write(sequence + "\n")
            leftToWrite -= chunkSize
        # Sanity check that the line arithmetic consumed every base.
        assert leftToWrite == 0
def zipFasta():
    """
    Run bgzip on the generated fasta file
    """
    utils.log("zipping {} ...".format(fileName))
    bgzipCommand = " ".join(["bgzip", fileName])
    utils.runCommand(bgzipCommand)
def indexFasta():
    """
    Run samtools faidx on the gzipped fasta file
    """
    compressedName = fileName + ".gz"
    utils.log("indexing {} ...".format(compressedName))
    utils.runCommand("samtools faidx {}".format(compressedName))
def main():
    """
    Parse the arguments, then write, compress and index the fasta file
    """
    writeFasta(parseArgs())
    # Compression must come before indexing: the index step operates on
    # the .gz name derived from the output file name.
    zipFasta()
    indexFasta()


if __name__ == "__main__":
    main()