-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathseq_convert.py
More file actions
173 lines (144 loc) · 4.98 KB
/
seq_convert.py
File metadata and controls
173 lines (144 loc) · 4.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#!/usr/bin/python
#############################################################################
#Converts sequence formats from one to another. Converts between any of
#FASTA, Phylip, and Nexus formats. Please note, this script assumes the
#file extensions of .fa, .ph, and .nex, respectively, for those formats.
#
#Dependencies: core
#
#Gregg Thomas, Summer 2015
#############################################################################
import sys, os, argparse
# Make the bundled "corelib" directory (next to this script) importable so
# that "import core" resolves. The path is appended exactly once; the
# previous nested call also appended the inner call's return value (None)
# to sys.path as a bogus entry.
sys.path.append(os.path.dirname(os.path.realpath(__file__)) + "/corelib/")
import core
############################################
#Function Definitions
############################################
def optParse(errorflag):
    """Parse and validate the command line options.

    Args:
        errorflag: 0 to parse and validate the options normally; 1 to print
            the usage message and exit (used after an error was reported).

    Returns:
        For errorflag == 0, the tuple (input, intype, output, outtype) where
        intype and outtype are reduced to their first letter ("f", "p" or
        "n"). Any other errorflag value that does not exit returns None.

    Raises:
        SystemExit: when a required option is missing, when -f or -t has an
            unsupported value, or when errorflag == 1.
    """
    parser = argparse.ArgumentParser(description="Converts sequence formats from one to another. Converts between any of FASTA, Phylip, and Nexus formats. Please note, this script assumes the file extensions of .fa, .ph, and .nex, respectively, for those formats. Dependencies: core")
    parser.add_argument("-i", dest="input", help="Input. Either a directory containing many sequence files or a single sequence file.")
    parser.add_argument("-f", dest="input_type", help="The format of the input sequences.")
    parser.add_argument("-o", dest="output", help="Output. Either the directory where files will be written or simply an output file name.")
    parser.add_argument("-t", dest="output_type", help="The desired output format of the sequences.")
    args = parser.parse_args()

    if errorflag == 1:
        parser.print_help()
        sys.exit()

    if errorflag == 0:
        # All four options are required. -f and -t must be checked here too,
        # otherwise calling .lower() on None raises AttributeError below.
        required = (args.input, args.output, args.input_type, args.output_type)
        if any(value is None for value in required):
            parser.print_help()
            sys.exit()

        intype = args.input_type.lower()
        outtype = args.output_type.lower()

        valid_types = ("fasta", "phylip", "nexus", "fa", "phy", "ph", "nex", "f", "p", "n")
        for t in (intype, outtype):
            if t not in valid_types:
                core.errorOut(1, "-f and -t must take values of fasta, nexus, or phylip")
                # Show the usage message and stop, as optParse(1) would.
                parser.print_help()
                sys.exit()

        # Every accepted spelling starts with f, p or n, so the first letter
        # is enough to identify the format downstream.
        return args.input, intype[:1], args.output, outtype[:1]
############################################
#Main Block
############################################
# Main script body: converts a single sequence file, or every matching file
# in a directory, from the input format (fr) to the output format (to).
ins, fr, outs, to = optParse(0)

# Decide whether the input is one file or a directory of files.
if os.path.isfile(ins):
    fileflag = 1
    filelist = [ins]
else:
    fileflag = 0
    if not os.path.isdir(ins):
        # errorOut is provided by the core module; the bare name was
        # undefined here and raised NameError instead of reporting the error.
        core.errorOut(0, "-i must be a valid directory path")
        sys.exit()
    # Directory mode: normalise both paths so file names can be appended.
    ins = os.path.abspath(ins)
    if ins[-1] != "/":
        ins = ins + "/"
    outs = os.path.abspath(outs)
    if not outs.endswith("/"):
        outs = outs + "/"
    filelist = os.listdir(ins)

print("==============================================================================================")
print("\t\t\tSequence format conversion")
print("\t\t\t" + core.getDateTime())
if fileflag == 1:
    print("INPUT    | Converting file:                         " + ins)
else:
    print("INPUT    | Converting all files from directory:     " + ins)
print("INFO     | Input format:                            " + fr)
print("INFO     | Output format:                           " + to)
if fileflag == 1:
    print("OUTPUT   | Writing output to file:                  " + outs)
else:
    print("OUTPUT   | Writing output files to directory:       " + outs)
print("-------------------------------------")

if fileflag == 0:
    print(core.getTime() + " | Creating output directory...")
    if not os.path.exists(outs):
        # os.makedirs avoids building a shell command from a user-supplied
        # path (which broke on spaces and shell metacharacters) and also
        # creates any missing parent directories.
        os.makedirs(outs)

# Bookkeeping for the progress bar shown in directory mode.
numfiles = len(filelist)
numbars = 0
donepercent = []
i = 0

# File extension assumed for each one-letter format code.
extensions = {"f": ".fa", "p": ".ph", "n": ".nex"}
init = extensions[fr]
suffix = extensions[to]

firstbar = True
for each in filelist:
    if fileflag == 0:
        numbars, donepercent, firstbar = core.loadingBar(i, numfiles, donepercent, numbars, firstbar=firstbar, disperc=True)
    i = i + 1

    # Skip anything whose name does not contain the input format's extension.
    if init not in each:
        continue

    if fileflag == 1:
        infilename = each
        outfilename = outs
    else:
        infilename = ins + each
        # Swap the input extension for the output one.
        outfilename = outs + each[:each.index(init)] + suffix

    # Create the output file, or truncate it if it already exists, before
    # handing the name to the core writer.
    with open(outfilename, "w") as outfile:
        outfile.write("")

    # Read the sequences into a title -> sequence mapping.
    if fr == "f":
        fasta_seqs = core.fastaGetDict(infilename)
        # Drop the first character of each FASTA title (presumably the
        # leading ">" kept by fastaGetDict; it is re-added when writing
        # FASTA below).
        inseqs = {title[1:]: fasta_seqs[title] for title in fasta_seqs}
    elif fr == "p":
        inseqs = core.phylipGetDict(infilename)[0]
    elif fr == "n":
        inseqs = core.nexusGetDict(infilename)

    # Replace spaces in titles with ":" before writing.
    # NOTE(review): indentation was lost in the reviewed copy of this file;
    # this step is assumed to apply to every input format rather than only
    # to Nexus input - confirm against the original source.
    inseqs = {title.replace(" ", ":"): inseqs[title] for title in inseqs}

    # Write the sequences out in the requested format.
    if to == "f":
        fasta_out = {">" + title: inseqs[title] for title in inseqs}
        core.writeFasta(fasta_out, outfilename)
    elif to == "p":
        core.writePhylip(inseqs, outfilename)
    elif to == "n":
        core.writeNexus(inseqs, outfilename)

if fileflag == 0:
    # Overwrite the progress readout with the final 100% message.
    pstring = "100.0% complete."
    sys.stderr.write('\b' * len(pstring) + pstring)
print("\n")
print(core.getTime() + " | Done!")
print("==============================================================================================")