-
Notifications
You must be signed in to change notification settings - Fork 49
Expand file tree
/
Copy pathshow-wav-ali.py
More file actions
executable file
·65 lines (52 loc) · 1.91 KB
/
show-wav-ali.py
File metadata and controls
executable file
·65 lines (52 loc) · 1.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
'''
Created on Jul 31, 2014
@author: chorows
'''
import sys
import argparse
import tempfile
from subprocess import check_call
import os
from os import path
import numpy as np
import kaldi_io
def alignment_end_times(ali, dur):
    '''
    Turn per-phone frame counts into cumulative end times.

    ali -- sequence of (phone_id, num_frames) pairs
    dur -- total duration that the frames are stretched over

    Returns a float array of shape (n, 2) whose rows are
    (phone_id, end_time); the last end time equals dur.
    Raises ValueError when the alignment is empty, is not a list of pairs,
    or contains no frames (the original code failed with an IndexError or
    silently produced NaNs in these cases).
    '''
    ali = np.array(ali, dtype=float)
    if ali.ndim != 2 or ali.shape[1] != 2:
        raise ValueError('alignment must be a non-empty list of '
                         '(phone, length) pairs')
    num_frames = ali[:, 1].sum()
    if num_frames <= 0:
        raise ValueError('alignment contains no frames')
    # Scale the running frame count so that the final boundary lands on dur.
    ali[:, 1] = np.cumsum(ali[:, 1]) / num_frames * dur
    return ali


def format_labels(ali, phones_dict):
    '''
    Build the label lines, one "start end phone" line per alignment row.

    ali         -- rows of (phone_id, end_time), e.g. from alignment_end_times
    phones_dict -- mapping from phone id to the phone symbol

    Each segment starts where the previous one ended; the first starts at 0.
    Returns a list of newline-terminated strings.
    '''
    lines = []
    last_time = 0.0
    for phone, end_time in ali:
        lines.append('%f %f %s\n' % (last_time, end_time, phones_dict[phone]))
        last_time = end_time
    return lines


def main():
    '''
    Extract one utterance as a wav file, write its phone alignment as a
    label file next to it, and open the wav file in Audacity.

    The generated files live in ./tmp and are removed again when Audacity
    exits or when any step fails.
    '''
    parser = argparse.ArgumentParser(description='Extract an utterance and convert the alignment to an Audacity label file')
    parser.add_argument('wav', help='wav script file')
    parser.add_argument('mdl', help='model file (to get transitions)')
    parser.add_argument('ali', help='alignment')
    parser.add_argument('phn', help='phones.txt')
    parser.add_argument('utt', help='utterance')
    args = parser.parse_args()

    temp_dir = './tmp'
    try:
        os.mkdir(temp_dir)
    except OSError:
        # An already existing directory is fine; anything else (permissions,
        # a regular file in the way, ...) must not be silently ignored.
        if not path.isdir(temp_dir):
            raise

    utt = args.utt
    wav_file = path.join(temp_dir, '%s.wav' % (utt,))
    label_file = path.join(temp_dir, '%s.txt' % (utt,))

    try:
        # .write() instead of the print statement keeps the script runnable
        # on both Python 2 and Python 3.
        sys.stderr.write("Extracting wav utterance %s\n" % (utt,))
        # An argument list (no shell=True) keeps paths containing spaces or
        # quotes intact.
        # NOTE(review): the "echo ... |" part is presumably still run through
        # a shell by Kaldi itself, so odd utterance ids may remain unsafe.
        check_call(['wav-copy', args.wav,
                    'scp,p:echo %s %s|' % (utt, wav_file)])

        dur_reader = kaldi_io.RandomAccessPythonReader(
            "ark:wav-to-duration 'scp:echo %s %s |' ark,t:-|" %
            (utt, wav_file))
        dur = dur_reader[utt]

        ali_reader = kaldi_io.RandomAccessInt32PairVectorReader(
            "ark:ali-to-phones --write-lengths '%s' '%s' 'ark:-' |" %
            (args.mdl, args.ali))
        ali = alignment_end_times(ali_reader[utt], dur)

        phones_dict = {n: p for p, n in
                       kaldi_io.SequentialPythonReader('ark:%s' % (args.phn,))}

        with open(label_file, 'w') as lf:
            lf.writelines(format_labels(ali, phones_dict))

        check_call(['audacity', wav_file])
    finally:
        # The original called the non-existent shutil.rmdir (shutil was not
        # even imported).  Remove only what this run generated, so that a
        # pre-existing ./tmp keeps its other contents.
        for generated in (wav_file, label_file):
            try:
                os.remove(generated)
            except OSError:
                pass  # best effort: the file may never have been created
        try:
            os.rmdir(temp_dir)
        except OSError:
            pass  # directory not empty (or already gone): leave it alone


if __name__ == '__main__':
    main()