forked from espnet/espnet
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmake_stft.sh
More file actions
executable file
·152 lines (125 loc) · 4.16 KB
/
make_stft.sh
File metadata and controls
executable file
·152 lines (125 loc) · 4.16 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/bin/bash
# Copyright 2018 Nagoya University (Tomoki Hayashi)
# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
# Begin configuration section.
# --- job control ---
nj=4       # number of parallel jobs
cmd=run.pl # job launcher (see --cmd in the usage text)

# --- STFT analysis parameters, forwarded to compute-stft-feats.py ---
n_fft=1024
n_shift=512
win_length= # empty by default
window=hann

# --- input / output handling ---
normalize=16              # The bit-depth of the input wav files
filetype=mat              # mat or hdf5
compress=true             # forwarded as --compress=<value>
write_utt2num_frames=true # also produce <data-dir>/utt2num_frames
# End configuration section.
# Usage text; printed below when the number of positional arguments is invalid.
help_message=$(cat <<EOF
Usage: $0 [options] <data-dir> [<log-dir> [<stft-dir>] ]
e.g.: $0 data/train exp/make_stft/train stft
Note: <log-dir> defaults to <data-dir>/log, and <stft-dir> defaults to <data-dir>/data
Options:
--nj <nj> # number of parallel jobs
--cmd (utils/run.pl|utils/queue.pl <queue opts>) # how to run jobs.
--filetype <mat|hdf5|sound.hdf5> # Specify the format of feats file
EOF
)

# Record the full command line in the log output.
echo "$0 $*"

# Source the shared option parser; give up if it fails or cannot be found.
. parse_options.sh || exit 1

# Between one and three positional arguments are accepted.
case $# in
    1 | 2 | 3) ;;
    *)
        echo "${help_message}"
        exit 1
        ;;
esac
# Strict mode from here on: abort on command failure, unset variables and
# failures inside pipelines.
set -euo pipefail

# Positional arguments: <data-dir> [<log-dir> [<stft-dir>]].
# ${N-default} substitutes only when the argument is absent, which matches
# the defaults documented in the usage text.
data=$1
logdir=${2-${data}/log}
stftdir=${3-${data}/data}

# Make ${stftdir} an absolute pathname: anything not starting with "/" is
# taken relative to the current directory. Done in the shell (instead of an
# unquoted perl call) so paths containing whitespace are handled correctly.
case "${stftdir}" in
    /*) ;;
    *) stftdir="${PWD}/${stftdir}" ;;
esac

# The basename of the data directory becomes part of the archive/log names.
name=$(basename "${data}")

mkdir -p "${stftdir}" || exit 1
mkdir -p "${logdir}" || exit 1

# Move an existing feats.scp aside instead of overwriting it.
if [ -f "${data}/feats.scp" ]; then
    mkdir -p "${data}/.backup"
    echo "$0: moving ${data}/feats.scp to ${data}/.backup"
    mv "${data}/feats.scp" "${data}/.backup"
fi

# List of input audio entries used by the extraction step.
scp=${data}/wav.scp

# Validate the data directory (text and feats are not required at this point).
utils/validate_data_dir.sh --no-text --no-feats "${data}" || exit 1
# Optional writer for per-job frame counts; stays empty unless requested.
# ${write_utt2num_frames} is executed as a command (true/false).
write_num_frames_opt=
if ${write_utt2num_frames}; then
    write_num_frames_opt="--write-num-frames=ark,t:${logdir}/utt2num_frames.JOB"
fi

# File extension of the feature archives: .h5 for hdf5, .ark for anything else.
case "${filetype}" in
    hdf5) ext=h5 ;;
    *) ext=ark ;;
esac
# Split the work list into ${nj} pieces and run compute-stft-feats.py once per
# piece. With a segments file the segments are split and every job reads the
# full wav.scp; otherwise wav.scp itself is split.
if [ -f "${data}/segments" ]; then
    echo "$0 [info]: segments file exists: using that."
    split_input=${data}/segments
    split_prefix=${logdir}/segments.
    split_suffix=
    input_args=("--segment=${logdir}/segments.JOB" "scp:${scp}")
else
    echo "$0: [info]: no segments file exists: assuming wav.scp indexed by utterance."
    split_input=${scp}
    split_prefix=${logdir}/wav.
    split_suffix=.scp
    input_args=("scp:${logdir}/wav.JOB.scp")
fi

# One split file per job: <prefix>1<suffix> ... <prefix>${nj}<suffix>.
split_files=()
for n in $(seq "${nj}"); do
    split_files+=("${split_prefix}${n}${split_suffix}")
done
utils/split_scp.pl "${split_input}" "${split_files[@]}" || exit 1

# ${cmd} may carry its own options (e.g. "queue.pl <queue opts>") and
# ${write_num_frames_opt} may be empty, so both must stay unquoted.
# NOTE(review): with the default empty win_length this passes a bare
# "--win_length" flag; presumably compute-stft-feats.py accepts that - confirm.
# shellcheck disable=SC2086
${cmd} JOB=1:"${nj}" "${logdir}/make_stft_${name}.JOB.log" \
    compute-stft-feats.py \
    --win_length ${win_length} \
    --n_fft "${n_fft}" \
    --n_shift "${n_shift}" \
    --window "${window}" \
    ${write_num_frames_opt} \
    --compress="${compress}" \
    --filetype "${filetype}" \
    --normalize "${normalize}" \
    "${input_args[@]}" \
    "ark,scp:${stftdir}/raw_stft_${name}.JOB.${ext},${stftdir}/raw_stft_${name}.JOB.scp"
# Stitch the per-job .scp files together, in job order, into feats.scp.
for job in $(seq ${nj}); do
    cat ${stftdir}/raw_stft_${name}.${job}.scp || exit 1
done > ${data}/feats.scp || exit 1

# Do the same for the per-job frame counts when they were requested.
if ${write_utt2num_frames}; then
    for job in $(seq ${nj}); do
        cat ${logdir}/utt2num_frames.${job} || exit 1
    done > ${data}/utt2num_frames || exit 1
    # The per-job pieces are no longer needed once merged.
    rm ${logdir}/utt2num_frames.* 2>/dev/null
fi

# Drop the split input lists that were created for the parallel jobs.
rm -f ${logdir}/wav.*.scp ${logdir}/segments.* 2>/dev/null
# Write the filetype, this will be used for data2json.sh
echo "${filetype}" > "${data}/filetype"

# Sanity check: feats.scp should hold one entry per input entry. The jobs are
# driven by the segments file when it exists and by wav.scp otherwise, so the
# reference count has to come from the same list.
if [ -f "${data}/segments" ]; then
    utt_list=${data}/segments
else
    utt_list=${data}/wav.scp
fi
nf=$(wc -l < "${data}/feats.scp")
nu=$(wc -l < "${utt_list}")
if [ "${nf}" -ne "${nu}" ]; then
    # Only a warning: the script still finishes normally.
    echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
    echo "consider using utils/fix_data_dir.sh $data"
fi

echo "Succeeded creating STFT features for $name"