-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Expand file tree
/
Copy pathcompute_wer.py
More file actions
executable file
·164 lines (144 loc) · 5.71 KB
/
compute_wer.py
File metadata and controls
executable file
·164 lines (144 loc) · 5.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
#!/usr/bin/env python3
"""This script computes Word Error Rate and other related information.
Just given a reference and a hypothesis, the script closely matches
Kaldi's compute_wer binary.
Additionally, the script can produce human-readable edit distance
alignments, and find the top WER utterances and speakers.
Usage
-----
::
Compute word error rate or a Levenshtein alignmentbetween a hypothesis and a reference.
positional arguments:
ref The ground truth to compare against. Text file with utterance-ID on the first column.
hyp The hypothesis, for which WER is computed. Text file with utterance-ID on the first column.
optional arguments:
-h, --help show this help message and exit
--mode {present,all,strict}
How to treat missing hypotheses.
'present': only score hypotheses that were found
'all': treat missing hypotheses as empty
'strict': raise KeyError if a hypothesis is missing
--print-top-wer Print a list of utterances with the highest WER.
--print-alignments Print alignments for between all refs and hyps.Also has details for individual hyps. Outputs a lot of text.
--align-separator ALIGN_SEPARATOR
When printing alignments, separate tokens with this.Note the spaces in the default.
--align-empty ALIGN_EMPTY
When printing alignments, empty spaces are filled with this.
--utt2spk UTT2SPK Provide a mapping from utterance ids to speaker ids.If provided, print a list of speakers with highest WER.
Authors:
* Aku Rouhe 2020
"""
import speechbrain.dataio.wer as wer_io
import speechbrain.utils.edit_distance as edit_distance
# These internal utilities read Kaldi-style text/utt2spk files:
def _plain_text_reader(path):
# yields key, token_list
with open(path, encoding="utf-8") as fi:
for line in fi:
key, *tokens = line.strip().split()
yield key, tokens
def _plain_text_keydict(path):
out_dict = {} # key: token_list
for key, tokens in _plain_text_reader(path):
out_dict[key] = tokens
return out_dict
def _utt2spk_keydict(path):
utt2spk = {}
with open(path, encoding="utf-8") as fi:
for line in fi:
utt, spk = line.strip().split()
utt2spk[utt] = spk
return utt2spk
if __name__ == "__main__":
import argparse
# See: https://stackoverflow.com/a/22157136
class SmartFormatter(argparse.HelpFormatter):
"""SmartFormatter class."""
def _split_lines(self, text, width):
if text.startswith("R|"):
return text[2:].splitlines()
return argparse.HelpFormatter._split_lines(self, text, width)
parser = argparse.ArgumentParser(
description=(
"Compute word error rate or a Levenshtein alignment"
"between a hypothesis and a reference."
),
formatter_class=SmartFormatter,
)
parser.add_argument(
"ref",
help="The ground truth to compare against. \
Text file with utterance-ID on the first column.",
)
parser.add_argument(
"hyp",
help="The hypothesis, for which WER is computed. \
Text file with utterance-ID on the first column.",
)
parser.add_argument(
"--mode",
help="R|How to treat missing hypotheses.\n"
" 'present': only score hypotheses that were found\n"
" 'all': treat missing hypotheses as empty\n"
" 'strict': raise KeyError if a hypothesis is missing",
choices=["present", "all", "strict"],
default="strict",
)
parser.add_argument(
"--print-top-wer",
action="store_true",
help="Print a list of utterances with the highest WER.",
)
parser.add_argument(
"--print-alignments",
action="store_true",
help=(
"Print alignments for between all refs and hyps."
"Also has details for individual hyps. Outputs a lot of text."
),
)
parser.add_argument(
"--align-separator",
default=" ; ",
help=(
"When printing alignments, separate tokens with this."
"Note the spaces in the default."
),
)
parser.add_argument(
"--align-empty",
default="<eps>",
help="When printing alignments, empty spaces are filled with this.",
)
parser.add_argument(
"--utt2spk",
help="Provide a mapping from utterance ids to speaker ids."
"If provided, print a list of speakers with highest WER.",
)
args = parser.parse_args()
details_by_utterance = edit_distance.wer_details_by_utterance(
_plain_text_keydict(args.ref),
_plain_text_keydict(args.hyp),
compute_alignments=args.print_alignments,
scoring_mode=args.mode,
)
summary_details = edit_distance.wer_summary(details_by_utterance)
wer_io.print_wer_summary(summary_details)
if args.print_top_wer:
top_non_empty, top_empty = edit_distance.top_wer_utts(
details_by_utterance
)
wer_io._print_top_wer_utts(top_non_empty, top_empty)
if args.utt2spk:
utt2spk = _utt2spk_keydict(args.utt2spk)
details_by_speaker = edit_distance.wer_details_by_speaker(
details_by_utterance, utt2spk
)
top_spks = edit_distance.top_wer_spks(details_by_speaker)
wer_io._print_top_wer_spks(top_spks)
if args.print_alignments:
wer_io.print_alignments(
details_by_utterance,
empty_symbol=args.align_empty,
separator=args.align_separator,
)