ctl committed on
Commit
a7b912d
1 Parent(s): d13b71e

CER unittests

Browse files
Files changed (2) hide show
  1. cer.py +2 -2
  2. test_cer.py +119 -0
cer.py CHANGED
@@ -108,9 +108,9 @@ class CER(datasets.Metric):
108
  codebase_urls=["https://github.com/jitsi/jiwer/"],
109
  reference_urls=[
110
  "https://en.wikipedia.org/wiki/Word_error_rate",
111
- "https://sites.google.com/site/textdigitisation/qualitymeasures/computingerrorrates#whitespace",
112
  ],
113
  )
114
 
115
  def _compute(self, predictions, references):
116
- return jiwer.wer(references, predictions, truth_transform=cer_transform, hypothesis_transform=cer_transform)
 
108
  codebase_urls=["https://github.com/jitsi/jiwer/"],
109
  reference_urls=[
110
  "https://en.wikipedia.org/wiki/Word_error_rate",
111
+ "https://sites.google.com/site/textdigitisation/qualitymeasures/computingerrorrates",
112
  ],
113
  )
114
 
115
  def _compute(self, predictions, references):
116
+ return jiwer.wer(references, predictions, truth_transform=cer_transform, hypothesis_transform=cer_transform)
test_cer.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2021 The HuggingFace Datasets Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ import unittest
16
+ import cer
17
+
18
+ cer = cer.CER()
19
+
20
+ class TestCER(unittest.TestCase):
21
+ def test_cer_case_senstive(self):
22
+ refs = ['White House']
23
+ preds = ['white house']
24
+ # S = 2, D = 0, I = 0, N = 11, CER = 2 / 11
25
+ char_error_rate = cer.compute(predictions=preds, references=refs)
26
+ self.assertTrue(abs(char_error_rate - 0.1818181818) < 1e-6)
27
+
28
+ def test_cer_whitespace(self):
29
+ refs = ['were wolf']
30
+ preds = ['werewolf']
31
+ # S = 0, D = 0, I = 1, N = 9, CER = 1 / 9
32
+ char_error_rate = cer.compute(predictions=preds, references=refs)
33
+ self.assertTrue(abs(char_error_rate - 0.1111111) < 1e-6)
34
+
35
+ # consecutive whitespaces case 0
36
+ refs = ['werewolf']
37
+ preds = ['weae wolf']
38
+ # S = 1, D = 1, I = 0, N = 8, CER = 0.25
39
+ char_error_rate = cer.compute(predictions=preds, references=refs)
40
+ self.assertTrue(abs(char_error_rate - 0.25) < 1e-6)
41
+
42
+ # consecutive whitespaces case 1
43
+ refs = ['were wolf']
44
+ preds = ['were wolf']
45
+ # S = 0, D = 0, I = 0, N = 9, CER = 0
46
+ char_error_rate = cer.compute(predictions=preds, references=refs)
47
+ self.assertTrue(abs(char_error_rate - 0.0) < 1e-6)
48
+
49
+ # consecutive whitespaces case 2
50
+ refs = ['were wolf']
51
+ preds = ['were wolf']
52
+ # S = 0, D = 0, I = 0, N = 9, CER = 0
53
+ char_error_rate = cer.compute(predictions=preds, references=refs)
54
+ self.assertTrue(abs(char_error_rate - 0.0) < 1e-6)
55
+
56
+ def test_cer_sub(self):
57
+ refs = ['werewolf']
58
+ preds = ['weaewolf']
59
+ # S = 1, D = 0, I = 0, N = 8, CER = 0.125
60
+ char_error_rate = cer.compute(predictions=preds, references=refs)
61
+ self.assertTrue(abs(char_error_rate - 0.125) < 1e-6)
62
+
63
+ def test_cer_del(self):
64
+ refs = ['werewolf']
65
+ preds = ['wereawolf']
66
+ # S = 0, D = 1, I = 0, N = 8, CER = 0.125
67
+ char_error_rate = cer.compute(predictions=preds, references=refs)
68
+ self.assertTrue(abs(char_error_rate - 0.125) < 1e-6)
69
+
70
+ def test_cer_insert(self):
71
+ refs = ['werewolf']
72
+ preds = ['wereolf']
73
+ # S = 0, D = 0, I = 1, N = 8, CER = 0.125
74
+ char_error_rate = cer.compute(predictions=preds, references=refs)
75
+ self.assertTrue(abs(char_error_rate - 0.125) < 1e-6)
76
+
77
+ def test_cer_equal(self):
78
+ refs = ['werewolf']
79
+ char_error_rate = cer.compute(predictions=refs, references=refs)
80
+ self.assertEqual(char_error_rate, 0.0)
81
+
82
+ def test_cer_list_of_seqs(self):
83
+ refs = ['werewolf', 'I am your father']
84
+ char_error_rate = cer.compute(predictions=refs, references=refs)
85
+ self.assertEqual(char_error_rate, 0.0)
86
+
87
+ refs = ['werewolf', 'I am your father', 'doge']
88
+ preds = ['werxwolf', 'I am your father', 'doge']
89
+ # S = 1, D = 0, I = 0, N = 28, CER = 1 / 28
90
+ char_error_rate = cer.compute(predictions=preds, references=refs)
91
+ self.assertTrue(abs(char_error_rate - 0.03571428) < 1e-6)
92
+
93
+ def test_cer_unicode(self):
94
+ ref = [u'我能吞下玻璃而不伤身体']
95
+ pred = [u' 能吞虾玻璃而 不霜身体啦']
96
+ # S = 3, D = 2, I = 0, N = 11
97
+ # CER = 5 / 11
98
+ char_error_rate = cer.compute(predictions=pred, references=ref)
99
+ self.assertTrue(abs(char_error_rate - 0.4545454545) < 1e-6)
100
+
101
+ ref = [u'我能吞', u'下玻璃而不伤身体']
102
+ pred = [u'我 能 吞 下 玻 璃', u'而不伤身体']
103
+ # S = 0, D = 5, I = 0, N = 11
104
+ # CER = 5 / 11
105
+ char_error_rate = cer.compute(predictions=pred, references=ref)
106
+ self.assertTrue(abs(char_error_rate - 0.454545454545) < 1e-6)
107
+
108
+ ref = [u'我能吞下玻璃而不伤身体']
109
+ char_error_rate = cer.compute(predictions=ref, references=ref)
110
+ self.assertFalse(char_error_rate, 0.0)
111
+
112
+ def test_cer_empty(self):
113
+ ref = ''
114
+ pred = 'Hypothesis'
115
+ with self.assertRaises(ValueError):
116
+ char_error_rate = cer.compute(predictions=pred, references=ref)
117
+
118
+ if __name__ == '__main__':
119
+ unittest.main()