alessandro trinca tornidor committed on
Commit
e2f3a00
1 Parent(s): 5105802

test: add/update tests

Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -58,7 +58,7 @@ def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | d
58
  raise ValueError(f"cannot read an empty/None text: '{real_text}'...")
59
  if language is None or len(language) == 0:
60
  raise NotImplementedError(f"Not tested/supported with '{language}' language...")
61
- if file_bytes_or_audiotmpfile is None or len(file_bytes_or_audiotmpfile) == 0 or os.path.getsize(file_bytes_or_audiotmpfile) == 0:
62
  raise ValueError(f"cannot read an empty/None file: '{file_bytes_or_audiotmpfile}'...")
63
 
64
  start0 = time.time()
 
58
  raise ValueError(f"cannot read an empty/None text: '{real_text}'...")
59
  if language is None or len(language) == 0:
60
  raise NotImplementedError(f"Not tested/supported with '{language}' language...")
61
+ if not isinstance(file_bytes_or_audiotmpfile, (bytes, bytearray)) and (file_bytes_or_audiotmpfile is None or len(file_bytes_or_audiotmpfile) == 0 or os.path.getsize(file_bytes_or_audiotmpfile) == 0):
62
  raise ValueError(f"cannot read an empty/None file: '{file_bytes_or_audiotmpfile}'...")
63
 
64
  start0 = time.time()
tests/{test_GetAccuracyFromRecordedAudio.py → test_lambdaSpeechToScore.py} RENAMED
@@ -8,20 +8,62 @@ from aip_trainer.lambdas import lambdaSpeechToScore
8
  from tests import EVENTS_FOLDER
9
 
10
 
11
- text_dict = {
12
- "de": "Ich bin Alex, wer bist du?",
13
- "en": "Hi there, how are you?"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  }
15
 
16
 
17
- def check_output_by_field(output, key, match, expected_output):
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  import re
19
 
20
- assert len(output[key].strip()) > 0
21
- for word in output[key].lstrip().rstrip().split(" "):
22
  word_check = re.findall(match, word.strip())
23
  assert len(word_check) == 1
24
  assert word_check[0] == word.strip()
 
 
 
 
25
  output[key] = expected_output[key]
26
  return output
27
 
@@ -73,13 +115,13 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
73
  inputs = inputs_outputs["inputs"]
74
  outputs = inputs_outputs["outputs"]
75
  for event_name, event_content in inputs.items():
76
- expected_output = outputs[event_name]
77
  output = lambdaSpeechToScore.lambda_handler(event_content, [])
78
  output = json.loads(output)
79
  app_logger.info(
80
- f"output type:{type(output)}, expected_output type:{type(expected_output)}."
81
  )
82
- check_output(self, output, expected_output)
83
 
84
  def test_get_speech_to_score_en_ok(self):
85
  from aip_trainer.lambdas import lambdaSpeechToScore
@@ -88,24 +130,29 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
88
  path = EVENTS_FOLDER / f"test_{language}.wav"
89
  output = lambdaSpeechToScore.get_speech_to_score_dict(
90
  real_text=text_dict[language],
91
- file_bytes_or_audiotmpfile=path,
92
  language=language,
93
  remove_random_file=False,
94
  )
95
- expected_output = {
96
- "real_transcript": text_dict[language],
97
- "ipa_transcript": "ha\u026a ha\u028a \u0259r ju",
98
- "pronunciation_accuracy": "69",
99
- "real_transcripts": text_dict[language],
100
- "matched_transcripts": "hi - how are you",
101
- "real_transcripts_ipa": "ha\u026a \u00f0\u025br, ha\u028a \u0259r ju?",
102
- "matched_transcripts_ipa": "ha\u026a ha\u028a \u0259r ju",
103
- "pair_accuracy_category": "0 2 0 0 0",
104
- "start_time": "0.2245625 1.3228125 0.852125 1.04825 1.3228125",
105
- "end_time": "0.559875 1.658125 1.14825 1.344375 1.658125",
106
- "is_letter_correct_all_words": "11 000001 111 111 1111 ",
107
- }
108
- check_output(self, output, expected_output)
 
 
 
 
 
109
 
110
  def test_get_speech_to_score_de_ok(self):
111
  from aip_trainer.lambdas import lambdaSpeechToScore
@@ -114,24 +161,100 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
114
  path = EVENTS_FOLDER / f"test_{language}.wav"
115
  output = lambdaSpeechToScore.get_speech_to_score_dict(
116
  real_text=text_dict[language],
117
- file_bytes_or_audiotmpfile=path,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  language=language,
119
  remove_random_file=False,
120
  )
121
- expected_output = {
122
- "real_transcript": text_dict[language],
123
- "ipa_transcript": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u025b\u02d0 b\u025bst\u025b\u02d0 du\u02d0",
124
- "pronunciation_accuracy": "63",
125
- "real_transcripts": text_dict[language],
126
- "matched_transcripts": "ich bin alexwe - beste du",
127
- "real_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bks, v\u0250 b\u026ast du\u02d0?",
128
- "matched_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u0259 - b\u0259st\u0259 du\u02d0",
129
- "pair_accuracy_category": "0 0 2 2 2 0",
130
- "start_time": "0.0 0.3075 0.62525 2.1346875 1.5785625 2.1346875",
131
- "end_time": "0.328 0.6458125 1.44025 2.4730625 2.15525 2.4730625",
132
- "is_letter_correct_all_words": "111 111 11111 000 1011 111 ",
133
- }
134
- check_output(self, output, expected_output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
 
136
 
137
  if __name__ == "__main__":
 
8
  from tests import EVENTS_FOLDER
9
 
10
 
11
+ text_dict = {"de": "Ich bin Alex, wer bist du?", "en": "Hi there, how are you?"}
12
+ expected_output = {
13
+ "de": {
14
+ "real_transcript": text_dict["de"],
15
+ "ipa_transcript": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u025b\u02d0 b\u025bst\u025b\u02d0 du\u02d0",
16
+ "pronunciation_accuracy": "63",
17
+ "real_transcripts": text_dict["de"],
18
+ "matched_transcripts": "ich bin alexwe - beste du",
19
+ "real_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bks, v\u0250 b\u026ast du\u02d0?",
20
+ "matched_transcripts_ipa": "\u026a\u00e7 bi\u02d0n a\u02d0l\u025bksv\u0259 - b\u0259st\u0259 du\u02d0",
21
+ "pair_accuracy_category": "0 0 2 2 2 0",
22
+ "start_time": "0.0 0.3075 0.62525 2.1346875 1.5785625 2.1346875",
23
+ "end_time": "0.328 0.6458125 1.44025 2.4730625 2.15525 2.4730625",
24
+ "is_letter_correct_all_words": "111 111 11111 000 1011 111 ",
25
+ },
26
+ "en": {
27
+ "real_transcript": text_dict["en"],
28
+ "ipa_transcript": "ha\u026a ha\u028a \u0259r ju",
29
+ "pronunciation_accuracy": "69",
30
+ "real_transcripts": text_dict["en"],
31
+ "matched_transcripts": "hi - how are you",
32
+ "real_transcripts_ipa": "ha\u026a \u00f0\u025br, ha\u028a \u0259r ju?",
33
+ "matched_transcripts_ipa": "ha\u026a ha\u028a \u0259r ju",
34
+ "pair_accuracy_category": "0 2 0 0 0",
35
+ "start_time": "0.2245625 1.3228125 0.852125 1.04825 1.3228125",
36
+ "end_time": "0.559875 1.658125 1.14825 1.344375 1.658125",
37
+ "is_letter_correct_all_words": "11 000001 111 111 1111 ",
38
+ },
39
  }
40
 
41
 
42
+ def assert_raises_get_speech_to_score_dict(self, real_text, file_bytes_or_audiotmpfile, language, exc, error_message):
43
+ from aip_trainer.lambdas import lambdaSpeechToScore
44
+
45
+ with self.assertRaises(exc):
46
+ try:
47
+ lambdaSpeechToScore.get_speech_to_score_dict(
48
+ real_text, file_bytes_or_audiotmpfile, language, remove_random_file=False
49
+ )
50
+ except exc as e:
51
+ self.assertEqual(str(e), error_message)
52
+ raise e
53
+
54
+
55
+ def check_value_by_field(value, match):
56
  import re
57
 
58
+ assert len(value.strip()) > 0
59
+ for word in value.lstrip().rstrip().split(" "):
60
  word_check = re.findall(match, word.strip())
61
  assert len(word_check) == 1
62
  assert word_check[0] == word.strip()
63
+
64
+
65
+ def check_output_by_field(output, key, match, expected_output):
66
+ check_value_by_field(output[key], match)
67
  output[key] = expected_output[key]
68
  return output
69
 
 
115
  inputs = inputs_outputs["inputs"]
116
  outputs = inputs_outputs["outputs"]
117
  for event_name, event_content in inputs.items():
118
+ current_expected_output = outputs[event_name]
119
  output = lambdaSpeechToScore.lambda_handler(event_content, [])
120
  output = json.loads(output)
121
  app_logger.info(
122
+ f"output type:{type(output)}, expected_output type:{type(current_expected_output)}."
123
  )
124
+ check_output(self, output, current_expected_output)
125
 
126
  def test_get_speech_to_score_en_ok(self):
127
  from aip_trainer.lambdas import lambdaSpeechToScore
 
130
  path = EVENTS_FOLDER / f"test_{language}.wav"
131
  output = lambdaSpeechToScore.get_speech_to_score_dict(
132
  real_text=text_dict[language],
133
+ file_bytes_or_audiotmpfile=str(path),
134
  language=language,
135
  remove_random_file=False,
136
  )
137
+ check_output(self, output, expected_output[language])
138
+
139
+ def test_get_speech_to_score_en_ok_remove_input_file(self):
140
+ import shutil
141
+ from aip_trainer.lambdas import lambdaSpeechToScore
142
+
143
+ language = "en"
144
+ path = EVENTS_FOLDER / f"test_{language}.wav"
145
+ path2 = EVENTS_FOLDER / f"test2_{language}.wav"
146
+ shutil.copy(path, path2)
147
+ assert path2.exists() and path2.is_file()
148
+ output = lambdaSpeechToScore.get_speech_to_score_dict(
149
+ real_text=text_dict[language],
150
+ file_bytes_or_audiotmpfile=str(path2),
151
+ language=language,
152
+ remove_random_file=True,
153
+ )
154
+ assert not path2.exists()
155
+ check_output(self, output, expected_output[language])
156
 
157
  def test_get_speech_to_score_de_ok(self):
158
  from aip_trainer.lambdas import lambdaSpeechToScore
 
161
  path = EVENTS_FOLDER / f"test_{language}.wav"
162
  output = lambdaSpeechToScore.get_speech_to_score_dict(
163
  real_text=text_dict[language],
164
+ file_bytes_or_audiotmpfile=str(path),
165
+ language=language,
166
+ remove_random_file=False,
167
+ )
168
+ check_output(self, output, expected_output[language])
169
+
170
+ def test_get_speech_to_score_de_ok_remove_input_file(self):
171
+ import shutil
172
+ from aip_trainer.lambdas import lambdaSpeechToScore
173
+
174
+ language = "de"
175
+ path = EVENTS_FOLDER / f"test_{language}.wav"
176
+ path2 = EVENTS_FOLDER / f"test2_{language}.wav"
177
+ shutil.copy(path, path2)
178
+ assert path2.exists() and path2.is_file()
179
+ output = lambdaSpeechToScore.get_speech_to_score_dict(
180
+ real_text=text_dict[language],
181
+ file_bytes_or_audiotmpfile=str(path2),
182
+ language=language,
183
+ remove_random_file=True,
184
+ )
185
+ assert not path2.exists()
186
+ check_output(self, output, expected_output[language])
187
+
188
+ def test_get_speech_to_score_tuple_de_ok(self):
189
+ from aip_trainer.lambdas import lambdaSpeechToScore
190
+
191
+ language = "de"
192
+ path = EVENTS_FOLDER / f"test_{language}.wav"
193
+ (
194
+ real_transcripts,
195
+ is_letter_correct_all_words,
196
+ pronunciation_accuracy,
197
+ ipa_transcript,
198
+ real_transcripts_ipa,
199
+ dumped,
200
+ ) = lambdaSpeechToScore.get_speech_to_score_tuple(
201
+ real_text=text_dict[language],
202
+ file_bytes_or_audiotmpfile=str(path),
203
  language=language,
204
  remove_random_file=False,
205
  )
206
+ assert real_transcripts == text_dict[language]
207
+ check_value_by_field(is_letter_correct_all_words, "[01]+")
208
+ check_value_by_field(pronunciation_accuracy, "\d+")
209
+ assert len(ipa_transcript.strip()) > 0
210
+ assert len(real_transcripts_ipa.strip()) > 0
211
+ check_output(self, json.loads(dumped), expected_output[language])
212
+
213
+ def test_get_speech_to_score_tuple_en_ok(self):
214
+ from aip_trainer.lambdas import lambdaSpeechToScore
215
+
216
+ language = "en"
217
+ path = EVENTS_FOLDER / f"test_{language}.wav"
218
+ (
219
+ real_transcripts,
220
+ is_letter_correct_all_words,
221
+ pronunciation_accuracy,
222
+ ipa_transcript,
223
+ real_transcripts_ipa,
224
+ dumped,
225
+ ) = lambdaSpeechToScore.get_speech_to_score_tuple(
226
+ real_text=text_dict[language],
227
+ file_bytes_or_audiotmpfile=str(path),
228
+ language=language,
229
+ remove_random_file=False,
230
+ )
231
+ assert real_transcripts == text_dict[language]
232
+ check_value_by_field(is_letter_correct_all_words, "[01]+")
233
+ check_value_by_field(pronunciation_accuracy, "\d+")
234
+ assert len(ipa_transcript.strip()) > 0
235
+ assert len(real_transcripts_ipa.strip()) > 0
236
+ check_output(self, json.loads(dumped), expected_output[language])
237
+
238
+ def test_get_speech_to_score_dict__de_empty_input_text(self):
239
+ language = "de"
240
+ path = EVENTS_FOLDER / f"test_{language}.wav"
241
+ assert_raises_get_speech_to_score_dict(self, "", str(path), language, ValueError, "cannot read an empty/None text: ''...")
242
+
243
+ def test_get_speech_to_score_dict__en_empty_input_text(self):
244
+ language = "en"
245
+ path = EVENTS_FOLDER / f"test_{language}.wav"
246
+ assert_raises_get_speech_to_score_dict(self, "", str(path), language, ValueError, "cannot read an empty/None text: ''...")
247
+
248
+ def test_get_speech_to_score_dict__de_empty_input_file(self):
249
+ language = "de"
250
+ assert_raises_get_speech_to_score_dict(self, "text fake", "", language, ValueError, "cannot read an empty/None file: ''...")
251
+
252
+ def test_get_speech_to_score_dict__en_empty_input_file(self):
253
+ language = "en"
254
+ assert_raises_get_speech_to_score_dict(self, "text fake", "", language, ValueError, "cannot read an empty/None file: ''...")
255
+
256
+ def test_get_speech_to_score_dict__empty_language(self):
257
+ assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
258
 
259
 
260
  if __name__ == "__main__":