Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
·
1d0bb75
1
Parent(s):
bc44985
test: update test cases
Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py
CHANGED
@@ -57,8 +57,10 @@ def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | d
|
|
57 |
raise ValueError(f"cannot read an empty/None text: '{real_text}'...")
|
58 |
if language is None or len(language) == 0:
|
59 |
raise NotImplementedError(f"Not tested/supported with '{language}' language...")
|
60 |
-
if
|
61 |
-
raise
|
|
|
|
|
62 |
|
63 |
start0 = time.time()
|
64 |
|
@@ -189,11 +191,9 @@ def get_selected_word(idx_recorded_word: int, raw_json_output: str) -> tuple[str
|
|
189 |
return list_audio_files[idx_recorded_word], current_word, current_duration
|
190 |
|
191 |
|
192 |
-
def get_splitted_audio_file(audiotmpfile: str | Path, start_time: list[float], end_time: list[float]
|
193 |
import soundfile as sf
|
194 |
audio_files = []
|
195 |
-
if signal is not None:
|
196 |
-
audiotmpfile = sf.SoundFile(signal, samplerate=samplerate)
|
197 |
audio_durations = []
|
198 |
for n, (start_nth, end_nth) in enumerate(zip(start_time, end_time)):
|
199 |
signal_nth, samplerate = soundfile_load(audiotmpfile, offset=start_nth, duration=end_nth - start_nth)
|
|
|
57 |
raise ValueError(f"cannot read an empty/None text: '{real_text}'...")
|
58 |
if language is None or len(language) == 0:
|
59 |
raise NotImplementedError(f"Not tested/supported with '{language}' language...")
|
60 |
+
if file_bytes_or_audiotmpfile is None or len(file_bytes_or_audiotmpfile) == 0:
|
61 |
+
raise OSError(f"cannot read an empty/None file: '{file_bytes_or_audiotmpfile}'...")
|
62 |
+
if not isinstance(file_bytes_or_audiotmpfile, (bytes, bytearray)) and Path(file_bytes_or_audiotmpfile).exists() and Path(file_bytes_or_audiotmpfile).stat().st_size == 0:
|
63 |
+
raise OSError(f"cannot read an empty file: '{file_bytes_or_audiotmpfile}'...")
|
64 |
|
65 |
start0 = time.time()
|
66 |
|
|
|
191 |
return list_audio_files[idx_recorded_word], current_word, current_duration
|
192 |
|
193 |
|
194 |
+
def get_splitted_audio_file(audiotmpfile: str | Path, start_time: list[float], end_time: list[float]) -> tuple[list[str], list[float]]:
|
195 |
import soundfile as sf
|
196 |
audio_files = []
|
|
|
|
|
197 |
audio_durations = []
|
198 |
for n, (start_nth, end_nth) in enumerate(zip(start_time, end_time)):
|
199 |
signal_nth, samplerate = soundfile_load(audiotmpfile, offset=start_nth, duration=end_nth - start_nth)
|
tests/events/empty_file.wav
ADDED
File without changes
|
tests/events/test_stereo.wav
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:52daea4f0de9c60d880a62759f1ea98d0e683d7ca4d0af46e82b284239d12d57
|
3 |
+
size 1146820
|
tests/lambdas/test_lambdaSpeechToScore.py
CHANGED
@@ -150,6 +150,32 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
|
|
150 |
)
|
151 |
check_output(self, output, current_expected_output)
|
152 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
153 |
def test_get_speech_to_score_en_ok(self):
|
154 |
from aip_trainer.lambdas import lambdaSpeechToScore
|
155 |
|
@@ -287,14 +313,19 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
|
|
287 |
|
288 |
def test_get_speech_to_score_dict__de_empty_input_file(self):
|
289 |
language = "de"
|
290 |
-
assert_raises_get_speech_to_score_dict(self, "text fake", "", language,
|
291 |
|
292 |
def test_get_speech_to_score_dict__en_empty_input_file(self):
|
293 |
language = "en"
|
294 |
-
assert_raises_get_speech_to_score_dict(self, "text fake", "", language,
|
295 |
|
296 |
def test_get_speech_to_score_dict__empty_language(self):
|
297 |
assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
|
|
|
|
|
|
|
|
|
|
|
298 |
|
299 |
def test_get_selected_word_valid_index_de_ok(self):
|
300 |
language = "de"
|
@@ -377,6 +408,64 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
|
|
377 |
with self.assertRaises(IndexError):
|
378 |
lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
|
379 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
|
381 |
if __name__ == "__main__":
|
382 |
unittest.main()
|
|
|
150 |
)
|
151 |
check_output(self, output, current_expected_output)
|
152 |
|
153 |
+
def test_lambda_handler_empty_text(self):
|
154 |
+
from aip_trainer.lambdas import lambdaSpeechToScore
|
155 |
+
|
156 |
+
with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r") as src:
|
157 |
+
inputs_outputs = json.load(src)
|
158 |
+
event = inputs_outputs["inputs"]["en"]
|
159 |
+
event_body = event["body"]
|
160 |
+
event_body_loaded = json.loads(event_body)
|
161 |
+
event_body_loaded["title"] = ""
|
162 |
+
event_body = json.dumps(event_body_loaded)
|
163 |
+
event["body"] = event_body
|
164 |
+
output = lambdaSpeechToScore.lambda_handler(event, [])
|
165 |
+
self.assertDictEqual(
|
166 |
+
output,
|
167 |
+
{
|
168 |
+
"statusCode": 200,
|
169 |
+
"headers": {
|
170 |
+
"Access-Control-Allow-Headers": "*",
|
171 |
+
"Access-Control-Allow-Credentials": "true",
|
172 |
+
"Access-Control-Allow-Origin": "http://127.0.0.1:3000/",
|
173 |
+
"Access-Control-Allow-Methods": "OPTIONS,POST,GET",
|
174 |
+
},
|
175 |
+
"body": "",
|
176 |
+
},
|
177 |
+
)
|
178 |
+
|
179 |
def test_get_speech_to_score_en_ok(self):
|
180 |
from aip_trainer.lambdas import lambdaSpeechToScore
|
181 |
|
|
|
313 |
|
314 |
def test_get_speech_to_score_dict__de_empty_input_file(self):
|
315 |
language = "de"
|
316 |
+
assert_raises_get_speech_to_score_dict(self, "text fake", "", language, OSError, "cannot read an empty/None file: ''...")
|
317 |
|
318 |
def test_get_speech_to_score_dict__en_empty_input_file(self):
|
319 |
language = "en"
|
320 |
+
assert_raises_get_speech_to_score_dict(self, "text fake", "", language, OSError, "cannot read an empty/None file: ''...")
|
321 |
|
322 |
def test_get_speech_to_score_dict__empty_language(self):
|
323 |
assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
|
324 |
+
|
325 |
+
def test_get_speech_to_score_dict__empty_language(self):
|
326 |
+
language = "en"
|
327 |
+
path_file = str(EVENTS_FOLDER / "empty_file.wav")
|
328 |
+
assert_raises_get_speech_to_score_dict(self, "text fake", path_file, language, OSError, f"cannot read an empty file: '{path_file}'...")
|
329 |
|
330 |
def test_get_selected_word_valid_index_de_ok(self):
|
331 |
language = "de"
|
|
|
408 |
with self.assertRaises(IndexError):
|
409 |
lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
|
410 |
|
411 |
+
def test_get_splitted_audio_file_valid_input(self):
|
412 |
+
language = "en"
|
413 |
+
path = str(EVENTS_FOLDER / f"test_{language}.wav")
|
414 |
+
start_time = [0.0, 1.0, 2.0]
|
415 |
+
end_time = [1.0, 2.0, 2.5]
|
416 |
+
|
417 |
+
audio_files, audio_durations = lambdaSpeechToScore.get_splitted_audio_file(
|
418 |
+
audiotmpfile=path,
|
419 |
+
start_time=start_time,
|
420 |
+
end_time=end_time
|
421 |
+
)
|
422 |
+
|
423 |
+
assert len(audio_files) == len(start_time)
|
424 |
+
assert len(audio_durations) == len(start_time)
|
425 |
+
for audio_file, duration in zip(audio_files, audio_durations):
|
426 |
+
audio_file_path = Path(audio_file)
|
427 |
+
assert audio_file_path.exists() and audio_file_path.is_file()
|
428 |
+
assert duration > 0
|
429 |
+
audio_file_path.unlink()
|
430 |
+
|
431 |
+
def test_get_splitted_audio_file_invalid_input(self):
|
432 |
+
from soundfile import LibsndfileError
|
433 |
+
|
434 |
+
start_time = [0.0, 1.0, 2.0]
|
435 |
+
end_time = [1.0, 2.0, 2.5]
|
436 |
+
|
437 |
+
with self.assertRaises(LibsndfileError):
|
438 |
+
try:
|
439 |
+
lambdaSpeechToScore.get_splitted_audio_file(
|
440 |
+
audiotmpfile="",
|
441 |
+
start_time=start_time,
|
442 |
+
end_time=end_time
|
443 |
+
)
|
444 |
+
except LibsndfileError as lsfe:
|
445 |
+
msg = str(lsfe)
|
446 |
+
assert msg == "Error opening '': System error."
|
447 |
+
raise lsfe
|
448 |
+
|
449 |
+
def test_get_splitted_audio_file_mismatched_times(self):
|
450 |
+
from soundfile import LibsndfileError
|
451 |
+
|
452 |
+
language = "en"
|
453 |
+
path = EVENTS_FOLDER / f"test_{language}.wav"
|
454 |
+
start_time = [4.0, 5.0, 7.0]
|
455 |
+
end_time = [3.0, 4.0, 5.5]
|
456 |
+
|
457 |
+
with self.assertRaises(LibsndfileError):
|
458 |
+
try:
|
459 |
+
lambdaSpeechToScore.get_splitted_audio_file(
|
460 |
+
audiotmpfile=str(path),
|
461 |
+
start_time=start_time,
|
462 |
+
end_time=end_time
|
463 |
+
)
|
464 |
+
except LibsndfileError as lsfe:
|
465 |
+
msg = str(lsfe)
|
466 |
+
assert msg == 'Internal psf_fseek() failed.'
|
467 |
+
raise lsfe
|
468 |
+
|
469 |
|
470 |
if __name__ == "__main__":
|
471 |
unittest.main()
|
tests/lambdas/test_lambdaSpeechToScore_librosa.py
CHANGED
@@ -53,7 +53,7 @@ class TestAudioReadLoad(unittest.TestCase):
|
|
53 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de)
|
54 |
self.assertEqual(sr_native, 44100)
|
55 |
self.assertEqual(
|
56 |
-
signal.shape
|
57 |
) # Assuming the audio file is ~2,93 seconds long (129653 / 44100)
|
58 |
hash_output = hash_calculate(signal, is_file=False)
|
59 |
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
@@ -61,29 +61,63 @@ class TestAudioReadLoad(unittest.TestCase):
|
|
61 |
def test_audioread_load_with_offset(self):
|
62 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5)
|
63 |
self.assertEqual(sr_native, 44100)
|
64 |
-
self.assertAlmostEqual(signal.shape
|
65 |
hash_output = hash_calculate(signal, is_file=False)
|
66 |
self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
|
67 |
|
68 |
def test_audioread_load_with_duration(self):
|
69 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, duration=129653 / 44100)
|
70 |
self.assertEqual(sr_native, 44100)
|
71 |
-
self.assertEqual(signal.shape
|
72 |
hash_output = hash_calculate(signal, is_file=False)
|
73 |
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
74 |
|
75 |
def test_audioread_load_with_offset_and_duration(self):
|
76 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5, duration=129653 / 44100)
|
77 |
self.assertEqual(sr_native, 44100)
|
78 |
-
self.assertEqual(signal.shape
|
79 |
hash_output = hash_calculate(signal, is_file=False)
|
80 |
self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
def test_audioread_load_empty_file(self):
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
input_empty = EVENTS_FOLDER / "test_empty.wav"
|
88 |
hash_input_empty = hash_calculate(input_empty, is_file=True)
|
89 |
self.assertEqual(hash_input_empty, b'i4+6/oZ5B2RUQpdW+nLxHV9ELIc4HMakKFRR2Cap5ik=')
|
@@ -93,6 +127,89 @@ class TestAudioReadLoad(unittest.TestCase):
|
|
93 |
hash_output = hash_calculate(signal, is_file=False)
|
94 |
self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
|
95 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
96 |
|
97 |
class TestBufToFloat(unittest.TestCase):
|
98 |
def test_buf_to_float_2_bytes(self):
|
|
|
53 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de)
|
54 |
self.assertEqual(sr_native, 44100)
|
55 |
self.assertEqual(
|
56 |
+
signal.shape, (129653,)
|
57 |
) # Assuming the audio file is ~2,93 seconds long (129653 / 44100)
|
58 |
hash_output = hash_calculate(signal, is_file=False)
|
59 |
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
|
|
61 |
def test_audioread_load_with_offset(self):
|
62 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5)
|
63 |
self.assertEqual(sr_native, 44100)
|
64 |
+
self.assertAlmostEqual(signal.shape, (107603,)) # audio file is ~2.44 seconds long (107603 / 44100), offset is 0.5 seconds
|
65 |
hash_output = hash_calculate(signal, is_file=False)
|
66 |
self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
|
67 |
|
68 |
def test_audioread_load_with_duration(self):
|
69 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, duration=129653 / 44100)
|
70 |
self.assertEqual(sr_native, 44100)
|
71 |
+
self.assertEqual(signal.shape, (129653,)) # Assuming the duration is ~2,93 seconds long (129653 / 44100)
|
72 |
hash_output = hash_calculate(signal, is_file=False)
|
73 |
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
74 |
|
75 |
def test_audioread_load_with_offset_and_duration(self):
|
76 |
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5, duration=129653 / 44100)
|
77 |
self.assertEqual(sr_native, 44100)
|
78 |
+
self.assertEqual(signal.shape, (107603,))
|
79 |
hash_output = hash_calculate(signal, is_file=False)
|
80 |
self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
|
81 |
+
|
82 |
+
def test_audioread_load_with_big_offset_and_duration(self):
|
83 |
+
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=10, duration=129653 / 44100)
|
84 |
+
self.assertEqual(sr_native, 44100)
|
85 |
+
self.assertEqual(signal.shape, (0,))
|
86 |
+
hash_output = hash_calculate(signal, is_file=False)
|
87 |
+
self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
|
88 |
+
|
89 |
+
def test_audioread_load_with_big_offset_no_duration(self):
|
90 |
+
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=10)
|
91 |
+
self.assertEqual(sr_native, 44100)
|
92 |
+
self.assertEqual(signal.shape, (0,))
|
93 |
+
hash_output = hash_calculate(signal, is_file=False)
|
94 |
+
self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
|
95 |
+
|
96 |
+
def test_audioread_load_with_small_very_small_duration(self):
|
97 |
+
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, duration=0.000001)
|
98 |
+
self.assertEqual(sr_native, 44100)
|
99 |
+
self.assertEqual(signal.shape, (0,))
|
100 |
+
hash_output = hash_calculate(signal, is_file=False)
|
101 |
+
self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
|
102 |
+
|
103 |
+
def test_audioread_load_with_small_offset_and_no_duration(self):
|
104 |
+
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.02)
|
105 |
+
self.assertEqual(sr_native, 44100)
|
106 |
+
self.assertEqual(signal.shape, (128771,))
|
107 |
+
hash_output = hash_calculate(signal, is_file=False)
|
108 |
+
self.assertEqual(hash_output, b'twAqaV+NNszPT6DwMOC2zL0mCx+BZ51CcoESmULfWRQ=')
|
109 |
|
110 |
def test_audioread_load_empty_file(self):
|
111 |
+
"""
|
112 |
+
To create an empty file, set an offset greater than the duration of the file:
|
113 |
+
```
|
114 |
+
import soundfile as sf
|
115 |
+
import numpy as np
|
116 |
+
duration = 129653 / 44100 # ~2.93 seconds
|
117 |
+
signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=5, duration=duration)
|
118 |
+
sf.write(EVENTS_FOLDER / "test_empty.wav", data=signal, samplerate=44100)
|
119 |
+
```
|
120 |
+
"""
|
121 |
input_empty = EVENTS_FOLDER / "test_empty.wav"
|
122 |
hash_input_empty = hash_calculate(input_empty, is_file=True)
|
123 |
self.assertEqual(hash_input_empty, b'i4+6/oZ5B2RUQpdW+nLxHV9ELIc4HMakKFRR2Cap5ik=')
|
|
|
127 |
hash_output = hash_calculate(signal, is_file=False)
|
128 |
self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
|
129 |
|
130 |
+
def test_audioread_load_full_stereo_file_long(self):
|
131 |
+
signal, sr_native = lambdaSpeechToScore.audioread_load(EVENTS_FOLDER / "test_stereo.wav", duration=6)
|
132 |
+
self.assertEqual(sr_native, 44100)
|
133 |
+
self.assertEqual(
|
134 |
+
signal.shape, (2, 264600)
|
135 |
+
) # Assuming the audio file is ~6 seconds long (264600 / 44100)
|
136 |
+
signal_contiguous = np.ascontiguousarray(signal)
|
137 |
+
hash_output = hash_calculate(signal_contiguous, is_file=False)
|
138 |
+
self.assertEqual(hash_output, b'NBLPhDBmZSTv844S3oDf4lMbQt1x+JbRckub/3rSEJI=')
|
139 |
+
|
140 |
+
|
141 |
+
class TestSoundFileLoad(unittest.TestCase):
|
142 |
+
|
143 |
+
def test_soundfile_load_full_file(self):
|
144 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de)
|
145 |
+
self.assertEqual(sr_native, 44100)
|
146 |
+
self.assertEqual(
|
147 |
+
signal.shape, (129653,)
|
148 |
+
) # Assuming the audio file is ~2,93 seconds long (129653 / 44100)
|
149 |
+
hash_output = hash_calculate(signal, is_file=False)
|
150 |
+
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
151 |
+
|
152 |
+
def test_soundfile_load_with_offset(self):
|
153 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, offset=0.5)
|
154 |
+
self.assertEqual(sr_native, 44100)
|
155 |
+
self.assertAlmostEqual(signal.shape, (107603,)) # audio file is ~2.44 seconds long (107603 / 44100), offset is 0.5 seconds
|
156 |
+
hash_output = hash_calculate(signal, is_file=False)
|
157 |
+
self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
|
158 |
+
|
159 |
+
def test_soundfile_load_with_duration(self):
|
160 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, duration=129653 / 44100)
|
161 |
+
self.assertEqual(sr_native, 44100)
|
162 |
+
self.assertEqual(signal.shape, (129653,)) # Assuming the duration is ~2,93 seconds long (129653 / 44100)
|
163 |
+
hash_output = hash_calculate(signal, is_file=False)
|
164 |
+
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
165 |
+
|
166 |
+
def test_soundfile_load_with_offset_and_duration(self):
|
167 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, offset=0.5, duration=129653 / 44100)
|
168 |
+
self.assertEqual(sr_native, 44100)
|
169 |
+
self.assertEqual(signal.shape, (107603,)) # ~2.44 seconds (107603 / 44100) remain after the 0.5 seconds offset
|
170 |
+
hash_output = hash_calculate(signal, is_file=False)
|
171 |
+
self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
|
172 |
+
|
173 |
+
def test_soundfile_load_empty_file(self):
|
174 |
+
"""
|
175 |
+
To create an empty file, set an offset greater than the duration of the file:
|
176 |
+
```
|
177 |
+
import soundfile as sf
|
178 |
+
import numpy as np
|
179 |
+
duration = 129653 / 44100 # ~2.93 seconds
|
180 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, offset=5, duration=duration)
|
181 |
+
sf.write(EVENTS_FOLDER / "test_empty.wav", data=signal, samplerate=44100)
|
182 |
+
```
|
183 |
+
"""
|
184 |
+
input_empty = EVENTS_FOLDER / "test_empty.wav"
|
185 |
+
hash_input_empty = hash_calculate(input_empty, is_file=True)
|
186 |
+
self.assertEqual(hash_input_empty, b'i4+6/oZ5B2RUQpdW+nLxHV9ELIc4HMakKFRR2Cap5ik=')
|
187 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(input_empty)
|
188 |
+
self.assertEqual(sr_native, 44100)
|
189 |
+
self.assertEqual(signal.shape, (0, )) # Assuming the file is empty
|
190 |
+
hash_output = hash_calculate(signal, is_file=False)
|
191 |
+
self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
|
192 |
+
|
193 |
+
def test_soundfile_load_full_stereo_file_long(self):
|
194 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(EVENTS_FOLDER / "test_stereo.wav", duration=6)
|
195 |
+
self.assertEqual(sr_native, 44100)
|
196 |
+
self.assertEqual(
|
197 |
+
signal.shape, (2, 264600)
|
198 |
+
) # Assuming the audio file is ~6 seconds long (264600 / 44100)
|
199 |
+
signal_contiguous = np.ascontiguousarray(signal)
|
200 |
+
hash_output = hash_calculate(signal_contiguous, is_file=False)
|
201 |
+
self.assertEqual(hash_output, b'NBLPhDBmZSTv844S3oDf4lMbQt1x+JbRckub/3rSEJI=')
|
202 |
+
|
203 |
+
def test_soundfile_load_soundfile_object(self):
|
204 |
+
import soundfile as sf
|
205 |
+
signal, sr_native = lambdaSpeechToScore.soundfile_load(sf.SoundFile(input_file_test_de))
|
206 |
+
self.assertEqual(sr_native, 44100)
|
207 |
+
self.assertEqual(
|
208 |
+
signal.shape, (129653,)
|
209 |
+
) # Assuming the audio file is ~2,93 seconds long (129653 / 44100)
|
210 |
+
hash_output = hash_calculate(signal, is_file=False)
|
211 |
+
self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
|
212 |
+
|
213 |
|
214 |
class TestBufToFloat(unittest.TestCase):
|
215 |
def test_buf_to_float_2_bytes(self):
|