Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Files Community

alessandro trinca tornidor committed on Dec 2, 2024

Commit

1d0bb75

1 Parent(s): bc44985

test: update test cases

Browse files

Files changed (5) hide show

aip_trainer/lambdas/lambdaSpeechToScore.py +5 -5
tests/events/empty_file.wav +0 -0
tests/events/test_stereo.wav +3 -0
tests/lambdas/test_lambdaSpeechToScore.py +91 -2
tests/lambdas/test_lambdaSpeechToScore_librosa.py +125 -8

aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED Viewed

@@ -57,8 +57,10 @@ def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | d
         raise ValueError(f"cannot read an empty/None text: '{real_text}'...")
     if language is None or len(language) == 0:
         raise NotImplementedError(f"Not tested/supported with '{language}' language...")
-    if not isinstance(file_bytes_or_audiotmpfile, (bytes, bytearray)) and (file_bytes_or_audiotmpfile is None or len(file_bytes_or_audiotmpfile) == 0 or os.path.getsize(file_bytes_or_audiotmpfile) == 0):
-        raise ValueError(f"cannot read an empty/None file: '{file_bytes_or_audiotmpfile}'...")
     start0 = time.time()
@@ -189,11 +191,9 @@ def get_selected_word(idx_recorded_word: int, raw_json_output: str) -> tuple[str
     return list_audio_files[idx_recorded_word], current_word, current_duration
-def get_splitted_audio_file(audiotmpfile: str | Path, start_time: list[float], end_time: list[float], signal: np.ndarray = None, samplerate: int = None) -> tuple[list[str], list[float]]:
     import soundfile as sf
     audio_files = []
-    if signal is not None:
-        audiotmpfile = sf.SoundFile(signal, samplerate=samplerate)
     audio_durations = []
     for n, (start_nth, end_nth) in enumerate(zip(start_time, end_time)):
         signal_nth, samplerate = soundfile_load(audiotmpfile, offset=start_nth, duration=end_nth - start_nth)

         raise ValueError(f"cannot read an empty/None text: '{real_text}'...")
     if language is None or len(language) == 0:
         raise NotImplementedError(f"Not tested/supported with '{language}' language...")
+    if file_bytes_or_audiotmpfile is None or len(file_bytes_or_audiotmpfile) == 0:
+        raise OSError(f"cannot read an empty/None file: '{file_bytes_or_audiotmpfile}'...")
+    if not isinstance(file_bytes_or_audiotmpfile, (bytes, bytearray)) and Path(file_bytes_or_audiotmpfile).exists() and Path(file_bytes_or_audiotmpfile).stat().st_size == 0:
+        raise OSError(f"cannot read an empty file: '{file_bytes_or_audiotmpfile}'...")
     start0 = time.time()
     return list_audio_files[idx_recorded_word], current_word, current_duration
+def get_splitted_audio_file(audiotmpfile: str | Path, start_time: list[float], end_time: list[float]) -> tuple[list[str], list[float]]:
     import soundfile as sf
     audio_files = []
     audio_durations = []
     for n, (start_nth, end_nth) in enumerate(zip(start_time, end_time)):
         signal_nth, samplerate = soundfile_load(audiotmpfile, offset=start_nth, duration=end_nth - start_nth)

tests/events/empty_file.wav ADDED Viewed

File without changes

tests/events/test_stereo.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52daea4f0de9c60d880a62759f1ea98d0e683d7ca4d0af46e82b284239d12d57
+size 1146820

tests/lambdas/test_lambdaSpeechToScore.py CHANGED Viewed

@@ -150,6 +150,32 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
             )
             check_output(self, output, current_expected_output)
     def test_get_speech_to_score_en_ok(self):
         from aip_trainer.lambdas import lambdaSpeechToScore
@@ -287,14 +313,19 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
     def test_get_speech_to_score_dict__de_empty_input_file(self):
         language = "de"
-        assert_raises_get_speech_to_score_dict(self, "text fake", "", language, ValueError, "cannot read an empty/None file: ''...")
     def test_get_speech_to_score_dict__en_empty_input_file(self):
         language = "en"
-        assert_raises_get_speech_to_score_dict(self, "text fake", "", language, ValueError, "cannot read an empty/None file: ''...")
     def test_get_speech_to_score_dict__empty_language(self):
         assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
     def test_get_selected_word_valid_index_de_ok(self):
         language = "de"
@@ -377,6 +408,64 @@ class TestGetAccuracyFromRecordedAudio(unittest.TestCase):
         with self.assertRaises(IndexError):
             lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
 if __name__ == "__main__":
     unittest.main()

             )
             check_output(self, output, current_expected_output)
+    def test_lambda_handler_empty_text(self):
+        from aip_trainer.lambdas import lambdaSpeechToScore
+        with open(EVENTS_FOLDER / "GetAccuracyFromRecordedAudio.json", "r") as src:
+            inputs_outputs = json.load(src)
+        event = inputs_outputs["inputs"]["en"]
+        event_body = event["body"]
+        event_body_loaded = json.loads(event_body)
+        event_body_loaded["title"] = ""
+        event_body = json.dumps(event_body_loaded)
+        event["body"] = event_body
+        output = lambdaSpeechToScore.lambda_handler(event, [])
+        self.assertDictEqual(
+            output,
+            {
+                "statusCode": 200,
+                "headers": {
+                    "Access-Control-Allow-Headers": "*",
+                    "Access-Control-Allow-Credentials": "true",
+                    "Access-Control-Allow-Origin": "http://127.0.0.1:3000/",
+                    "Access-Control-Allow-Methods": "OPTIONS,POST,GET",
+                },
+                "body": "",
+            },
+        )
     def test_get_speech_to_score_en_ok(self):
         from aip_trainer.lambdas import lambdaSpeechToScore
     def test_get_speech_to_score_dict__de_empty_input_file(self):
         language = "de"
+        assert_raises_get_speech_to_score_dict(self, "text fake", "", language, OSError, "cannot read an empty/None file: ''...")
     def test_get_speech_to_score_dict__en_empty_input_file(self):
         language = "en"
+        assert_raises_get_speech_to_score_dict(self, "text fake", "", language, OSError, "cannot read an empty/None file: ''...")
     def test_get_speech_to_score_dict__empty_language(self):
         assert_raises_get_speech_to_score_dict(self, "text fake", "fake_file", "", NotImplementedError, "Not tested/supported with '' language...")
+    def test_get_speech_to_score_dict__empty_language(self):
+        language = "en"
+        path_file = str(EVENTS_FOLDER / "empty_file.wav")
+        assert_raises_get_speech_to_score_dict(self, "text fake", path_file, language, OSError, f"cannot read an empty file: '{path_file}'...")
     def test_get_selected_word_valid_index_de_ok(self):
         language = "de"
         with self.assertRaises(IndexError):
             lambdaSpeechToScore.get_selected_word(idx_recorded_word, raw_json_output)
+    def test_get_splitted_audio_file_valid_input(self):
+        language = "en"
+        path = str(EVENTS_FOLDER / f"test_{language}.wav")
+        start_time = [0.0, 1.0, 2.0]
+        end_time = [1.0, 2.0, 2.5]
+        audio_files, audio_durations = lambdaSpeechToScore.get_splitted_audio_file(
+            audiotmpfile=path,
+            start_time=start_time,
+            end_time=end_time
+        )
+        assert len(audio_files) == len(start_time)
+        assert len(audio_durations) == len(start_time)
+        for audio_file, duration in zip(audio_files, audio_durations):
+            audio_file_path = Path(audio_file)
+            assert audio_file_path.exists() and audio_file_path.is_file()
+            assert duration > 0
+            audio_file_path.unlink()
+    def test_get_splitted_audio_file_invalid_input(self):
+        from soundfile import LibsndfileError
+        start_time = [0.0, 1.0, 2.0]
+        end_time = [1.0, 2.0, 2.5]
+        with self.assertRaises(LibsndfileError):
+            try:
+                lambdaSpeechToScore.get_splitted_audio_file(
+                    audiotmpfile="",
+                    start_time=start_time,
+                    end_time=end_time
+                )
+            except LibsndfileError as lsfe:
+                msg = str(lsfe)
+                assert msg == "Error opening '': System error."
+                raise lsfe
+    def test_get_splitted_audio_file_mismatched_times(self):
+        from soundfile import LibsndfileError
+        language = "en"
+        path = EVENTS_FOLDER / f"test_{language}.wav"
+        start_time = [4.0, 5.0, 7.0]
+        end_time = [3.0, 4.0, 5.5]
+        with self.assertRaises(LibsndfileError):
+            try:
+                lambdaSpeechToScore.get_splitted_audio_file(
+                    audiotmpfile=str(path),
+                    start_time=start_time,
+                    end_time=end_time
+                )
+            except LibsndfileError as lsfe:
+                msg = str(lsfe)
+                assert msg == 'Internal psf_fseek() failed.'
+                raise lsfe
 if __name__ == "__main__":
     unittest.main()

tests/lambdas/test_lambdaSpeechToScore_librosa.py CHANGED Viewed

@@ -53,7 +53,7 @@ class TestAudioReadLoad(unittest.TestCase):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de)
         self.assertEqual(sr_native, 44100)
         self.assertEqual(
-            signal.shape[0], 129653
         )  # Assuming the audio file is ~2,93 seconds long (107603 / 44100)
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
@@ -61,29 +61,63 @@ class TestAudioReadLoad(unittest.TestCase):
     def test_audioread_load_with_offset(self):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5)
         self.assertEqual(sr_native, 44100)
-        self.assertAlmostEqual(signal.shape[0], 107603)  # audio file is ~2.44 seconds long (107603 / 44100), offset is 0.5 seconds
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
     def test_audioread_load_with_duration(self):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, duration=129653 / 44100)
         self.assertEqual(sr_native, 44100)
-        self.assertEqual(signal.shape[0], 129653)  # Assuming the duration is ~2,93 seconds long (129653 / 44100)
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
     def test_audioread_load_with_offset_and_duration(self):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5, duration=129653 / 44100)
         self.assertEqual(sr_native, 44100)
-        self.assertEqual(signal.shape[0], 107603)  # Assuming the duration is 5 seconds starting from 2 seconds offset
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
     def test_audioread_load_empty_file(self):
-        # import soundfile as sf
-        # import numpy as np
-        # signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=5, duration=129653 / 44100)
-        # sf.write(EVENTS_FOLDER / "test_empty.wav", data=signal, samplerate=44100)
         input_empty = EVENTS_FOLDER / "test_empty.wav"
         hash_input_empty = hash_calculate(input_empty, is_file=True)
         self.assertEqual(hash_input_empty, b'i4+6/oZ5B2RUQpdW+nLxHV9ELIc4HMakKFRR2Cap5ik=')
@@ -93,6 +127,89 @@ class TestAudioReadLoad(unittest.TestCase):
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
 class TestBufToFloat(unittest.TestCase):
     def test_buf_to_float_2_bytes(self):

         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de)
         self.assertEqual(sr_native, 44100)
         self.assertEqual(
+            signal.shape, (129653,)
         )  # Assuming the audio file is ~2,93 seconds long (107603 / 44100)
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
     def test_audioread_load_with_offset(self):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5)
         self.assertEqual(sr_native, 44100)
+        self.assertAlmostEqual(signal.shape, (107603,))  # audio file is ~2.44 seconds long (107603 / 44100), offset is 0.5 seconds
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
     def test_audioread_load_with_duration(self):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, duration=129653 / 44100)
         self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (129653,))  # Assuming the duration is ~2,93 seconds long (129653 / 44100)
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
     def test_audioread_load_with_offset_and_duration(self):
         signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.5, duration=129653 / 44100)
         self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (107603,))
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
+    def test_audioread_load_with_big_offset_and_duration(self):
+        signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=10, duration=129653 / 44100)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (0,))
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
+    def test_audioread_load_with_big_offset_no_duration(self):
+        signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=10)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (0,))
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
+    def test_audioread_load_with_small_very_small_duration(self):
+        signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, duration=0.000001)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (0,))
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
+    def test_audioread_load_with_small_offset_and_no_duration(self):
+        signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=0.02)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (128771,))
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'twAqaV+NNszPT6DwMOC2zL0mCx+BZ51CcoESmULfWRQ=')
     def test_audioread_load_empty_file(self):
+        """
+        To create an empty file, set an offset greater than the duration of the file:
+        ```
+        import soundfile as sf
+        import numpy as np
+        duration = 129653 / 44100  # ~2.93 seconds
+        signal, sr_native = lambdaSpeechToScore.audioread_load(input_file_test_de, offset=5, duration=duration)
+        sf.write(EVENTS_FOLDER / "test_empty.wav", data=signal, samplerate=44100)
+        ```
+        """
         input_empty = EVENTS_FOLDER / "test_empty.wav"
         hash_input_empty = hash_calculate(input_empty, is_file=True)
         self.assertEqual(hash_input_empty, b'i4+6/oZ5B2RUQpdW+nLxHV9ELIc4HMakKFRR2Cap5ik=')
         hash_output = hash_calculate(signal, is_file=False)
         self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
+    def test_audioread_load_full_stereo_file_long(self):
+        signal, sr_native = lambdaSpeechToScore.audioread_load(EVENTS_FOLDER / "test_stereo.wav", duration=6)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(
+            signal.shape, (2, 264600)
+        )  # Assuming the audio file is ~6 seconds long (264600 / 44100)
+        signal_contiguous = np.ascontiguousarray(signal)
+        hash_output = hash_calculate(signal_contiguous, is_file=False)
+        self.assertEqual(hash_output, b'NBLPhDBmZSTv844S3oDf4lMbQt1x+JbRckub/3rSEJI=')
+class TestSoundFileLoad(unittest.TestCase):
+    def test_soundfile_load_full_file(self):
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(
+            signal.shape, (129653,)
+        )  # Assuming the audio file is ~2,93 seconds long (107603 / 44100)
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
+    def test_soundfile_load_with_offset(self):
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, offset=0.5)
+        self.assertEqual(sr_native, 44100)
+        self.assertAlmostEqual(signal.shape, (107603,))  # audio file is ~2.44 seconds long (107603 / 44100), offset is 0.5 seconds
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
+    def test_soundfile_load_with_duration(self):
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, duration=129653 / 44100)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (129653,))  # Assuming the duration is ~2,93 seconds long (129653 / 44100)
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
+    def test_soundfile_load_with_offset_and_duration(self):
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, offset=0.5, duration=129653 / 44100)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (107603,))  # Assuming the duration is 5 seconds starting from 2 seconds offset
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'QiDTDSZ4xAUniANNz4M43oa2FwpTSjvzW3IsKyqCVeE=')
+    def test_soundfile_load_empty_file(self):
+        """
+        To create an empty file, set an offset greater than the duration of the file:
+        ```
+        import soundfile as sf
+        import numpy as np
+        duration = 129653 / 44100  # ~2.93 seconds
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(input_file_test_de, offset=5, duration=duration)
+        sf.write(EVENTS_FOLDER / "test_empty.wav", data=signal, samplerate=44100)
+        ```
+        """
+        input_empty = EVENTS_FOLDER / "test_empty.wav"
+        hash_input_empty = hash_calculate(input_empty, is_file=True)
+        self.assertEqual(hash_input_empty, b'i4+6/oZ5B2RUQpdW+nLxHV9ELIc4HMakKFRR2Cap5ik=')
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(input_empty)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(signal.shape, (0, ))  # Assuming the file is empty
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=')
+    def test_soundfile_load_full_stereo_file_long(self):
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(EVENTS_FOLDER / "test_stereo.wav", duration=6)
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(
+            signal.shape, (2, 264600)
+        )  # Assuming the audio file is ~6 seconds long (264600 / 44100)
+        signal_contiguous = np.ascontiguousarray(signal)
+        hash_output = hash_calculate(signal_contiguous, is_file=False)
+        self.assertEqual(hash_output, b'NBLPhDBmZSTv844S3oDf4lMbQt1x+JbRckub/3rSEJI=')
+    def test_soundfile_load_soundfile_object(self):
+        import soundfile as sf
+        signal, sr_native = lambdaSpeechToScore.soundfile_load(sf.SoundFile(input_file_test_de))
+        self.assertEqual(sr_native, 44100)
+        self.assertEqual(
+            signal.shape, (129653,)
+        )  # Assuming the audio file is ~2,93 seconds long (107603 / 44100)
+        hash_output = hash_calculate(signal, is_file=False)
+        self.assertEqual(hash_output, b'3bfNuuMk0ov5+E77cUZmzjijfBUaMxuy1mrPmyjFyeo=')
 class TestBufToFloat(unittest.TestCase):
     def test_buf_to_float_2_bytes(self):