Spaces:

aletrn
/

ai-pronunciation-trainer

Running

App Files Community

alessandro trinca tornidor committed on Nov 28, 2024

Commit

d6917a8

1 Parent(s): 509f5b7

add function to load audio files with soundfile

Browse files

Files changed (1) hide show

aip_trainer/lambdas/lambdaSpeechToScore.py +40 -4

aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED Viewed

@@ -49,6 +49,7 @@ def lambda_handler(event, context):
 def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
     app_logger.info(f"real_text:{real_text} ...")
     app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
     app_logger.info(f"language:{language} ...")
@@ -74,7 +75,17 @@ def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | d
     start = time.time()
     app_logger.info(f'Loading .ogg file file {random_file_name} ...')
-    signal, _ = audioread_load(random_file_name)
     duration = time.time() - start
     app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
@@ -156,14 +167,39 @@ def calc_start_end(sr_native, time_position, n_channels):
     return int(np.round(sr_native * time_position)) * n_channels
 def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     """Load an audio buffer using audioread.
     This loads one block at a time, and then concatenates the results.
     """
-    import shutil
-    shutil.copyfile(path, Path("/tmp") / f"test_en_{Path(path).name}")
     y = []
     app_logger.debug(f"reading audio file at path:{path} ...")
     with audioread.audio_open(path) as input_file:

 def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
+    from soundfile import LibsndfileError
     app_logger.info(f"real_text:{real_text} ...")
     app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
     app_logger.info(f"language:{language} ...")
     start = time.time()
     app_logger.info(f'Loading .ogg file file {random_file_name} ...')
+    try:
+        signal, _ = soundfile_load(random_file_name)
+    except LibsndfileError as sfe:
+        # https://github.com/beetbox/audioread/issues/144
+        # deprecation warnings => pip install standard-aifc standard-sunau
+        app_logger.error(f"Error reading file {random_file_name}: {sfe}, re-try with audioread...")
+        try:
+            signal, _ = audioread_load(random_file_name)
+        except ModuleNotFoundError as mnfe:
+            app_logger.error(f"Error reading file {random_file_name}: {mnfe}, try read https://github.com/beetbox/audioread/issues/144")
+            raise mnfe
     duration = time.time() - start
     app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
     return int(np.round(sr_native * time_position)) * n_channels
+def soundfile_load(path: str | Path, offset: float = 0.0, duration: float = None, dtype=np.float32):
+    """Load an audio buffer using soundfile. Taken from librosa.
+
+    Args:
+        path: location of the audio file to open, or an already-open
+            ``soundfile.SoundFile`` object, which is then used directly.
+        offset: where to start reading; it is multiplied by the file's
+            sample rate to get the frame to seek to, so it is expressed in
+            seconds. A falsy value (the default 0.0) skips the seek.
+        duration: how much to read, converted to a frame count the same way
+            as ``offset``. ``None`` (the default) requests ``frames=-1``.
+        dtype: sample type forwarded to ``SoundFile.read``.
+
+    Returns:
+        A ``(y, sr_native)`` tuple: the transposed result of
+        ``SoundFile.read`` and the sample rate reported by the file.
+    """
+    # Imported inside the function, so soundfile is only needed when this loader is called.
+    import soundfile as sf
+    if isinstance(path, sf.SoundFile):
+        # If the user passed an existing soundfile object,
+        # we can use it directly
+        # NOTE(review): the ``with`` block below also wraps this caller-supplied
+        # object, so it is presumably closed on exit - confirm callers do not reuse it.
+        context = path
+    else:
+        # Otherwise, create the soundfile object
+        context = sf.SoundFile(path)
+    with context as sf_desc:
+        sr_native = sf_desc.samplerate
+        if offset:
+            # Seek to the start of the target read
+            sf_desc.seek(int(offset * sr_native))
+        if duration is not None:
+            # Convert the requested length into a whole number of frames
+            frame_duration = int(duration * sr_native)
+        else:
+            # -1 is passed through to read(); per the soundfile docs a negative
+            # frame count reads the rest of the file - TODO confirm
+            frame_duration = -1
+        # Load the target number of frames, and transpose to match librosa form
+        y = sf_desc.read(frames=frame_duration, dtype=dtype, always_2d=False).T
+    return y, sr_native
 def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
     """Load an audio buffer using audioread.
     This loads one block at a time, and then concatenates the results.
     """
     y = []
     app_logger.debug(f"reading audio file at path:{path} ...")
     with audioread.audio_open(path) as input_file: