alessandro trinca tornidor committed on
Commit
d6917a8
1 Parent(s): 509f5b7

add function to load audio files with soundfile

Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py CHANGED
@@ -49,6 +49,7 @@ def lambda_handler(event, context):
49
 
50
 
51
  def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
 
52
  app_logger.info(f"real_text:{real_text} ...")
53
  app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
54
  app_logger.info(f"language:{language} ...")
@@ -74,7 +75,17 @@ def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | d
74
 
75
  start = time.time()
76
  app_logger.info(f'Loading .ogg file file {random_file_name} ...')
77
- signal, _ = audioread_load(random_file_name)
 
 
 
 
 
 
 
 
 
 
78
 
79
  duration = time.time() - start
80
  app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
@@ -156,14 +167,39 @@ def calc_start_end(sr_native, time_position, n_channels):
156
  return int(np.round(sr_native * time_position)) * n_channels
157
 
158
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
160
  """Load an audio buffer using audioread.
161
 
162
  This loads one block at a time, and then concatenates the results.
163
  """
164
-
165
- import shutil
166
- shutil.copyfile(path, Path("/tmp") / f"test_en_{Path(path).name}")
167
  y = []
168
  app_logger.debug(f"reading audio file at path:{path} ...")
169
  with audioread.audio_open(path) as input_file:
 
49
 
50
 
51
  def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
52
+ from soundfile import LibsndfileError
53
  app_logger.info(f"real_text:{real_text} ...")
54
  app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
55
  app_logger.info(f"language:{language} ...")
 
75
 
76
  start = time.time()
77
  app_logger.info(f'Loading .ogg file file {random_file_name} ...')
78
+ try:
79
+ signal, _ = soundfile_load(random_file_name)
80
+ except LibsndfileError as sfe:
81
+ # https://github.com/beetbox/audioread/issues/144
82
+ # deprecation warnings => pip install standard-aifc standard-sunau
83
+ app_logger.error(f"Error reading file {random_file_name}: {sfe}, re-try with audioread...")
84
+ try:
85
+ signal, _ = audioread_load(random_file_name)
86
+ except ModuleNotFoundError as mnfe:
87
+ app_logger.error(f"Error reading file {random_file_name}: {mnfe}, try read https://github.com/beetbox/audioread/issues/144")
88
+ raise mnfe
89
 
90
  duration = time.time() - start
91
  app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
 
167
  return int(np.round(sr_native * time_position)) * n_channels
168
 
169
 
170
def soundfile_load(path: str | Path, offset: float = 0.0, duration: float = None, dtype=np.float32):
    """Read an audio buffer through soundfile (adapted from librosa).

    Args:
        path: location of the audio file, or an already opened
            ``soundfile.SoundFile`` object, which is then used directly.
        offset: position, in seconds, where reading starts. A falsy value
            (the default ``0.0``) skips the seek entirely.
        duration: amount of audio to read, in seconds. ``None`` requests
            ``frames=-1`` from soundfile.
        dtype: sample dtype forwarded to ``SoundFile.read``.

    Returns:
        A tuple ``(y, sr_native)`` holding the transposed sample array and
        the sample rate reported by the file.
    """
    import soundfile as sf

    # Reuse a SoundFile handed in by the caller, otherwise open one from the path.
    handle = path if isinstance(path, sf.SoundFile) else sf.SoundFile(path)

    with handle as audio_file:
        native_rate = audio_file.samplerate

        if offset:
            # Move the read cursor to the first requested frame.
            audio_file.seek(int(offset * native_rate))

        # Seconds -> frame count; -1 when no duration was requested.
        n_frames = -1 if duration is None else int(duration * native_rate)

        # Transpose the result so it matches the layout librosa produces.
        samples = audio_file.read(frames=n_frames, dtype=dtype, always_2d=False).T

    return samples, native_rate
196
+
197
+
198
  def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
199
  """Load an audio buffer using audioread.
200
 
201
  This loads one block at a time, and then concatenates the results.
202
  """
 
 
 
203
  y = []
204
  app_logger.debug(f"reading audio file at path:{path} ...")
205
  with audioread.audio_open(path) as input_file: