Spaces:
Running
Running
alessandro trinca tornidor
committed on
Commit
•
d6917a8
1
Parent(s):
509f5b7
add function to load audio files with soundfile
Browse files
aip_trainer/lambdas/lambdaSpeechToScore.py
CHANGED
@@ -49,6 +49,7 @@ def lambda_handler(event, context):
|
|
49 |
|
50 |
|
51 |
def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
|
|
|
52 |
app_logger.info(f"real_text:{real_text} ...")
|
53 |
app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
|
54 |
app_logger.info(f"language:{language} ...")
|
@@ -74,7 +75,17 @@ def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | d
|
|
74 |
|
75 |
start = time.time()
|
76 |
app_logger.info(f'Loading .ogg file file {random_file_name} ...')
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
duration = time.time() - start
|
80 |
app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
|
@@ -156,14 +167,39 @@ def calc_start_end(sr_native, time_position, n_channels):
|
|
156 |
return int(np.round(sr_native * time_position)) * n_channels
|
157 |
|
158 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
|
160 |
"""Load an audio buffer using audioread.
|
161 |
|
162 |
This loads one block at a time, and then concatenates the results.
|
163 |
"""
|
164 |
-
|
165 |
-
import shutil
|
166 |
-
shutil.copyfile(path, Path("/tmp") / f"test_en_{Path(path).name}")
|
167 |
y = []
|
168 |
app_logger.debug(f"reading audio file at path:{path} ...")
|
169 |
with audioread.audio_open(path) as input_file:
|
|
|
49 |
|
50 |
|
51 |
def get_speech_to_score_dict(real_text: str, file_bytes_or_audiotmpfile: str | dict, language: str = "en", remove_random_file: bool = True):
|
52 |
+
from soundfile import LibsndfileError
|
53 |
app_logger.info(f"real_text:{real_text} ...")
|
54 |
app_logger.debug(f"file_bytes:{file_bytes_or_audiotmpfile} ...")
|
55 |
app_logger.info(f"language:{language} ...")
|
|
|
75 |
|
76 |
start = time.time()
|
77 |
app_logger.info(f'Loading .ogg file file {random_file_name} ...')
|
78 |
+
try:
|
79 |
+
signal, _ = soundfile_load(random_file_name)
|
80 |
+
except LibsndfileError as sfe:
|
81 |
+
# https://github.com/beetbox/audioread/issues/144
|
82 |
+
# deprecation warnings => pip install standard-aifc standard-sunau
|
83 |
+
app_logger.error(f"Error reading file {random_file_name}: {sfe}, re-try with audioread...")
|
84 |
+
try:
|
85 |
+
signal, _ = audioread_load(random_file_name)
|
86 |
+
except ModuleNotFoundError as mnfe:
|
87 |
+
app_logger.error(f"Error reading file {random_file_name}: {mnfe}, try read https://github.com/beetbox/audioread/issues/144")
|
88 |
+
raise mnfe
|
89 |
|
90 |
duration = time.time() - start
|
91 |
app_logger.info(f'Read .ogg file {random_file_name} in {duration}s.')
|
|
|
167 |
return int(np.round(sr_native * time_position)) * n_channels
|
168 |
|
169 |
|
170 |
+
def soundfile_load(path: str | Path, offset: float = 0.0, duration: float = None, dtype=np.float32):
    """Read an audio buffer through soundfile (logic adapted from librosa).

    Args:
        path: location of the audio file, or an already constructed
            ``soundfile.SoundFile`` object, which is then used directly.
        offset: position, in seconds, at which reading starts. A falsy value
            (the default ``0.0``) skips the seek entirely.
        duration: length, in seconds, of the portion to read. ``None`` reads
            everything from the start position onwards.
        dtype: sample type forwarded to ``SoundFile.read``.

    Returns:
        A ``(y, sr_native)`` tuple: the transposed sample array and the
        sample rate reported by the file.
    """
    import soundfile as sf

    # Reuse a SoundFile handed in by the caller; otherwise open the path ourselves.
    audio_source = path if isinstance(path, sf.SoundFile) else sf.SoundFile(path)

    with audio_source as handle:
        sr_native = handle.samplerate

        if offset:
            # Move to the first frame of the requested window.
            handle.seek(int(offset * sr_native))

        # -1 tells soundfile to read all remaining frames.
        n_frames = -1 if duration is None else int(duration * sr_native)

        # Transpose the samples so the layout matches the librosa form.
        samples = handle.read(frames=n_frames, dtype=dtype, always_2d=False)
        y = samples.T

    return y, sr_native
|
196 |
+
|
197 |
+
|
198 |
def audioread_load(path, offset=0.0, duration=None, dtype=np.float32):
|
199 |
"""Load an audio buffer using audioread.
|
200 |
|
201 |
This loads one block at a time, and then concatenates the results.
|
202 |
"""
|
|
|
|
|
|
|
203 |
y = []
|
204 |
app_logger.debug(f"reading audio file at path:{path} ...")
|
205 |
with audioread.audio_open(path) as input_file:
|