Spaces:

juancopi81
/

youtube-music-transcribe

Build error

App Files Files Community

juancopi81 committed on Nov 4, 2022

Commit

0760318

•

1 Parent(s): c970deb

Change to accept only 10 sec for transcription

Browse files

Files changed (2) hide show

app.py +2 -0
utils.py +50 -3

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ os.system("python3 -m pip install -e .")
 import gradio as gr
 from inferencemodel import InferenceModel
 from utils import upload_audio

 import gradio as gr
+import note_seq
 from inferencemodel import InferenceModel
 from utils import upload_audio

utils.py CHANGED Viewed

@@ -1,7 +1,54 @@
-import note_seq
 def upload_audio(audio, sample_rate):
-  return note_seq.audio_io.wav_data_to_samples_librosa(
-    audio, sample_rate=sample_rate)

+import tempfile
+import librosa
+class AudioIOReadError(BaseException):  # pylint:disable=g-bad-exception-name
+  pass
 def upload_audio(audio, sample_rate):
+  return wav_data_to_samples_librosa(audio, sample_rate=sample_rate)
+def wav_data_to_samples_librosa(audio_file, sample_rate):
+  """Loads an in-memory audio file with librosa.
+  Use this instead of wav_data_to_samples if the wav is 24-bit, as that's
+  incompatible with wav_data_to_samples internal scipy call.
+  Will copy to a local temp file before loading so that librosa can read a file
+  path. Librosa does not currently read in-memory files.
+  It will be treated as a .wav file.
+  Args:
+    audio_file: Wav file to load.
+    sample_rate: The number of samples per second at which the audio will be
+        returned. Resampling will be performed if necessary.
+  Returns:
+    A numpy array of audio samples, single-channel (mono) and sampled at the
+    specified rate, in float32 format.
+  Raises:
+    AudioIOReadError: If librosa is unable to load the audio data.
+  """
+  with tempfile.NamedTemporaryFile(suffix='.wav') as wav_input_file:
+    wav_input_file.write(audio_file)
+    # Before copying the file, flush any contents
+    wav_input_file.flush()
+    # Rewind the file position to the start (not needed for the copy, but done for certainty)
+    wav_input_file.seek(0)
+    return load_audio(wav_input_file.name, sample_rate)
+def load_audio(audio_filename, sample_rate, duration=10):
+  """Loads an audio file.
+  Args:
+    audio_filename: File path to load.
+    sample_rate: The number of samples per second at which the audio will be
+        returned. Resampling will be performed if necessary.
+  Returns:
+    A numpy array of audio samples, single-channel (mono) and sampled at the
+    specified rate, in float32 format.
+  Raises:
+    AudioIOReadError: If librosa is unable to load the audio data.
+  """
+  try:
+    y, unused_sr = librosa.load(audio_filename, sr=sample_rate, mono=True, duration=duration)
+  except Exception as e:  # pylint: disable=broad-except
+    raise AudioIOReadError(e)
+  return y