juancopi81 committed on
Commit
0760318
1 Parent(s): c970deb

Change to accept only 10 sec for transcription

Browse files
Files changed (2) hide show
  1. app.py +2 -0
  2. utils.py +50 -3
app.py CHANGED
@@ -4,6 +4,8 @@ os.system("python3 -m pip install -e .")
4
 
5
  import gradio as gr
6
 
 
 
7
  from inferencemodel import InferenceModel
8
  from utils import upload_audio
9
 
 
4
 
5
  import gradio as gr
6
 
7
+ import note_seq
8
+
9
  from inferencemodel import InferenceModel
10
  from utils import upload_audio
11
 
utils.py CHANGED
@@ -1,7 +1,54 @@
1
 
2
- import note_seq
 
 
 
 
 
3
 
4
  def upload_audio(audio, sample_rate):
5
 
6
- return note_seq.audio_io.wav_data_to_samples_librosa(
7
- audio, sample_rate=sample_rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
+ import tempfile
3
+
4
+ import librosa
5
+
6
class AudioIOReadError(Exception):
    """Raised when audio data cannot be read or decoded.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers can catch it; ``BaseException`` is reserved
    for interpreter-level signals such as ``KeyboardInterrupt`` and
    ``SystemExit``. Code that catches ``AudioIOReadError`` or
    ``BaseException`` keeps working unchanged.
    """
8
 
9
  def upload_audio(audio, sample_rate):
10
 
11
+ return wav_data_to_samples_librosa(audio, sample_rate=sample_rate)
12
+
13
def wav_data_to_samples_librosa(audio_file, sample_rate):
    """Loads in-memory wav data by way of a temporary file.

    The data is written to a temporary ``.wav`` file and that file's path is
    handed to ``load_audio``, which decodes it with librosa. The temporary
    file is closed before it is decoded and is always removed afterwards.

    Args:
      audio_file: Bytes-like contents of the wav file to load (written to a
        binary-mode temporary file).
      sample_rate: The number of samples per second at which the audio will be
        returned. Resampling will be performed if necessary.

    Returns:
      The samples returned by ``load_audio`` for the temporary file. Note that
      ``load_audio`` is called with its default ``duration``, so only the
      leading part of the audio is decoded.

    Raises:
      AudioIOReadError: If librosa is unable to load the audio data.
    """
    import os  # Local import: only needed to clean up the temporary file.

    # delete=False plus explicit removal: a NamedTemporaryFile that is still
    # open cannot be reopened by name on every platform (notably Windows), so
    # the file is closed first and deleted by hand once decoding is done.
    wav_input_file = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
    try:
        with wav_input_file:
            # Leaving the `with` block flushes and closes the file.
            wav_input_file.write(audio_file)
        return load_audio(wav_input_file.name, sample_rate)
    finally:
        os.remove(wav_input_file.name)
37
+
38
def load_audio(audio_filename, sample_rate, duration=10):
    """Loads at most `duration` seconds of an audio file as mono samples.

    Args:
      audio_filename: File path to load.
      sample_rate: The number of samples per second at which the audio will be
        returned. Resampling will be performed if necessary.
      duration: Maximum length of audio to load, in seconds, counted from the
        start of the file. Forwarded to ``librosa.load``; defaults to 10.

    Returns:
      A numpy array of audio samples, single-channel (mono) and sampled at the
      specified rate, in float32 format (librosa's default dtype).

    Raises:
      AudioIOReadError: If librosa is unable to load the audio data. The
        original exception is attached as ``__cause__``.
    """
    try:
        # librosa.load returns (samples, sample_rate); the rate is already
        # known to the caller, so it is discarded.
        samples, _ = librosa.load(
            audio_filename, sr=sample_rate, mono=True, duration=duration)
    except Exception as e:  # pylint: disable=broad-except
        # Chain explicitly so the underlying decoder error stays visible.
        raise AudioIOReadError(e) from e
    return samples