lyimo committed on
Commit
4856025
1 Parent(s): 752df3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -5,16 +5,20 @@ import numpy as np
5
  import matplotlib.pyplot as plt
6
  from pydub import AudioSegment
7
  import tempfile
 
8
 
9
  learn = load_learner('model.pkl')
10
  labels = learn.dls.vocab
11
 
12
  def audio_to_spectrogram(audio_file):
13
- if audio_file.endswith('.mp3'):
14
- with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
15
- audio = AudioSegment.from_mp3(audio_file)
16
- audio.export(temp_wav.name, format='wav')
17
- y, sr = librosa.load(temp_wav.name, sr=None)
 
 
 
18
  else:
19
  y, sr = librosa.load(audio_file, sr=None)
20
 
@@ -36,11 +40,11 @@ def predict(audio):
36
  pred, pred_idx, probs = learn.predict(img)
37
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
38
 
39
- examples = ['example_audio.mp3']
40
-
41
  gr.Interface(
42
  fn=predict,
43
- inputs=gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
 
 
44
  outputs=gr.components.Label(num_top_classes=3),
45
- examples=examples,
46
- ).launch()
 
5
  import matplotlib.pyplot as plt
6
  from pydub import AudioSegment
7
  import tempfile
8
+ import PIL
9
 
10
  learn = load_learner('model.pkl')
11
  labels = learn.dls.vocab
12
 
13
  def audio_to_spectrogram(audio_file):
14
+ if isinstance(audio_file, str):
15
+ if audio_file.endswith('.mp3'):
16
+ with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
17
+ audio = AudioSegment.from_mp3(audio_file)
18
+ audio.export(temp_wav.name, format='wav')
19
+ y, sr = librosa.load(temp_wav.name, sr=None)
20
+ else:
21
+ y, sr = librosa.load(audio_file, sr=None)
22
  else:
23
  y, sr = librosa.load(audio_file, sr=None)
24
 
 
40
  pred, pred_idx, probs = learn.predict(img)
41
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
42
 
 
 
43
  gr.Interface(
44
  fn=predict,
45
+ inputs=[
46
+ gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record audio (WAV or MP3)"),
47
+ ],
48
  outputs=gr.components.Label(num_top_classes=3),
49
+ live=True
50
+ ).launch()