lyimo committed on
Commit
752df3a
1 Parent(s): 4a8dba0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -25
app.py CHANGED
@@ -9,24 +9,17 @@ import tempfile
9
  learn = load_learner('model.pkl')
10
  labels = learn.dls.vocab
11
 
12
- def audio_to_spectrogram(audio):
13
- # Handle both uploaded files and recorded audio
14
- if isinstance(audio, str): # Uploaded file
15
- if audio.endswith('.mp3'):
16
- with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
17
- audio = AudioSegment.from_mp3(audio)
18
- audio.export(temp_wav.name, format='wav')
19
- y, sr = librosa.load(temp_wav.name, sr=None)
20
- else:
21
- y, sr = librosa.load(audio, sr=None)
22
- else: # Recorded audio
23
- y, sr = librosa.load(audio, sr=None)
24
 
25
- # Generate mel spectrogram
26
  S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
27
  S_dB = librosa.power_to_db(S, ref=np.max)
28
-
29
- # Create and save spectrogram image
30
  fig, ax = plt.subplots()
31
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
32
  fig.colorbar(img, ax=ax, format='%+2.0f dB')
@@ -34,23 +27,20 @@ def audio_to_spectrogram(audio):
34
  spectrogram_file = "spectrogram.png"
35
  plt.savefig(spectrogram_file)
36
  plt.close()
37
-
38
  return spectrogram_file
39
 
40
  def predict(audio):
41
  spectrogram_file = audio_to_spectrogram(audio)
42
  img = PILImage.create(spectrogram_file)
43
- img = img.resize((512, 512)) # Assuming your model expects this size
44
  pred, pred_idx, probs = learn.predict(img)
45
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
46
 
47
- # Create Gradio interface with upload and microphone options
48
- examples = ['example_audio.mp3'] # Optional: provide example audio for upload
49
  gr.Interface(
50
  fn=predict,
51
- inputs=[
52
- gr.Audio(sources="upload", type="filepath", label="Upload audio (WAV or MP3)"),
53
- gr.Audio(sources="microphone", label="Record audio")
54
- ],
55
- outputs=gr.components.Label(num_top_classes=3)
56
- ).launch()
 
9
  learn = load_learner('model.pkl')
10
  labels = learn.dls.vocab
11
 
12
def audio_to_spectrogram(audio_file):
    """Convert an audio file (WAV or MP3) to a mel-spectrogram PNG.

    Parameters
    ----------
    audio_file : str
        Path to the input audio file. MP3 input is first transcoded to
        WAV via pydub before loading with librosa.

    Returns
    -------
    str
        Path of the saved spectrogram image ("spectrogram.png").
    """
    # Case-insensitive extension check: a bare `.endswith('.mp3')` silently
    # mis-routes files named e.g. "clip.MP3" to the direct-librosa branch.
    if audio_file.lower().endswith('.mp3'):
        with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
            # Transcode MP3 -> WAV, then load the WAV while the temp file
            # is still alive inside the `with` block.
            # NOTE(review): re-opening a NamedTemporaryFile by name while it
            # is open works on POSIX but not Windows — confirm deploy target.
            sound = AudioSegment.from_mp3(audio_file)
            sound.export(temp_wav.name, format='wav')
            y, sr = librosa.load(temp_wav.name, sr=None)  # sr=None keeps native rate
    else:
        y, sr = librosa.load(audio_file, sr=None)

    # Mel spectrogram in dB — presumably the representation the model was
    # trained on (128 mel bands, 8 kHz ceiling).
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)

    fig, ax = plt.subplots()
    img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
    fig.colorbar(img, ax=ax, format='%+2.0f dB')

    # Fixed output name: each call overwrites the previous spectrogram.
    spectrogram_file = "spectrogram.png"
    plt.savefig(spectrogram_file)
    plt.close(fig)  # close this specific figure so repeated calls don't leak figures
    return spectrogram_file
31
 
32
def predict(audio):
    """Classify an audio clip.

    Renders the clip as a mel-spectrogram image, feeds it to the fastai
    learner, and returns a mapping of class label -> probability.
    """
    image_path = audio_to_spectrogram(audio)
    image = PILImage.create(image_path)
    image = image.resize((512, 512))
    _, _, probs = learn.predict(image)
    return dict(zip(labels, map(float, probs)))
38
 
39
# Example clip shown in the UI — assumes example_audio.mp3 ships alongside
# the app (TODO confirm the file exists in the Space repository).
examples = ['example_audio.mp3']

gr.Interface(
    fn=predict,
    # Gradio 4 declares `sources` as a list of enabled input sources;
    # pass ["upload"] rather than the bare string "upload".
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload audio (WAV or MP3)"),
    outputs=gr.components.Label(num_top_classes=3),
    examples=examples,
).launch()