abby4 committed
Commit: dc1ec27
Parent: c0d7ef2

Update app.py

Files changed (1):
  app.py  +18 -22
app.py CHANGED
@@ -1,31 +1,28 @@
 import gradio as gr
 from fastai.vision.all import *
+import librosa
 import numpy as np
 import matplotlib.pyplot as plt
+from pydub import AudioSegment
 import tempfile
-import sounddevice as sd
-import soundfile as sf
+import PIL

-# Load your trained model and define labels
 learn = load_learner('model.pkl')
 labels = learn.dls.vocab

-def record_audio(duration=3, sr=44100, channels=1):
-    print("Recording...")
-    audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
-    sd.wait()
-    print("Recording stopped.")
-    return audio, sr
-
-def audio_to_spectrogram(audio_file, sr):
+def audio_to_spectrogram(audio_file):
+    if isinstance(audio_file, str):
         if audio_file.endswith('.mp3'):
-        with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
-            audio = AudioSegment.from_mp3(audio_file)
-            audio.export(temp_wav.name, format='wav')
-            y, sr = librosa.load(temp_wav.name, sr=None)
+            with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
+                audio = AudioSegment.from_mp3(audio_file)
+                audio.export(temp_wav.name, format='wav')
+                y, sr = librosa.load(temp_wav.name, sr=None)
+        else:
+            y, sr = librosa.load(audio_file, sr=None)
     else:
         y, sr = librosa.load(audio_file, sr=None)
-    S = librosa.feature.melspectrogram(y=audio[:, 0], sr=sr, n_mels=128, fmax=8000)
+
+    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
     S_dB = librosa.power_to_db(S, ref=np.max)
     fig, ax = plt.subplots()
     img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
@@ -37,18 +34,17 @@ def audio_to_spectrogram(audio_file, sr):
     return spectrogram_file

 def predict(audio):
-    audio_data, sr = sf.read(audio)
-    spectrogram_file = audio_to_spectrogram(audio_data, sr)
+    spectrogram_file = audio_to_spectrogram(audio)
     img = PILImage.create(spectrogram_file)
     img = img.resize((512, 512))
     pred, pred_idx, probs = learn.predict(img)
     return {labels[i]: float(probs[i]) for i in range(len(labels))}

-# Launch the interface
-examples = [['example_audio.mp3']]
 gr.Interface(
     fn=predict,
-    inputs=gr.Audio(sources="microphone", type="file", label="Record audio (WAV)"),
+    inputs=[
+        gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record audio (WAV or MP3)"),
+    ],
     outputs=gr.components.Label(num_top_classes=3),
-    examples=examples,
+    live=True
 ).launch()
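Note on the Gradio change: type="file" has been removed in recent Gradio releases, and with type="filepath" the component hands the callback a plain str path to a temporary copy of the uploaded or recorded clip. That is why predict can now pass audio straight to audio_to_spectrogram and the soundfile read is gone. A minimal standalone sketch of that contract (the describe function is illustrative, not part of this app):

import gradio as gr

def describe(audio_path):
    # With type="filepath", audio_path arrives as a str such as
    # "/tmp/gradio/.../audio.wav" (the exact location varies by version).
    return f"received {audio_path!r}"

gr.Interface(
    fn=describe,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs="text",
).launch()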
 
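For reference, the spectrogram step in isolation: a runnable sketch with the same parameters as the diff (n_mels=128, fmax=8000). The melspec_png helper and its figure-saving tail are illustrative stand-ins; the app's own saving code sits in the unchanged hunk the diff omits. One caveat worth checking on the Space: on some librosa versions, librosa.display must be imported explicitly, which the new import librosa line alone does not guarantee.

import librosa
import librosa.display  # specshow lives here; import it explicitly
import matplotlib.pyplot as plt
import numpy as np

def melspec_png(audio_path, out_png="spectrogram.png"):
    # Decode at the file's native sample rate, matching sr=None in app.py.
    y, sr = librosa.load(audio_path, sr=None)
    # 128-band mel power spectrogram capped at 8 kHz, as in app.py.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)  # power -> decibels
    fig, ax = plt.subplots()
    librosa.display.specshow(S_dB, x_axis='time', y_axis='mel',
                             sr=sr, fmax=8000, ax=ax)
    fig.savefig(out_png)  # hypothetical save step; app.py's version is not shown here
    plt.close(fig)
    return out_png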