abby4 committed on
Commit
a088675
1 Parent(s): 3a213e0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -18
app.py CHANGED
@@ -1,32 +1,24 @@
1
  import gradio as gr
2
  from fastai.vision.all import *
3
- import librosa
4
  import numpy as np
5
  import matplotlib.pyplot as plt
6
- from pydub import AudioSegment
7
  import tempfile
 
 
8
 
 
9
  learn = load_learner('model.pkl')
10
  labels = learn.dls.vocab
11
 
12
-
13
  def record_audio(duration=3, sr=44100, channels=1):
14
  print("Recording...")
15
  audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
16
  sd.wait()
17
  print("Recording stopped.")
18
  return audio, sr
19
-
20
- def audio_to_spectrogram(audio_file,):
21
- if audio_file.endswith('.mp3'):
22
- with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
23
- audio = AudioSegment.from_mp3(audio_file)
24
- audio.export(temp_wav.name, format='wav')
25
- y, sr = librosa.load(temp_wav.name, sr=None)
26
- else:
27
- y, sr = librosa.load(audio_file, sr=None)
28
 
29
- S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
 
30
  S_dB = librosa.power_to_db(S, ref=np.max)
31
  fig, ax = plt.subplots()
32
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
@@ -38,17 +30,18 @@ def audio_to_spectrogram(audio_file,):
38
  return spectrogram_file
39
 
40
  def predict(audio):
41
- spectrogram_file = audio_to_spectrogram(audio)
 
42
  img = PILImage.create(spectrogram_file)
43
  img = img.resize((512, 512))
44
  pred, pred_idx, probs = learn.predict(img)
45
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
46
 
47
- examples = ['example_audio.mp3']
48
-
49
  gr.Interface(
50
  fn=predict,
51
- inputs=gr.Audio(sources="microphone", type="filepath", label="Upload audio (WAV or MP3)"),
52
  outputs=gr.components.Label(num_top_classes=3),
53
  examples=examples,
54
- ).launch()
 
1
  import gradio as gr
2
  from fastai.vision.all import *
 
3
  import numpy as np
4
  import matplotlib.pyplot as plt
 
5
  import tempfile
6
+ import sounddevice as sd
7
+ import soundfile as sf
8
 
9
+ # Load your trained model and define labels
10
  learn = load_learner('model.pkl')
11
  labels = learn.dls.vocab
12
 
 
13
  def record_audio(duration=3, sr=44100, channels=1):
14
  print("Recording...")
15
  audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
16
  sd.wait()
17
  print("Recording stopped.")
18
  return audio, sr
 
 
 
 
 
 
 
 
 
19
 
20
+ def audio_to_spectrogram(audio, sr):
21
+ S = librosa.feature.melspectrogram(y=audio[:, 0], sr=sr, n_mels=128, fmax=8000)
22
  S_dB = librosa.power_to_db(S, ref=np.max)
23
  fig, ax = plt.subplots()
24
  img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
 
30
  return spectrogram_file
31
 
32
  def predict(audio):
33
+ audio_data, sr = sf.read(audio)
34
+ spectrogram_file = audio_to_spectrogram(audio_data, sr)
35
  img = PILImage.create(spectrogram_file)
36
  img = img.resize((512, 512))
37
  pred, pred_idx, probs = learn.predict(img)
38
  return {labels[i]: float(probs[i]) for i in range(len(labels))}
39
 
40
+ # Launch the interface
41
+ examples = [['example_audio.mp3']]
42
  gr.Interface(
43
  fn=predict,
44
+ inputs=gr.Audio(sources="microphone", type="file", label="Record audio (WAV)"),
45
  outputs=gr.components.Label(num_top_classes=3),
46
  examples=examples,
47
+ ).launch()