abby4 committed
Commit: dc1ec27
Parent: c0d7ef2

Update app.py

Files changed (1):
  app.py  +18 -22
app.py CHANGED
@@ -1,31 +1,28 @@
 import gradio as gr
 from fastai.vision.all import *
+import librosa
 import numpy as np
 import matplotlib.pyplot as plt
+from pydub import AudioSegment
 import tempfile
-import sounddevice as sd
-import soundfile as sf
+import PIL

-# Load your trained model and define labels
 learn = load_learner('model.pkl')
 labels = learn.dls.vocab

-def record_audio(duration=3, sr=44100, channels=1):
-    print("Recording...")
-    audio = sd.rec(int(duration * sr), samplerate=sr, channels=channels, dtype='float32')
-    sd.wait()
-    print("Recording stopped.")
-    return audio, sr
-
-def audio_to_spectrogram(audio_file, sr):
+def audio_to_spectrogram(audio_file):
+    if isinstance(audio_file, str):
         if audio_file.endswith('.mp3'):
-        with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
-            audio = AudioSegment.from_mp3(audio_file)
-            audio.export(temp_wav.name, format='wav')
-            y, sr = librosa.load(temp_wav.name, sr=None)
+            with tempfile.NamedTemporaryFile(suffix='.wav') as temp_wav:
+                audio = AudioSegment.from_mp3(audio_file)
+                audio.export(temp_wav.name, format='wav')
+                y, sr = librosa.load(temp_wav.name, sr=None)
+        else:
+            y, sr = librosa.load(audio_file, sr=None)
     else:
         y, sr = librosa.load(audio_file, sr=None)
-    S = librosa.feature.melspectrogram(y=audio[:, 0], sr=sr, n_mels=128, fmax=8000)
+
+    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
     S_dB = librosa.power_to_db(S, ref=np.max)
     fig, ax = plt.subplots()
     img = librosa.display.specshow(S_dB, x_axis='time', y_axis='mel', sr=sr, fmax=8000, ax=ax)
@@ -37,18 +34,17 @@ def audio_to_spectrogram(audio_file, sr):
     return spectrogram_file

 def predict(audio):
-    audio_data, sr = sf.read(audio)
-    spectrogram_file = audio_to_spectrogram(audio_data, sr)
+    spectrogram_file = audio_to_spectrogram(audio)
     img = PILImage.create(spectrogram_file)
     img = img.resize((512, 512))
     pred, pred_idx, probs = learn.predict(img)
     return {labels[i]: float(probs[i]) for i in range(len(labels))}

-# Launch the interface
-examples = [['example_audio.mp3']]
 gr.Interface(
     fn=predict,
-    inputs=gr.Audio(sources="microphone", type="file", label="Record audio (WAV)"),
+    inputs=[
+        gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record audio (WAV or MP3)"),
+    ],
     outputs=gr.components.Label(num_top_classes=3),
-    examples=examples,
+    live=True
 ).launch()
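Note on the Gradio change: type="file" has been removed in recent Gradio releases, and with type="filepath" the component hands the callback a plain str path to a temporary copy of the uploaded or recorded clip. That is why predict can now pass audio straight to audio_to_spectrogram and the soundfile read is gone. A minimal standalone sketch of that contract (the describe function is illustrative, not part of this app):

import gradio as gr

def describe(audio_path):
    # With type="filepath", audio_path arrives as a str such as
    # "/tmp/gradio/.../audio.wav" (the exact location varies by version).
    return f"received {audio_path!r}"

gr.Interface(
    fn=describe,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs="text",
).launch()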
 
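For reference, the spectrogram step in isolation: a runnable sketch with the same parameters as the diff (n_mels=128, fmax=8000). The melspec_png helper and its figure-saving tail are illustrative stand-ins; the app's own saving code sits in the unchanged hunk the diff omits. One caveat worth checking on the Space: on some librosa versions, librosa.display must be imported explicitly, which the new import librosa line alone does not guarantee.

import librosa
import librosa.display  # specshow lives here; import it explicitly
import matplotlib.pyplot as plt
import numpy as np

def melspec_png(audio_path, out_png="spectrogram.png"):
    # Decode at the file's native sample rate, matching sr=None in app.py.
    y, sr = librosa.load(audio_path, sr=None)
    # 128-band mel power spectrogram capped at 8 kHz, as in app.py.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S_dB = librosa.power_to_db(S, ref=np.max)  # power -> decibels
    fig, ax = plt.subplots()
    librosa.display.specshow(S_dB, x_axis='time', y_axis='mel',
                             sr=sr, fmax=8000, ax=ax)
    fig.savefig(out_png)  # hypothetical save step; app.py's version is not shown here
    plt.close(fig)
    return out_png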