Spaces:

Kaworu17
/

YAMNet

Sleeping

App Files Community

Kaworu17 committed on May 5

Commit

e63bfc0

verified ·

1 Parent(s): 4a03abd

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -14

app.py CHANGED Viewed

@@ -4,11 +4,10 @@ import numpy as np
 import matplotlib.pyplot as plt
 import gradio as gr
 import soundfile as sf
-from scipy.signal import resample  # Correct resampling method
 # Load YAMNet model from TensorFlow Hub
-yamnet_model_handle = "https://tfhub.dev/google/yamnet/1"
-yamnet_model = hub.load(yamnet_model_handle)
 # Load class labels
 def load_class_map():
@@ -17,30 +16,30 @@ def load_class_map():
         'https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv'
     )
     with open(class_map_path, 'r') as f:
-        class_names = [line.strip().split(',')[2] for line in f.readlines()[1:]]
-    return class_names
 class_names = load_class_map()
 # Classification function
 def classify_audio(file_path):
     try:
-        # Load audio file (WAV, MP3, etc.)
         audio_data, sample_rate = sf.read(file_path)
-        # Convert stereo to mono if needed
         if len(audio_data.shape) > 1:
             audio_data = np.mean(audio_data, axis=1)
-        # Normalize audio
         audio_data = audio_data / np.max(np.abs(audio_data))
-        # Resample to 16kHz if necessary
         target_rate = 16000
         if sample_rate != target_rate:
             duration = audio_data.shape[0] / sample_rate
             new_length = int(duration * target_rate)
             audio_data = resample(audio_data, new_length)
         # Convert to tensor
         waveform = tf.convert_to_tensor(audio_data, dtype=tf.float32)
@@ -53,20 +52,20 @@ def classify_audio(file_path):
         top_prediction = class_names[top_5[0]]
         top_scores = {class_names[i]: float(mean_scores[i]) for i in top_5}
-        # Create waveform plot
         fig, ax = plt.subplots()
         ax.plot(audio_data)
         ax.set_title("Waveform")
-        ax.set_xlabel("Time")
         ax.set_ylabel("Amplitude")
         plt.tight_layout()
         return top_prediction, top_scores, fig
     except Exception as e:
-        return f"Error processing audio: {e}", {}, None
-# Gradio interface
 interface = gr.Interface(
     fn=classify_audio,
     inputs=gr.Audio(type="filepath", label="Upload .wav or .mp3 audio file"),
@@ -80,4 +79,4 @@ interface = gr.Interface(
 )
 if __name__ == "__main__":
-    interface.launch()

 import matplotlib.pyplot as plt
 import gradio as gr
 import soundfile as sf
+from scipy.signal import resample
 # Load YAMNet model from TensorFlow Hub
+yamnet_model = hub.load("https://tfhub.dev/google/yamnet/1")
 # Load class labels
 def load_class_map():
         'https://raw.githubusercontent.com/tensorflow/models/master/research/audioset/yamnet/yamnet_class_map.csv'
     )
     with open(class_map_path, 'r') as f:
+        return [line.strip().split(',')[2] for line in f.readlines()[1:]]
 class_names = load_class_map()
 # Classification function
 def classify_audio(file_path):
     try:
+        # Load audio
         audio_data, sample_rate = sf.read(file_path)
+        # Convert stereo to mono
         if len(audio_data.shape) > 1:
             audio_data = np.mean(audio_data, axis=1)
+        # Normalize
         audio_data = audio_data / np.max(np.abs(audio_data))
+        # Resample to 16kHz if needed
         target_rate = 16000
         if sample_rate != target_rate:
             duration = audio_data.shape[0] / sample_rate
             new_length = int(duration * target_rate)
             audio_data = resample(audio_data, new_length)
+            sample_rate = target_rate
         # Convert to tensor
         waveform = tf.convert_to_tensor(audio_data, dtype=tf.float32)
         top_prediction = class_names[top_5[0]]
         top_scores = {class_names[i]: float(mean_scores[i]) for i in top_5}
+        # Waveform plot
         fig, ax = plt.subplots()
         ax.plot(audio_data)
         ax.set_title("Waveform")
+        ax.set_xlabel("Time (samples)")
         ax.set_ylabel("Amplitude")
         plt.tight_layout()
         return top_prediction, top_scores, fig
     except Exception as e:
+        return f"Error processing audio: {str(e)}", {}, None
+# Gradio interface (HF-compatible)
 interface = gr.Interface(
     fn=classify_audio,
     inputs=gr.Audio(type="filepath", label="Upload .wav or .mp3 audio file"),
 )
 if __name__ == "__main__":
+    interface.launch()