| import gradio as gr |
| import tensorflow as tf |
| import tensorflow_hub as hub |
| import numpy as np |
| import librosa |
| from tensorflow.keras.models import load_model |
|
|
| |
| print("Loading YAMNet model...") |
| YAMNET_MODEL_PATH = 'https://tfhub.dev/google/yamnet/1' |
| yamnet_model = hub.load(YAMNET_MODEL_PATH) |
| print("YAMNet loaded.") |
|
|
| print("Loading custom classifier model...") |
| SAVED_KERAS_MODEL_PATH = "emergency_audio_classifier.keras" |
| classifier_model = load_model(SAVED_KERAS_MODEL_PATH) |
| print("Custom classifier loaded.") |
|
|
| |
| SAMPLE_RATE = 16000 |
|
|
| |
| |
| |
| |
| def classify_audio(filepath): |
| """ |
| Takes an audio FILEPATH from Gradio, processes it, and returns predictions. |
| """ |
| if filepath is None: |
| print("Error: No audio file provided") |
| return {"Error: No audio provided": 1.0} |
| |
| print(f"[DEBUG] Received file: {filepath}") |
|
|
| try: |
| |
| |
| |
| |
| audio_data, _ = librosa.load(filepath, |
| sr=SAMPLE_RATE, |
| mono=True) |
| |
| if len(audio_data) < 100: |
| print("Error: Audio is too short after processing") |
| return {"Error: Audio is too short": 1.0} |
|
|
| print(f"[DEBUG] Audio loaded and processed: {len(audio_data)} samples") |
|
|
| |
| _, embeddings, _ = yamnet_model(audio_data) |
| mean_embedding = np.mean(embeddings.numpy(), axis=0) |
| audio_embedding = np.expand_dims(mean_embedding, axis=0) |
| |
| prediction_prob = classifier_model.predict(audio_embedding)[0][0] |
| |
| probability_distress = float(prediction_prob) |
| probability_normal = 1.0 - probability_distress |
| |
| output_labels = { |
| "Distress": probability_distress, |
| "Normal": probability_normal |
| } |
| |
| print(f"[DEBUG] Prediction: {output_labels}") |
| return output_labels |
| |
| except Exception as e: |
| print(f"Error during classification: {e}") |
| return {f"Error: {str(e)}": 1.0} |
|
|
| |
| |
| |
| |
| iface = gr.Interface( |
| fn=classify_audio, |
| inputs=gr.Audio( |
| sources=["microphone", "upload"], |
| |
| type="filepath", |
| label="Record Audio or Upload File" |
| ), |
| outputs=gr.Label( |
| num_top_classes=2, |
| label="Prediction" |
| ), |
| title="🚨 Emergency Audio Detection Module", |
| description="A two-stage (YAMNet + Custom) model to classify audio as 'Distress' or 'Normal'." |
| ) |
|
|
| |
| if __name__ == "__main__": |
| iface.launch() |