"""DepresSense Gradio app.

Converts an uploaded voice recording to a log-mel spectrogram image and
classifies it with a Roboflow-hosted model ("depressense/2").
"""

import os
import tempfile

import gradio as gr
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from inference_sdk import InferenceHTTPClient

# NOTE(review): the API key was hard-coded in source. Prefer the environment;
# the literal fallback preserves existing behavior, but the key should be
# rotated and removed from version control.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("ROBOFLOW_API_KEY", "C3oFYzyxfOZhNxlC8r5E"),
)


def process_audio(audio):
    """Render the audio file at path *audio* as a log-mel spectrogram PNG.

    Returns the path of the written image. A unique temporary file is used
    so concurrent requests do not clobber each other's output (the original
    fixed 'spectrogram.png' name raced under multiple users).
    """
    # sr=None keeps the file's native sampling rate.
    y, sr = librosa.load(audio, sr=None)

    mel_spect = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel_spect = librosa.power_to_db(mel_spect, ref=np.max)

    fd, out_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)  # matplotlib reopens the path itself

    plt.figure(figsize=(10, 4))
    librosa.display.specshow(log_mel_spect, sr=sr, x_axis='time', y_axis='mel')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Log Mel Spectrogram')
    plt.tight_layout()
    plt.savefig(out_path)
    plt.close()

    return out_path


def infer(audio):
    """Classify a voice recording as Depressed/Healthy via the remote model.

    Returns a (summary text, spectrogram image path) pair for the Gradio UI.
    """
    spectrogram_path = process_audio(audio)

    result = CLIENT.infer(spectrogram_path, model_id="depressense/2")

    predictions = result.get('predictions') or []
    if not predictions:
        # The remote model returned nothing usable; report that instead of
        # crashing with an IndexError on predictions[0].
        return "No prediction returned by the model", spectrogram_path

    result_class = predictions[0]['class']
    confidence = predictions[0]['confidence']

    # NOTE(review): Roboflow usually reports 'class' as a string; accept both
    # int 1 and "1" so a string class id is not silently mapped to "Healthy".
    label = "Depressed" if result_class in (1, "1") else "Healthy"
    return f"{label} with {confidence * 100:.2f}% confidence", spectrogram_path


# Gradio UI wiring.
# NOTE(review): live=True re-runs remote inference on every input change,
# which is slow and costly; consider live=False with a submit button.
iface = gr.Interface(
    fn=infer,
    inputs=gr.Audio(sources=["upload", "microphone"], type="filepath"),
    outputs=[
        gr.Textbox(label="Inference Result"),
        gr.Image(label="Log Mel Spectrogram"),
    ],
    title="DepresSense Model",
    description="Upload a voice recording to get inference results from DepresSense.",
    theme=gr.themes.Soft(),
    live=True,
)

if __name__ == "__main__":
    # Guarding launch() lets the module be imported (e.g. for tests) without
    # starting a publicly shared server as an import side effect.
    iface.launch(
        share=True,
        debug=True,
        favicon_path="favicon.png",
    )