Spaces:
Sleeping
Sleeping
import gradio as gr | |
import librosa | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import tempfile | |
import librosa.display | |
def calculate_basic_metrics(y, sr):
    """Compute basic summary descriptors of an audio signal.

    Args:
        y: Audio samples as a floating-point NumPy array accepted by librosa.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict with the keys 'Average Pitch', 'Number of MFCCs', 'Energy',
        'Zero Crossing Rate' and 'Spectral Centroid'. Values are plain
        Python numbers so they serialize cleanly; 'Average Pitch' is None
        when no pitched bin was found (e.g. silent input).
    """
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    # Only strictly positive entries are pitch estimates; averaging an empty
    # selection would yield NaN plus a RuntimeWarning, so report None instead.
    detected = pitches[pitches > 0]
    average_pitch = float(np.mean(detected)) if detected.size else None

    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    energy = float(np.sum(y ** 2))
    zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
    spectral_centroid = float(
        np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    )

    return {
        'Average Pitch': average_pitch,
        # The MFCC matrix is laid out (..., n_mfcc, frames): the coefficient
        # count is the second-to-last axis, not the frame axis.
        'Number of MFCCs': mfccs.shape[-2],
        'Energy': energy,
        'Zero Crossing Rate': zero_crossing_rate,
        'Spectral Centroid': spectral_centroid,
    }
def calculate_advanced_metrics(y, sr):
    """Compute pitch (pYIN), chroma and spectral-contrast averages.

    Args:
        y: Audio samples as a floating-point NumPy array accepted by librosa.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict with the keys 'Average F0 (YIN)', 'Average Chroma' and
        'Average Spectral Contrast'. Values are plain Python floats;
        'Average F0 (YIN)' is None when no frame was judged voiced.
    """
    metrics = {}

    # pyin must be told the real sampling rate; otherwise it falls back to
    # its own default and the reported frequencies are scaled incorrectly.
    # The search ceiling is also kept at or below the Nyquist frequency.
    f0, _, _ = librosa.pyin(y, fmin=50, fmax=min(4000, sr / 2), sr=sr)
    # Unvoiced frames are NaN; average only the voiced ones and avoid the
    # all-NaN warning/NaN result when nothing is voiced.
    voiced_f0 = f0[~np.isnan(f0)]
    metrics['Average F0 (YIN)'] = (
        float(np.mean(voiced_f0)) if voiced_f0.size else None
    )

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    metrics['Average Chroma'] = float(np.mean(chroma))

    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    metrics['Average Spectral Contrast'] = float(np.mean(spectral_contrast))

    return metrics
def generate_spectrogram(y, sr):
    """Render a log-frequency dB spectrogram of ``y`` to a temporary PNG.

    Args:
        y: Audio samples as a floating-point NumPy array accepted by librosa.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        Path of the PNG file that was written. The file is created with
        ``delete=False``, so the caller owns its cleanup.
    """
    spectrogram_db = librosa.amplitude_to_db(
        np.abs(librosa.stft(y)), ref=np.max
    )

    # Use an explicit figure/axes instead of pyplot's implicit "current
    # figure" so concurrent requests cannot draw onto each other's plot.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        mesh = librosa.display.specshow(
            spectrogram_db, sr=sr, x_axis='time', y_axis='log', ax=ax
        )
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()
        # Write into the already-open handle rather than reopening the file
        # by name, which is not possible on every platform while it is open.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix='.png', mode='w+b'
        ) as tmp:
            fig.savefig(tmp, format='png')
            image_path = tmp.name
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    return image_path
def process_audio(file):
    """Gradio callback: analyse an uploaded clip and plot its spectrogram.

    Args:
        file: ``(sample_rate, samples)`` tuple as delivered by the audio
            input, or None when nothing was uploaded.

    Returns:
        ``(metrics, image_path)`` where ``metrics`` merges the basic and
        advanced metric dicts and ``image_path`` points at the spectrogram
        PNG. With no (or empty) input, returns ``({}, "placeholder.png")``.
    """
    if file is None:
        return {}, "placeholder.png"

    sr, y = file
    y = np.asarray(y)
    if y.size == 0:
        # An empty recording carries nothing to analyse.
        return {}, "placeholder.png"

    if np.issubdtype(y.dtype, np.integer):
        # Integer PCM: scale by the dtype's maximum to get floats in ~[-1, 1].
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    elif y.dtype != np.float32:
        # Other float widths only need a cast; np.iinfo would raise on them.
        y = y.astype(np.float32)

    if y.ndim > 1:
        # Down-mix multi-channel audio to mono; assumes the (samples,
        # channels) layout Gradio documents for numpy audio.
        y = y.mean(axis=1)

    basic_metrics = calculate_basic_metrics(y, sr)
    advanced_metrics = calculate_advanced_metrics(y, sr)
    metrics = {**basic_metrics, **advanced_metrics}
    image_path = generate_spectrogram(y, sr)
    return metrics, image_path
# Wire the analysis callback into a Gradio interface: one audio input,
# a JSON panel for the metrics and an image panel for the spectrogram.
_interface_options = {
    "fn": process_audio,
    "inputs": gr.Audio(label="Upload Audio"),
    "outputs": ["json", "image"],
    "title": "Speech-Scope",
    "description": "Speech and audio Metrics Analysis",
}
iface = gr.Interface(**_interface_options)

# Start the web server with debug output enabled.
iface.launch(debug=True)