# Speech-Scope — a Gradio app that computes speech/audio metrics with librosa.
# (Header cleaned: removed Hugging Face file-viewer residue — blame hashes and line numbers.)
import gradio as gr
import librosa
import numpy as np
import matplotlib.pyplot as plt
import tempfile
import librosa.display
def calculate_basic_metrics(y, sr):
    """Compute basic signal metrics for a mono audio buffer.

    Args:
        y: 1-D float audio samples.
        sr: sample rate in Hz.

    Returns:
        dict mapping metric names to plain Python numbers (JSON-safe).
    """
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    voiced = pitches[pitches > 0]
    # Guard: on silence/unvoiced input the mask is empty and np.mean would
    # emit a RuntimeWarning and return NaN (which renders poorly as JSON).
    average_pitch = float(np.mean(voiced)) if voiced.size else 0.0
    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    energy = float(np.sum(y ** 2))
    zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
    spectral_centroid = float(np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)))
    return {
        'Average Pitch': average_pitch,
        # librosa.feature.mfcc returns shape (n_mfcc, n_frames); the previous
        # code reported shape[1] (the frame count), not the coefficient count.
        'Number of MFCCs': int(mfccs.shape[0]),
        'Energy': energy,
        'Zero Crossing Rate': zero_crossing_rate,
        'Spectral Centroid': spectral_centroid
    }
def calculate_advanced_metrics(y, sr):
    """Compute pitch (pYIN), chroma, and spectral-contrast summaries.

    Args:
        y: 1-D float audio samples.
        sr: sample rate in Hz.

    Returns:
        dict of metric name -> averaged value.
    """
    results = {}
    # pyin marks unvoiced frames as NaN, hence nanmean below.
    fundamental, _voiced_flag, _voiced_prob = librosa.pyin(y, fmin=50, fmax=4000)
    if fundamental is not None:
        results['Average F0 (YIN)'] = np.nanmean(fundamental)
    results['Average Chroma'] = np.mean(librosa.feature.chroma_stft(y=y, sr=sr))
    results['Average Spectral Contrast'] = np.mean(
        librosa.feature.spectral_contrast(y=y, sr=sr))
    return results
def generate_spectrogram(y, sr):
    """Render a log-frequency spectrogram of `y` and save it as a PNG.

    Args:
        y: 1-D float audio samples.
        sr: sample rate in Hz.

    Returns:
        Path to the saved PNG file (caller is responsible for cleanup,
        since the temp file is created with delete=False).
    """
    db_spectrum = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)
    plt.figure(figsize=(10, 4))
    librosa.display.specshow(db_spectrum, sr=sr, x_axis='time', y_axis='log')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.tight_layout()
    with tempfile.NamedTemporaryFile(delete=False, suffix='.png', mode='w+b') as out:
        plt.savefig(out.name, format='png')
    plt.close()
    return out.name
def process_audio(file):
    """Gradio callback: turn an uploaded recording into metrics + spectrogram.

    Args:
        file: gr.Audio value — a (sample_rate, samples) tuple, or None when
              no audio was provided.

    Returns:
        (metrics dict, path to spectrogram image).
    """
    if file is None:
        return {}, "placeholder.png"
    sr, y = file
    # Normalize integer PCM to [-1, 1] float32 BEFORE any downmixing:
    # np.iinfo raises ValueError on float dtypes, so the old unconditional
    # division crashed whenever gradio delivered float64 samples.
    if np.issubdtype(y.dtype, np.integer):
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    elif y.dtype != np.float32:
        y = y.astype(np.float32)
    # gr.Audio yields (samples, channels) for stereo uploads, but librosa's
    # feature extractors expect mono — downmix by averaging channels.
    if y.ndim > 1:
        y = y.mean(axis=1)
    basic_metrics = calculate_basic_metrics(y, sr)
    advanced_metrics = calculate_advanced_metrics(y, sr)
    metrics = {**basic_metrics, **advanced_metrics}
    image_path = generate_spectrogram(y, sr)
    return metrics, image_path
# Wire the processing callback into a simple upload -> (JSON, image) UI.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(label="Upload Audio"),
    outputs=["json", "image"],  # metrics dict + spectrogram PNG path
    title="Speech-Scope",
    description="Speech and audio Metrics Analysis",
)
# debug=True surfaces tracebacks in the UI/console while developing.
iface.launch(debug=True)