Spaces:
Sleeping
Sleeping
Commit
·
56bd901
1
Parent(s):
c5ab0c7
Atualizando
Browse files- README.md +19 -5
- app.py +83 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
---
|
2 |
-
title: Speech
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.12.0
|
8 |
app_file: app.py
|
@@ -10,4 +10,18 @@ pinned: false
|
|
10 |
license: ecl-2.0
|
11 |
---
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Speech-Scope
|
3 |
+
emoji: 🎤
|
4 |
+
colorFrom: gray
|
5 |
+
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
sdk_version: 4.12.0
|
8 |
app_file: app.py
|
|
|
10 |
license: ecl-2.0
|
11 |
---
|
12 |
|
13 |
+
## Speech-Scope
|
14 |
+
|
15 |
+
Análise de métricas de áudio.
|
16 |
+
|
17 |
+
Confira a referência de configuração em [Hugging Face Spaces Config Reference](https://huggingface.co/docs/hub/spaces-config-reference).
|
18 |
+
|
19 |
+
## Desenvolvedor
|
20 |
+
|
21 |
+
Desenvolvido por Ramon Mayor Martins (2023)
|
22 |
+
|
23 |
+
- E-mail: [rmayormartins@gmail.com](mailto:rmayormartins@gmail.com)
|
24 |
+
- Homepage: [https://rmayormartins.github.io/](https://rmayormartins.github.io/)
|
25 |
+
- Twitter: [@rmayormartins](https://twitter.com/rmayormartins)
|
26 |
+
- GitHub: [https://github.com/rmayormartins](https://github.com/rmayormartins)
|
27 |
+
|
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import librosa
|
3 |
+
import numpy as np
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import tempfile
|
6 |
+
import librosa.display
|
7 |
+
|
8 |
+
|
9 |
+
def calculate_basic_metrics(y, sr):
    """Compute basic summary descriptors of an audio signal.

    Args:
        y: Audio samples as a floating-point NumPy array; passed unchanged
            to the librosa feature extractors.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict with the keys ``'Average Pitch'``, ``'Number of MFCCs'``,
        ``'Energy'``, ``'Zero Crossing Rate'`` and ``'Spectral Centroid'``.
        Values are plain Python numbers. ``'Average Pitch'`` is ``None``
        when no bin carries a positive pitch estimate (e.g. pure silence).
    """
    # piptrack returns one pitch value per (frequency bin, frame); only
    # strictly positive entries are treated as detected pitches.
    pitches, _ = librosa.piptrack(y=y, sr=sr)
    detected = pitches[pitches > 0]
    # Averaging an empty selection would produce NaN and a RuntimeWarning,
    # so report "no pitch" explicitly instead.
    average_pitch = float(np.mean(detected)) if detected.size else None

    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    energy = float(np.sum(y ** 2))
    zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
    spectral_centroid = float(
        np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    )

    return {
        'Average Pitch': average_pitch,
        # The MFCC matrix is laid out as (..., n_mfcc, frames): the
        # coefficient count is the second-to-last axis. The last axis
        # (previously reported here) is the number of frames.
        'Number of MFCCs': int(mfccs.shape[-2]),
        'Energy': energy,
        'Zero Crossing Rate': zero_crossing_rate,
        'Spectral Centroid': spectral_centroid,
    }
|
24 |
+
|
25 |
+
|
26 |
+
def calculate_advanced_metrics(y, sr):
    """Compute F0, chroma and spectral-contrast averages for a signal.

    Args:
        y: Audio samples as a floating-point NumPy array.
        sr: Sampling rate of ``y`` in Hz.

    Returns:
        dict with the keys ``'Average F0 (YIN)'``, ``'Average Chroma'`` and
        ``'Average Spectral Contrast'``. ``'Average F0 (YIN)'`` is ``None``
        when the tracker reports no finite F0 value in any frame.
    """
    metrics = {}

    # pyin falls back to its own default sampling rate when ``sr`` is not
    # given, which mis-scales every F0 estimate for audio recorded at any
    # other rate; pass the real rate. The upper search bound is capped at
    # Nyquist so that low-rate recordings keep a valid search range.
    fmax = min(4000, sr / 2)
    f0, _, _ = librosa.pyin(y, fmin=50, fmax=fmax, sr=sr)
    # Frames without an F0 estimate are NaN; nanmean over an all-NaN array
    # would itself be NaN (with a RuntimeWarning), so report None instead.
    if f0 is not None and np.any(np.isfinite(f0)):
        metrics['Average F0 (YIN)'] = float(np.nanmean(f0))
    else:
        metrics['Average F0 (YIN)'] = None

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    metrics['Average Chroma'] = float(np.mean(chroma))

    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    metrics['Average Spectral Contrast'] = float(np.mean(spectral_contrast))

    return metrics
|
40 |
+
|
41 |
+
|
42 |
+
def generate_spectrogram(y, sr):
    """Render a log-frequency dB spectrogram of ``y`` into a temporary PNG.

    Args:
        y: Audio samples as a floating-point NumPy array.
        sr: Sampling rate of ``y`` in Hz (used to scale the axes).

    Returns:
        Path of the PNG file. The file is created with ``delete=False``,
        so it is not removed automatically; cleanup is up to the caller.
    """
    spectrum_db = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max)

    # Work on an explicit Figure/Axes pair rather than pyplot's implicit
    # "current figure", so drawing never depends on global pyplot state.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        mesh = librosa.display.specshow(
            spectrum_db, sr=sr, x_axis='time', y_axis='log', ax=ax
        )
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()

        # Write through the already-open handle: reopening a named
        # temporary file by path while it is still open is not portable.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png', mode='w+b') as f:
            fig.savefig(f, format='png')
            image_path = f.name
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    return image_path
|
54 |
+
|
55 |
+
|
56 |
+
def process_audio(file):
    """Analyse one uploaded clip and return its metrics and spectrogram.

    Args:
        file: ``None`` when nothing was uploaded, otherwise a
            ``(sample_rate, samples)`` pair where ``samples`` is a NumPy
            array.

    Returns:
        ``(metrics, image_path)``: the merged basic and advanced metric
        dict, and the path of the rendered spectrogram PNG. With no input
        the result is ``({}, "placeholder.png")``.
    """
    if file is None:
        # NOTE(review): nothing in the visible code creates
        # "placeholder.png" - confirm the file ships with the app.
        return {}, "placeholder.png"

    sr, y = file
    y = np.asarray(y)

    if np.issubdtype(y.dtype, np.integer):
        # Integer PCM: scale by the dtype's maximum to reach roughly [-1, 1].
        peak = np.iinfo(y.dtype).max
        y = y.astype(np.float32) / peak
    elif y.dtype != np.float32:
        # Other float widths are assumed to be normalised already; only the
        # dtype changes. (np.iinfo would raise ValueError on a float dtype.)
        y = y.astype(np.float32)

    if y.ndim > 1:
        # Assumes multi-channel input is laid out as (samples, channels),
        # as Gradio's Audio component documents - TODO confirm. librosa
        # expects time on the last axis, so downmix to mono.
        y = y.mean(axis=1)

    basic_metrics = calculate_basic_metrics(y, sr)
    advanced_metrics = calculate_advanced_metrics(y, sr)

    metrics = {**basic_metrics, **advanced_metrics}

    image_path = generate_spectrogram(y, sr)

    return metrics, image_path
|
73 |
+
|
74 |
+
|
75 |
+
# Build the Gradio UI: one audio input feeding process_audio, whose two
# results are shown as a JSON panel and an image.
iface = gr.Interface(
    title="Speech-Scope",
    description="Speech and audio Metrics Analysis",
    fn=process_audio,
    inputs=gr.Audio(label="Upload Audio"),
    outputs=["json", "image"],
)

# Start serving as soon as this script is executed.
iface.launch(debug=True)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
librosa
|
3 |
+
numpy
|
4 |
+
matplotlib
|