rmayormartins committed on
Commit
56bd901
·
1 Parent(s): c5ab0c7

Atualizando

Browse files
Files changed (3) hide show
  1. README.md +19 -5
  2. app.py +83 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Speech Scope
3
- emoji: 🦀
4
- colorFrom: pink
5
- colorTo: purple
6
  sdk: gradio
7
  sdk_version: 4.12.0
8
  app_file: app.py
@@ -10,4 +10,18 @@ pinned: false
10
  license: ecl-2.0
11
  ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Speech-Scope
3
+ emoji: 🎤
4
+ colorFrom: gray
5
+ colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 4.12.0
8
  app_file: app.py
 
10
  license: ecl-2.0
11
  ---
12
 
13
+ ## Speech-Scope
14
+
15
+ Análise de métricas de áudio.
16
+
17
+ Confira a referência de configuração em [Hugging Face Spaces Config Reference](https://huggingface.co/docs/hub/spaces-config-reference).
18
+
19
+ ## Desenvolvedor
20
+
21
+ Desenvolvido por Ramon Mayor Martins (2023)
22
+
23
+ - E-mail: [rmayormartins@gmail.com](mailto:rmayormartins@gmail.com)
24
+ - Homepage: [https://rmayormartins.github.io/](https://rmayormartins.github.io/)
25
+ - Twitter: [@rmayormartins](https://twitter.com/rmayormartins)
26
+ - GitHub: [https://github.com/rmayormartins](https://github.com/rmayormartins)
27
+
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import librosa
3
+ import numpy as np
4
+ import matplotlib.pyplot as plt
5
+ import tempfile
6
+ import librosa.display
7
+
8
+
9
def calculate_basic_metrics(y, sr):
    """Compute a small set of summary descriptors for an audio signal.

    Args:
        y: Audio samples as a floating-point NumPy array.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        A dict with the keys ``'Average Pitch'``, ``'Number of MFCCs'``,
        ``'Energy'``, ``'Zero Crossing Rate'`` and ``'Spectral Centroid'``.
        All values are plain Python numbers so they serialize cleanly.
        ``'Average Pitch'`` is NaN when no positive pitch value is found.
    """
    pitches, _ = librosa.piptrack(y=y, sr=sr)

    # Only strictly positive entries are treated as detected pitches.
    # Guard the empty case explicitly instead of letting np.mean warn.
    detected = pitches[pitches > 0]
    average_pitch = float(np.mean(detected)) if detected.size else float('nan')

    mfccs = librosa.feature.mfcc(y=y, sr=sr)
    energy = float(np.sum(y ** 2))
    zero_crossing_rate = float(np.mean(librosa.feature.zero_crossing_rate(y)))
    spectral_centroid = float(
        np.mean(librosa.feature.spectral_centroid(y=y, sr=sr))
    )

    return {
        'Average Pitch': average_pitch,
        # The MFCC matrix is laid out as (..., n_mfcc, frames); the
        # coefficient count is the second-to-last axis, not the frame axis.
        'Number of MFCCs': int(mfccs.shape[-2]),
        'Energy': energy,
        'Zero Crossing Rate': zero_crossing_rate,
        'Spectral Centroid': spectral_centroid,
    }
24
+
25
+
26
def calculate_advanced_metrics(y, sr):
    """Compute pitch-, chroma- and contrast-based summary metrics.

    Args:
        y: Audio samples as a floating-point NumPy array.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        A dict with the keys ``'Average F0 (YIN)'``, ``'Average Chroma'``
        and ``'Average Spectral Contrast'``, each a plain Python float.
        ``'Average F0 (YIN)'`` is NaN when no frame has a finite F0.
    """
    metrics = {}

    # The sample rate must be passed explicitly: without it pyin falls back
    # to its own default rate and the F0 estimates are scaled incorrectly
    # for audio recorded at any other rate.
    f0, _, _ = librosa.pyin(y, fmin=50, fmax=4000, sr=sr)

    # Frames without a finite F0 are excluded from the average; handle the
    # "nothing left" case here rather than letting np.nanmean warn.
    finite_f0 = f0[np.isfinite(f0)]
    metrics['Average F0 (YIN)'] = (
        float(np.mean(finite_f0)) if finite_f0.size else float('nan')
    )

    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    metrics['Average Chroma'] = float(np.mean(chroma))

    spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    metrics['Average Spectral Contrast'] = float(np.mean(spectral_contrast))

    return metrics
40
+
41
+
42
def generate_spectrogram(y, sr):
    """Render a log-frequency dB spectrogram of ``y`` to a temporary PNG.

    Args:
        y: Audio samples as a floating-point NumPy array.
        sr: Sample rate of ``y`` in Hz.

    Returns:
        Filesystem path of the generated PNG. The file is created with
        ``delete=False``, so removing it is the caller's responsibility.
    """
    # Use an explicit figure/axes pair instead of pyplot's implicit
    # "current figure" so the drawing does not depend on global state.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        spectrum_db = librosa.amplitude_to_db(
            np.abs(librosa.stft(y)), ref=np.max
        )
        mesh = librosa.display.specshow(
            spectrum_db, sr=sr, x_axis='time', y_axis='log', ax=ax
        )
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        fig.tight_layout()

        # Reserve a unique path and close the handle before saving:
        # writing by name to a file that is still open is not portable.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.png') as tmp:
            image_path = tmp.name
        fig.savefig(image_path, format='png')
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)

    return image_path
54
+
55
+
56
def _to_mono_float32(y):
    """Return ``y`` as a one-dimensional float32 array."""
    if np.issubdtype(y.dtype, np.integer):
        # Integer PCM: scale by the dtype's maximum value.
        y = y.astype(np.float32) / np.iinfo(y.dtype).max
    else:
        # Float input: np.iinfo would raise here, so only cast.
        y = y.astype(np.float32)

    if y.ndim > 1:
        # NOTE(review): assumes channels are on the last axis, i.e.
        # (samples, channels) -- confirm against the Gradio Audio output.
        y = y.mean(axis=-1)

    return y


def process_audio(file):
    """Analyse an uploaded audio clip and return metrics plus a spectrogram.

    Args:
        file: ``None`` when nothing was uploaded, otherwise a
            ``(sample_rate, samples)`` pair with ``samples`` as a NumPy array.

    Returns:
        A ``(metrics, image_path)`` tuple: ``metrics`` is the merged dict of
        basic and advanced metrics, ``image_path`` the spectrogram PNG path.
        With no input the result is ``({}, "placeholder.png")``.
    """
    if file is None:
        # NOTE(review): "placeholder.png" must exist next to the app for the
        # image output to render -- confirm the file ships with the Space.
        return {}, "placeholder.png"

    sr, y = file
    y = _to_mono_float32(y)

    basic_metrics = calculate_basic_metrics(y, sr)
    advanced_metrics = calculate_advanced_metrics(y, sr)
    metrics = {**basic_metrics, **advanced_metrics}

    image_path = generate_spectrogram(y, sr)

    return metrics, image_path
73
+
74
+
75
# Wire the analysis function to a simple web UI: one audio input, and a
# JSON panel plus an image panel for the metrics and the spectrogram.
iface = gr.Interface(
    process_audio,
    gr.Audio(label="Upload Audio"),
    ["json", "image"],
    title="Speech-Scope",
    description="Speech and audio Metrics Analysis",
)

# Start the app with debug output enabled.
iface.launch(debug=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ librosa
3
+ numpy
4
+ matplotlib