Commit
•
b8277b5
1
Parent(s):
cbd878e
Update app.py
Browse files
app.py
CHANGED
@@ -20,28 +20,44 @@ model = AutoModelForAudioClassification.from_pretrained("./")
|
|
20 |
feature_extractor = ASTFeatureExtractor.from_pretrained("./")
|
21 |
|
22 |
def plot_waveform(waveform, sr):
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
def plot_spectrogram(waveform, sr):
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
|
46 |
def custom_feature_extraction(audio, sr=16000, target_length=1024):
|
47 |
features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
|
@@ -94,7 +110,8 @@ def predict_voice(audio_file_path):
|
|
94 |
spectrogram_plot,
|
95 |
transcription[0] # Assuming transcription returns a list with a single string
|
96 |
)
|
97 |
-
|
|
|
98 |
return f"Error during processing: {e}", None, None, ""
|
99 |
|
100 |
with gr.Blocks(css="style.css") as demo:
|
|
|
20 |
feature_extractor = ASTFeatureExtractor.from_pretrained("./")
|
21 |
|
22 |
def plot_waveform(waveform, sr):
    """Render *waveform* as an amplitude-over-time plot and save it as a PNG.

    The image is written to a uniquely named ``.png`` file created in the
    current working directory (``dir='./'``). The file is created with
    ``delete=False``, so it is NOT removed automatically; the caller owns it.

    Args:
        waveform: Sequence of audio samples. ``len(waveform)`` must be
            defined; presumably a 1-D array of amplitudes (confirm against
            the caller).
        sr: Sample rate used to convert sample count to seconds for the
            x-axis (``len(waveform) / sr``).

    Returns:
        The path of the generated PNG file.

    Raises:
        Exception: Any error raised while plotting or saving is logged and
            re-raised unchanged.
    """
    try:
        fig = plt.figure(figsize=(12, 4))
        try:
            plt.title('Waveform')
            plt.ylabel('Amplitude')
            plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
            plt.xlabel('Time (s)')

            # Only the unique name is needed: close the handle right away so
            # the descriptor is not leaked and so the file can be reopened
            # for writing on platforms that forbid a second open (Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./') as temp_file:
                image_path = temp_file.name
            plt.savefig(image_path)
        finally:
            # Always release the figure, even when plotting/saving fails;
            # pyplot keeps every unclosed figure alive otherwise.
            plt.close(fig)

        file_size = os.path.getsize(image_path)
        logger.debug(f"Waveform image generated: {image_path}, Size: {file_size} bytes")

        return image_path
    except Exception as e:
        logger.error(f"Error generating waveform image: {e}")
        raise
|
40 |
|
41 |
def plot_spectrogram(waveform, sr):
    """Render a mel spectrogram of *waveform* and save it as a PNG.

    A 128-band mel spectrogram is computed with
    ``librosa.feature.melspectrogram``, converted to decibels relative to its
    maximum (``ref=np.max``), and drawn with ``librosa.display.specshow``.
    The image is written to a uniquely named ``.png`` file created in the
    current working directory (``dir='./'``). The file is created with
    ``delete=False``, so it is NOT removed automatically; the caller owns it.

    Args:
        waveform: Audio samples passed to librosa as ``y``; presumably a
            1-D float array (confirm against the caller).
        sr: Sample rate passed to both the spectrogram computation and the
            display call.

    Returns:
        The path of the generated PNG file.

    Raises:
        Exception: Any error raised while computing, plotting or saving is
            logged and re-raised unchanged.
    """
    try:
        S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
        S_DB = librosa.power_to_db(S, ref=np.max)

        fig = plt.figure(figsize=(12, 6))
        try:
            librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
            plt.title('Mel Spectrogram')
            plt.colorbar(format='%+2.0f dB')
            plt.tight_layout()

            # Only the unique name is needed: close the handle right away so
            # the descriptor is not leaked and so the file can be reopened
            # for writing on platforms that forbid a second open (Windows).
            with tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./') as temp_file:
                image_path = temp_file.name
            plt.savefig(image_path)
        finally:
            # Always release the figure, even when plotting/saving fails;
            # pyplot keeps every unclosed figure alive otherwise.
            plt.close(fig)

        file_size = os.path.getsize(image_path)
        logger.debug(f"Spectrogram image generated: {image_path}, Size: {file_size} bytes")

        return image_path
    except Exception as e:
        logger.error(f"Error generating spectrogram image: {e}")
        raise
|
61 |
|
62 |
def custom_feature_extraction(audio, sr=16000, target_length=1024):
|
63 |
features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
|
|
|
110 |
spectrogram_plot,
|
111 |
transcription[0] # Assuming transcription returns a list with a single string
|
112 |
)
|
113 |
+
except Exception as e:
|
114 |
+
logger.error(f"Error during voice prediction: {e}")
|
115 |
return f"Error during processing: {e}", None, None, ""
|
116 |
|
117 |
with gr.Blocks(css="style.css") as demo:
|