Kabatubare committed on
Commit
b8277b5
1 Parent(s): cbd878e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -21
app.py CHANGED
@@ -20,28 +20,44 @@ model = AutoModelForAudioClassification.from_pretrained("./")
20
  feature_extractor = ASTFeatureExtractor.from_pretrained("./")
21
 
22
  def plot_waveform(waveform, sr):
23
- plt.figure(figsize=(12, 4))
24
- plt.title('Waveform')
25
- plt.ylabel('Amplitude')
26
- plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
27
- plt.xlabel('Time (s)')
28
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
29
- plt.savefig(temp_file.name)
30
- plt.close()
31
- return temp_file.name
 
 
 
 
 
 
 
 
32
 
33
  def plot_spectrogram(waveform, sr):
34
- S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
35
- S_DB = librosa.power_to_db(S, ref=np.max)
36
- plt.figure(figsize=(12, 6))
37
- librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel', cmap='inferno')
38
- plt.title('Mel Spectrogram')
39
- plt.colorbar(format='%+2.0f dB')
40
- plt.tight_layout()
41
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
42
- plt.savefig(temp_file.name)
43
- plt.close()
44
- return temp_file.name
 
 
 
 
 
 
 
 
45
 
46
  def custom_feature_extraction(audio, sr=16000, target_length=1024):
47
  features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
@@ -94,7 +110,8 @@ def predict_voice(audio_file_path):
94
  spectrogram_plot,
95
  transcription[0] # Assuming transcription returns a list with a single string
96
  )
97
- except Exception as e:
 
98
  return f"Error during processing: {e}", None, None, ""
99
 
100
  with gr.Blocks(css="style.css") as demo:
 
20
  feature_extractor = ASTFeatureExtractor.from_pretrained("./")
21
 
22
  def plot_waveform(waveform, sr):
23
+ try:
24
+ plt.figure(figsize=(12, 4))
25
+ plt.title('Waveform')
26
+ plt.ylabel('Amplitude')
27
+ plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
28
+ plt.xlabel('Time (s)')
29
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
30
+ plt.savefig(temp_file.name)
31
+ plt.close()
32
+
33
+ file_size = os.path.getsize(temp_file.name)
34
+ logger.debug(f"Waveform image generated: {temp_file.name}, Size: {file_size} bytes")
35
+
36
+ return temp_file.name
37
+ except Exception as e:
38
+ logger.error(f"Error generating waveform image: {e}")
39
+ raise
40
 
41
  def plot_spectrogram(waveform, sr):
42
+ try:
43
+ S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
44
+ S_DB = librosa.power_to_db(S, ref=np.max)
45
+ plt.figure(figsize=(12, 6))
46
+ librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
47
+ plt.title('Mel Spectrogram')
48
+ plt.colorbar(format='%+2.0f dB')
49
+ plt.tight_layout()
50
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
51
+ plt.savefig(temp_file.name)
52
+ plt.close()
53
+
54
+ file_size = os.path.getsize(temp_file.name)
55
+ logger.debug(f"Spectrogram image generated: {temp_file.name}, Size: {file_size} bytes")
56
+
57
+ return temp_file.name
58
+ except Exception as e:
59
+ logger.error(f"Error generating spectrogram image: {e}")
60
+ raise
61
 
62
  def custom_feature_extraction(audio, sr=16000, target_length=1024):
63
  features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
 
110
  spectrogram_plot,
111
  transcription[0] # Assuming transcription returns a list with a single string
112
  )
113
+ except Exception as e:
114
+ logger.error(f"Error during voice prediction: {e}")
115
  return f"Error during processing: {e}", None, None, ""
116
 
117
  with gr.Blocks(css="style.css") as demo: