Spaces:

Kabatubare
/

voice_clone_detection_v1

Paused

App Files Community

Kabatubare committed on Mar 16

Commit

b8277b5

•

1 Parent(s): cbd878e

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -21

app.py CHANGED Viewed

@@ -20,28 +20,44 @@ model = AutoModelForAudioClassification.from_pretrained("./")
 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
 def plot_waveform(waveform, sr):
-    plt.figure(figsize=(12, 4))
-    plt.title('Waveform')
-    plt.ylabel('Amplitude')
-    plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
-    plt.xlabel('Time (s)')
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
-    plt.savefig(temp_file.name)
-    plt.close()
-    return temp_file.name
 def plot_spectrogram(waveform, sr):
-    S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
-    S_DB = librosa.power_to_db(S, ref=np.max)
-    plt.figure(figsize=(12, 6))
-    librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel', cmap='inferno')
-    plt.title('Mel Spectrogram')
-    plt.colorbar(format='%+2.0f dB')
-    plt.tight_layout()
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
-    plt.savefig(temp_file.name)
-    plt.close()
-    return temp_file.name
 def custom_feature_extraction(audio, sr=16000, target_length=1024):
     features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
@@ -94,7 +110,8 @@ def predict_voice(audio_file_path):
             spectrogram_plot,
             transcription[0]  # Assuming transcription returns a list with a single string
         )
-    except Exception as e:
         return f"Error during processing: {e}", None, None, ""
 with gr.Blocks(css="style.css") as demo:

 feature_extractor = ASTFeatureExtractor.from_pretrained("./")
 def plot_waveform(waveform, sr):
+    try:
+        plt.figure(figsize=(12, 4))
+        plt.title('Waveform')
+        plt.ylabel('Amplitude')
+        plt.plot(np.linspace(0, len(waveform) / sr, len(waveform)), waveform)
+        plt.xlabel('Time (s)')
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
+        plt.savefig(temp_file.name)
+        plt.close()
+        file_size = os.path.getsize(temp_file.name)
+        logger.debug(f"Waveform image generated: {temp_file.name}, Size: {file_size} bytes")
+        return temp_file.name
+    except Exception as e:
+        logger.error(f"Error generating waveform image: {e}")
+        raise
 def plot_spectrogram(waveform, sr):
+    try:
+        S = librosa.feature.melspectrogram(y=waveform, sr=sr, n_mels=128)
+        S_DB = librosa.power_to_db(S, ref=np.max)
+        plt.figure(figsize=(12, 6))
+        librosa.display.specshow(S_DB, sr=sr, x_axis='time', y_axis='mel')
+        plt.title('Mel Spectrogram')
+        plt.colorbar(format='%+2.0f dB')
+        plt.tight_layout()
+        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.png', dir='./')
+        plt.savefig(temp_file.name)
+        plt.close()
+        file_size = os.path.getsize(temp_file.name)
+        logger.debug(f"Spectrogram image generated: {temp_file.name}, Size: {file_size} bytes")
+        return temp_file.name
+    except Exception as e:
+        logger.error(f"Error generating spectrogram image: {e}")
+        raise
 def custom_feature_extraction(audio, sr=16000, target_length=1024):
     features = feature_extractor(audio, sampling_rate=sr, return_tensors="pt", padding="max_length", max_length=target_length)
             spectrogram_plot,
             transcription[0]  # Assuming transcription returns a list with a single string
         )
+     except Exception as e:
+        logger.error(f"Error during voice prediction: {e}")
         return f"Error during processing: {e}", None, None, ""
 with gr.Blocks(css="style.css") as demo: