import gradio as gr
from audioseal import AudioSeal
import torch
import torchaudio
import torchaudio.transforms as T
import traceback
import matplotlib.pyplot as plt
import numpy as np
import io
from PIL import Image
def plot_spectrogram(waveform, sample_rate):
    """Plot and return a spectrogram as a PIL image."""
    spectrogram_transform = T.Spectrogram()
    spectrogram = spectrogram_transform(waveform)
    spectrogram_db = T.AmplitudeToDB()(spectrogram)

    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.xlabel('Time Frame')
    plt.ylabel('Frequency')
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    return Image.open(buf)
def detect_watermark(audio_file_path, threshold=0.99):
    """Run the AudioSeal detector on an uploaded file and return a verdict plus a spectrogram."""
    try:
        waveform, sample_rate = torchaudio.load(audio_file_path)

        # AudioSeal expects mono 16 kHz audio: downmix, peak-normalize, and resample.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)
        waveform = waveform / torch.max(torch.abs(waveform))
        if sample_rate != 16000:
            resampler = T.Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)
            sample_rate = 16000

        # Add a batch dimension: (channels, samples) -> (batch, channels, samples).
        if waveform.ndim < 3:
            waveform = waveform.unsqueeze(0)

        detector = AudioSeal.load_detector("audioseal_detector_16bits")
        # detect_watermark returns the detection probability and the decoded 16-bit message.
        probability, message = detector.detect_watermark(
            waveform, sample_rate=sample_rate, message_threshold=0.5
        )

        # Visual feedback: spectrogram of the analysed (mono) audio.
        waveform_image = plot_spectrogram(waveform.squeeze(0), sample_rate)

        # The slider acts as the decision threshold on the detection probability.
        if probability >= threshold:
            detection_message = f"AI-generated (watermark detected) with confidence: {probability:.2f}"
        else:
            detection_message = (
                f"Likely human-generated, or the AI watermark is undetectable at the current "
                f"threshold (detection probability: {probability:.2f})."
            )
        return detection_message, waveform_image
    except Exception as e:
        error_traceback = traceback.format_exc()
        return f"Error occurred: {e}\n\n{error_traceback}", None
# Interface with adjustable detection threshold and spectrogram visualization
interface = gr.Interface(
    fn=detect_watermark,
    inputs=[
        gr.Audio(label="Upload your audio", type="filepath"),
        gr.Slider(label="Detection Threshold", minimum=0, maximum=1, value=0.99),
    ],
    outputs=["text", "image"],
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine. Adjust the detection threshold to change sensitivity.",
)
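
# --- Optional local smoke test (not wired into the Gradio app) ---
# A minimal sketch of how the detector could be sanity-checked offline: embed a
# watermark with AudioSeal's generator, then run the result back through
# detect_watermark. The generator name and the get_watermark/detect_watermark
# signatures follow the AudioSeal README; treat them as assumptions to verify
# against the installed audioseal version.
def _smoke_test(sample_rate=16000, duration_s=1.0):
    # Synthesize a one-second mono 440 Hz tone shaped as (batch, channels, samples).
    t = torch.arange(int(sample_rate * duration_s)) / sample_rate
    audio = torch.sin(2 * torch.pi * 440.0 * t).reshape(1, 1, -1)

    # Embed the watermark and detect it again.
    generator = AudioSeal.load_generator("audioseal_wm_16bits")
    watermarked = audio + generator.get_watermark(audio, sample_rate=sample_rate)
    detector = AudioSeal.load_detector("audioseal_detector_16bits")
    probability, message = detector.detect_watermark(watermarked, sample_rate=sample_rate)

    print(f"Detection probability: {probability:.3f}")  # expected near 1.0 for watermarked audio
    print(f"Decoded message bits: {message.flatten().tolist()}")
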
if __name__ == "__main__":
    interface.launch()