Spaces:
Runtime error
Runtime error
File size: 2,587 Bytes
f4f5a40 488d50e f03ec98 e7c7540 1111e0a 811d3ce 1111e0a dff69a4 1111e0a d7a0eb1 e4b1e14 1111e0a 09457f4 1111e0a 09457f4 e7c7540 a4ace8a 270455b d7a0eb1 1111e0a 270455b 1111e0a 105e8bf 1111e0a d7a0eb1 811d3ce 1111e0a 811d3ce 1111e0a 270455b 1111e0a 270455b 1111e0a 270455b f03ec98 faee536 1111e0a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import gradio as gr
from audioseal import AudioSeal
import torch
import torchaudio
import torchaudio.transforms as T
import traceback
import matplotlib.pyplot as plt
import numpy as np
import io
from PIL import Image
def plot_spectrogram(waveform, sample_rate):
    """Render the dB-scaled spectrogram of an audio tensor as a PIL image.

    Args:
        waveform: Audio tensor whose last dimension is time. A 1-D tensor is
            treated as a single channel; tensors with more than two
            dimensions have their leading dimensions flattened into one
            channel axis. Only the first channel is drawn.
        sample_rate: Accepted for interface compatibility. It is not used:
            the axes are labelled in frame / frequency-bin indices.

    Returns:
        A ``PIL.Image.Image`` decoded from the PNG rendering of the plot.
    """
    # Work on a detached CPU copy so ``.numpy()`` below cannot fail on
    # tensors that live on another device or are attached to a graph.
    waveform = waveform.detach().cpu()

    # ``spectrogram_db[0]`` below selects the first channel, so the input
    # must be (channels, time). Without this, a squeezed mono signal (1-D)
    # yields a 1-D row that ``imshow`` rejects.
    if waveform.ndim == 1:
        waveform = waveform.unsqueeze(0)
    elif waveform.ndim > 2:
        waveform = waveform.reshape(-1, waveform.shape[-1])

    spectrogram = T.Spectrogram()(waveform)
    spectrogram_db = T.AmplitudeToDB()(spectrogram)

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        mesh = ax.imshow(
            spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower'
        )
        fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
        ax.set_title('Spectrogram')
        ax.set_xlabel('Time Frame')
        ax.set_ylabel('Frequency')
        buf = io.BytesIO()
        fig.savefig(buf, format='png')
    finally:
        # Always release the figure, even if rendering raised, so repeated
        # requests do not accumulate open figures.
        plt.close(fig)

    buf.seek(0)
    image = Image.open(buf)
    # Decode now so the returned image does not rely on lazy reads from buf.
    image.load()
    return image
# Loaded AudioSeal detectors keyed by model card name, so the model is loaded
# once per process instead of on every request.
_DETECTOR_CACHE = {}


def _get_detector(model_name="audioseal_detector_16bits"):
    """Return the AudioSeal detector for ``model_name``, loading it on first use."""
    detector = _DETECTOR_CACHE.get(model_name)
    if detector is None:
        detector = AudioSeal.load_detector(model_name)
        _DETECTOR_CACHE[model_name] = detector
    return detector


def detect_watermark(audio_file_path, threshold=0.99):
    """Check an audio file for an AudioSeal watermark.

    Args:
        audio_file_path: Path of the audio file to analyse. ``None`` or an
            empty string (no upload) yields a short message instead of a
            traceback.
        threshold: Minimum detection probability, in ``[0, 1]``, at which
            the audio is reported as AI-generated.

    Returns:
        A ``(message, image)`` tuple. ``message`` is the verdict, or an
        error description including the traceback if processing failed.
        ``image`` is the spectrogram image of the analysed audio, or
        ``None`` when no analysis could be performed.
    """
    if not audio_file_path:
        return "No audio provided. Please upload an audio file.", None

    try:
        waveform, sample_rate = torchaudio.load(audio_file_path)
        if waveform.numel() == 0:
            return "The audio file contains no samples.", None

        # Downmix multi-channel audio to one channel; the detector is
        # presumed to expect mono input of shape (batch, 1, time) --
        # NOTE(review): confirm against the installed AudioSeal version.
        if waveform.ndim == 2 and waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Peak-normalise, skipping silent input so we never divide by zero
        # and feed NaNs to the model.
        peak = waveform.abs().max()
        if peak > 0:
            waveform = waveform / peak

        if sample_rate != 16000:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=16000
            )
            waveform = resampler(waveform)
            sample_rate = 16000

        # Add the batch dimension: (channels, time) -> (1, channels, time).
        if waveform.ndim < 3:
            waveform = waveform.unsqueeze(0)

        detector = _get_detector()
        with torch.no_grad():
            # Per the AudioSeal README the call returns the probability that
            # the audio is watermarked plus the decoded message bits. The
            # bits are not a confidence score and are not used here, so the
            # user's threshold is applied to the probability instead of
            # being passed as ``message_threshold``.
            probability, _message = detector.detect_watermark(waveform)
        probability = float(probability)

        # Drop only the batch axis so the plot still receives a
        # (channels, time) tensor, including for mono audio.
        spectrogram_image = plot_spectrogram(waveform.squeeze(0), sample_rate)

        if probability >= threshold:
            detection_message = f"AI-generated with confidence: {probability:.2f}"
        else:
            detection_message = "Likely human-generated or the AI watermark is undetectable at the current threshold."
        return detection_message, spectrogram_image
    except Exception as e:
        # UI boundary: report the failure in the text output rather than
        # letting the request crash.
        error_traceback = traceback.format_exc()
        return f"Error occurred: {e}\n\n{error_traceback}", None
# Gradio front end: an audio upload (handed to the callback as a file path)
# and a slider whose value is passed as the callback's ``threshold`` argument.
_audio_input = gr.Audio(label="Upload your audio", type="filepath")
_threshold_slider = gr.Slider(
    label="Detection Threshold",
    minimum=0,
    maximum=1,
    value=0.99,
)

# Outputs map, in order, to the (message, image) tuple returned by the callback.
interface = gr.Interface(
    fn=detect_watermark,
    inputs=[_audio_input, _threshold_slider],
    outputs=["text", "image"],
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine. Adjust the detection threshold to change sensitivity.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|