import gradio as gr
from audioseal import AudioSeal
import torch
import torchaudio
import torchaudio.transforms as T
import traceback
import matplotlib.pyplot as plt
import numpy as np
import io
from PIL import Image
def plot_spectrogram(waveform, sample_rate):
    """Plot and return a spectrogram as a PIL image."""
    spectrogram_transform = T.Spectrogram()
    spectrogram = spectrogram_transform(waveform)
    spectrogram_db = T.AmplitudeToDB()(spectrogram)

    plt.figure(figsize=(10, 4))
    plt.imshow(spectrogram_db[0].numpy(), cmap='hot', aspect='auto', origin='lower')
    plt.colorbar(format='%+2.0f dB')
    plt.title('Spectrogram')
    plt.xlabel('Time Frame')
    plt.ylabel('Frequency')
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    buf.seek(0)
    return Image.open(buf)
def detect_watermark(audio_file_path, threshold=0.99):
    """Run the AudioSeal detector on an uploaded file and return a verdict plus a spectrogram."""
    try:
        waveform, sample_rate = torchaudio.load(audio_file_path)

        # AudioSeal expects mono 16 kHz audio: downmix, peak-normalize, and resample.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)
        waveform = waveform / torch.max(torch.abs(waveform))
        if sample_rate != 16000:
            resampler = T.Resample(orig_freq=sample_rate, new_freq=16000)
            waveform = resampler(waveform)
            sample_rate = 16000

        # Add a batch dimension: (channels, samples) -> (batch, channels, samples).
        if waveform.ndim < 3:
            waveform = waveform.unsqueeze(0)

        detector = AudioSeal.load_detector("audioseal_detector_16bits")
        # detect_watermark returns the detection probability and the decoded 16-bit message.
        probability, message = detector.detect_watermark(
            waveform, sample_rate=sample_rate, message_threshold=0.5
        )

        # Visual feedback: spectrogram of the analysed (mono) audio.
        waveform_image = plot_spectrogram(waveform.squeeze(0), sample_rate)

        # The slider acts as the decision threshold on the detection probability.
        if probability >= threshold:
            detection_message = f"AI-generated (watermark detected) with confidence: {probability:.2f}"
        else:
            detection_message = (
                f"Likely human-generated, or the AI watermark is undetectable at the current "
                f"threshold (detection probability: {probability:.2f})."
            )
        return detection_message, waveform_image
    except Exception as e:
        error_traceback = traceback.format_exc()
        return f"Error occurred: {e}\n\n{error_traceback}", None
# Interface with adjustable detection threshold and spectrogram visualization
interface = gr.Interface(
    fn=detect_watermark,
    inputs=[
        gr.Audio(label="Upload your audio", type="filepath"),
        gr.Slider(label="Detection Threshold", minimum=0, maximum=1, value=0.99),
    ],
    outputs=["text", "image"],
    title="Deep Fake Defender: AI Voice Cloning Detection",
    description="Upload an audio file to check if it's AI-generated or genuine. Adjust the detection threshold to change sensitivity.",
)
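
# --- Optional local smoke test (not wired into the Gradio app) ---
# A minimal sketch of how the detector could be sanity-checked offline: embed a
# watermark with AudioSeal's generator, then run the result back through
# detect_watermark. The generator name and the get_watermark/detect_watermark
# signatures follow the AudioSeal README; treat them as assumptions to verify
# against the installed audioseal version.
def _smoke_test(sample_rate=16000, duration_s=1.0):
    # Synthesize a one-second mono 440 Hz tone shaped as (batch, channels, samples).
    t = torch.arange(int(sample_rate * duration_s)) / sample_rate
    audio = torch.sin(2 * torch.pi * 440.0 * t).reshape(1, 1, -1)

    # Embed the watermark and detect it again.
    generator = AudioSeal.load_generator("audioseal_wm_16bits")
    watermarked = audio + generator.get_watermark(audio, sample_rate=sample_rate)
    detector = AudioSeal.load_detector("audioseal_detector_16bits")
    probability, message = detector.detect_watermark(watermarked, sample_rate=sample_rate)

    print(f"Detection probability: {probability:.3f}")  # expected near 1.0 for watermarked audio
    print(f"Decoded message bits: {message.flatten().tolist()}")
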
if __name__ == "__main__":
    interface.launch()