Spaces:

mariam-ahmed15
/

Deepfake-audio-detection

Running

App Files Files Community

Deepfake-audio-detection / app.py

mariam-ahmed15

Update app.py

2919490 verified 39 minutes ago

raw

history blame contribute delete

2.8 kB

	import torch
	import gradio as gr
	import librosa
	from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2FeatureExtractor
	import os

	# 1. CONFIGURATION
	# Hub checkpoint used for both the model architecture and the feature extractor.
	MODEL_ID = "facebook/wav2vec2-xls-r-300m"
	# Local checkpoint file; it is loaded below as a state dict for the quantized model.
	QUANTIZED_MODEL_PATH = "quantized_model.pth"

	# 2. LOAD MODEL
	print("Loading model architecture...")
	model = Wav2Vec2ForSequenceClassification.from_pretrained(
	    MODEL_ID,
	    num_labels=2,
	)
	feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_ID)

	# Dynamically quantize the nn.Linear layers to int8. This runs before the
	# state dict is loaded -- presumably so the module layout matches the
	# quantized checkpoint (NOTE(review): confirm how the checkpoint was saved).
	model = torch.quantization.quantize_dynamic(
	    model,
	    qconfig_spec={torch.nn.Linear},
	    dtype=torch.qint8,
	)

	# Restore the saved weights when the checkpoint is present. Both failure
	# paths only print a message, so the app still starts with whatever weights
	# from_pretrained produced.
	if not os.path.exists(QUANTIZED_MODEL_PATH):
	    print(f"Warning: {QUANTIZED_MODEL_PATH} not found. Using random weights.")
	else:
	    print("Loading quantized weights...")
	    try:
	        # NOTE: torch.load unpickles the file, so only ship trusted checkpoints.
	        model.load_state_dict(
	            torch.load(QUANTIZED_MODEL_PATH, map_location=torch.device("cpu"))
	        )
	    except Exception as load_error:
	        print(f"Error loading weights: {load_error}")

	# Inference only: switch the model to evaluation mode.
	model.eval()

	# 3. PREDICTION FUNCTION (NO CHUNKING)
	def predict_audio(audio_path):
	    """Classify one audio file as real or deepfake in a single forward pass.

	    Args:
	        audio_path: Filesystem path of the clip to analyse, or None when
	            no audio was supplied.

	    Returns:
	        On success, a dict mapping "Deepfake" and "Real" to their softmax
	        probabilities. Otherwise a human-readable message string: missing
	        input, empty clip, out-of-memory, or any other processing failure.
	        No exception derived from Exception propagates to the caller.
	    """
	    if audio_path is None:
	        return "No Audio Provided"

	    # One sample rate shared by the decoder and the feature extractor.
	    target_sr = 16000

	    try:
	        # Load the entire file, resampled to 16 kHz.
	        # NOTE(review): support for WhatsApp formats (.opus, .m4a) depends on
	        # the decoding backend available to librosa -- confirm on the host.
	        speech_array, _ = librosa.load(audio_path, sr=target_sr)

	        # A clip that decodes to zero samples cannot be scored; report it
	        # clearly instead of surfacing a low-level runtime error.
	        if speech_array.size == 0:
	            return "Error: No audio samples found. Please provide a longer clip."

	        # Process the WHOLE file at once; truncation=False ensures the
	        # feature extractor does not cut the clip.
	        inputs = feature_extractor(
	            speech_array,
	            sampling_rate=target_sr,
	            return_tensors="pt",
	            padding=True,
	            truncation=False,
	        )

	        # Gradients are not needed for inference.
	        with torch.no_grad():
	            logits = model(**inputs).logits
	            probs = torch.nn.functional.softmax(logits, dim=-1)

	        # Label 1 = Deepfake, Label 0 = Real.
	        # NOTE(review): verify this mapping against the training labels.
	        return {
	            "Deepfake": probs[0][1].item(),
	            "Real": probs[0][0].item(),
	        }

	    except RuntimeError as e:
	        # Runtime errors mentioning "memory" are reported as a too-long file,
	        # since the whole clip is processed in one pass.
	        if "memory" in str(e).lower():
	            return "Error: File too long (Out of Memory). Please use a shorter clip."
	        return f"Runtime Error: {str(e)}"
	    except Exception as e:
	        return f"Error processing audio: {str(e)}"

	# 4. CREATE INTERFACE
	# Input widget: accepts an upload or a microphone recording; type="filepath"
	# matches predict_audio, which takes a path argument.
	voice_note_input = gr.Audio(
	    label="Upload WhatsApp Voice Note",
	    type="filepath",
	    sources=["upload", "microphone"],
	)

	# Output widget: renders whatever predict_audio returns, limited to the top
	# two classes.
	verdict_output = gr.Label(num_top_classes=2)

	iface = gr.Interface(
	    fn=predict_audio,
	    inputs=voice_note_input,
	    outputs=verdict_output,
	    title="Deepfake Audio Detection API (No Chunking)",
	    description="Analyzes the full audio file in one pass.",
	)

	# Launch the app as soon as this script runs.
	iface.launch()