# Hugging Face Spaces page-header residue ("Spaces: Sleeping") removed; kept as a comment so the module parses.
import gradio as gr
import torch
import torchaudio
import librosa
import numpy as np
from transformers import Wav2Vec2Processor, Wav2Vec2ForSequenceClassification
import torch.nn.functional as F
import torchaudio.transforms as T

# Pretrained wav2vec2 checkpoint fine-tuned for fake-audio (deepfake) detection.
model_name = "Mahmoud59/wav2vec2-fake-audio-detector"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Use a GPU when one is available; fall back to CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
def classify_audio(audio_path):
    """Classify an uploaded audio file as real or AI-generated (fake).

    Args:
        audio_path: Filesystem path to the audio file supplied by the Gradio
            ``Audio(type="filepath")`` input, or ``None`` when nothing was
            uploaded.

    Returns:
        A human-readable verdict string: ``"Fake"`` or ``"Real"`` together
        with the model's softmax confidence for the predicted class, or an
        error message when no audio was provided.
    """
    # Gradio passes None when the user submits without uploading a file.
    if audio_path is None:
        return "No audio provided."

    # librosa loads mono float32 and resamples to the 16 kHz rate the
    # wav2vec2 processor expects.
    waveform, _ = librosa.load(audio_path, sr=16000)
    waveform = torch.tensor(waveform, dtype=torch.float32)

    inputs = processor(waveform, sampling_rate=16000, return_tensors="pt", padding=True)
    inputs = {key: val.to(device) for key, val in inputs.items()}

    # Inference only — no gradients needed.
    with torch.no_grad():
        outputs = model(**inputs)

    # Softmax over the logits yields a per-class probability; report the
    # probability of the predicted class as the confidence (the UI
    # description promises a confidence alongside the verdict).
    probs = F.softmax(outputs.logits, dim=-1)
    pred = torch.argmax(probs, dim=-1).item()
    confidence = probs[0, pred].item()

    # NOTE(review): label mapping assumes index 1 == "Fake" per the original
    # code — confirm against the checkpoint's id2label config.
    label = "Fake" if pred == 1 else "Real"
    return f"{label} (confidence: {confidence:.2%})"
# Build the Gradio interface: one audio upload in, one text verdict out.
demo = gr.Interface(
    fn=classify_audio,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Wav2Vec2 Deepfake Detection",
    description="Upload an audio sample to check if it is fake or real, along with confidence.",
)

# Launch the web app only when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()