# Hugging Face Space page header (status at capture time: "Runtime error")
import gradio as gr | |
import torch | |
import torch.nn.functional as F | |
from facenet_pytorch import MTCNN, InceptionResnetV1 | |
import cv2 | |
from pytorch_grad_cam import GradCAM | |
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget | |
from pytorch_grad_cam.utils.image import show_cam_on_image | |
from PIL import Image | |
import numpy as np | |
import warnings | |
# Suppress every warning category for the lifetime of the process.
warnings.filterwarnings("ignore")

# Download and Load Model
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

# Face detector, placed on DEVICE and switched to inference mode.
mtcnn = MTCNN(select_largest=False, post_process=False, device=DEVICE)
mtcnn.eval()

# Classifier backbone: starts from the "vggface2" weights and exposes a
# single-output classification head (num_classes=1).
model = InceptionResnetV1(
    pretrained="vggface2",
    classify=True,
    num_classes=1,
    device=DEVICE,
)

# The checkpoint is deserialised onto the CPU first; the model is moved to
# DEVICE only after its weights have been replaced.
checkpoint = torch.load(
    "resnetinceptionv1_epoch_32.pth",
    map_location=torch.device('cpu'),
)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(DEVICE)
model.eval()
# Model Inference | |
def _build_grad_cam(target_layers):
    """Create a GradCAM explainer for ``model`` across pytorch-grad-cam versions."""
    try:
        # Older releases take an explicit CUDA flag.
        return GradCAM(
            model=model,
            target_layers=target_layers,
            use_cuda=torch.cuda.is_available(),
        )
    except TypeError:
        # Newer releases removed the ``use_cuda`` keyword and follow the
        # device the model already lives on.
        return GradCAM(model=model, target_layers=target_layers)


def predict_frame(frame):
    """Predict whether each face in the input frame is real or fake.

    Args:
        frame: Image array in BGR channel order (it is converted with
            ``cv2.COLOR_BGR2RGB`` before any further processing).

    Returns:
        A ``(faces, confidences)`` tuple of two parallel lists:

        * ``faces`` - one 160x160 RGB ``uint8`` array per usable detection,
          with the Grad-CAM heat map blended on top of the face crop.
        * ``confidences`` - one dict per usable detection with keys
          ``'prediction'`` (``"real"`` or ``"fake"``) and ``'confidence'``
          (the score of the predicted class).

        Both lists are empty when no face is detected.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_pil = Image.fromarray(frame)
    frame_height, frame_width = frame.shape[:2]

    # Detect faces
    boxes, _ = mtcnn.detect(frame_pil)
    faces = []
    confidences = []
    if boxes is None:
        return faces, confidences

    target_layers = [model.block8.branch1[-1]]
    targets = [ClassifierOutputTarget(0)]

    for box in boxes:
        # Detected boxes may extend past the image border. Clamp them so the
        # slice below cannot wrap around through negative indices, and skip
        # boxes that end up empty (cv2.resize fails on an empty crop).
        x1, y1, x2, y2 = box.astype(int)
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, frame_width), min(y2, frame_height)
        if x2 <= x1 or y2 <= y1:
            continue

        # Resize to match InceptionResnetV1 input size
        face_rgb = cv2.resize(frame[y1:y2, x1:x2], (160, 160))
        face = torch.tensor(face_rgb).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0

        # Predict: the sigmoid of the single model output is the "fake" score.
        with torch.no_grad():
            fake_prediction = torch.sigmoid(model(face).squeeze()).item()
        real_prediction = 1 - fake_prediction
        prediction = "real" if fake_prediction < 0.5 else "fake"
        confidences.append({
            'prediction': prediction,
            'confidence': fake_prediction if prediction == 'fake' else real_prediction
        })

        # Visualize. Grad-CAM needs gradients, so it runs outside no_grad;
        # the context manager releases its hooks once the map is computed.
        with _build_grad_cam(target_layers) as cam:
            grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)
        grayscale_cam = grayscale_cam[0, :]

        # show_cam_on_image takes a float image in [0, 1] and already returns
        # a uint8 image, so the result must not be rescaled by 255 again.
        face_float = face_rgb.astype(np.float32) / 255.0
        visualization = show_cam_on_image(face_float, grayscale_cam, use_rgb=True)
        face_with_mask = cv2.addWeighted(face_rgb, 1, visualization, 0.5, 0)
        faces.append(face_with_mask)

    return faces, confidences
def predict_video(input_video, frame_stride=1):
    """Classify a video as real or fake from the faces found in its frames.

    Args:
        input_video: Value passed to ``cv2.VideoCapture`` (e.g. a file path).
        frame_stride: Analyse every ``frame_stride``-th frame. The default of
            1 analyses every frame; larger values trade coverage for speed.

    Returns:
        A ``(final_prediction, frames, all_confidences)`` tuple, where
        ``final_prediction`` is ``'fake'`` only when strictly more faces were
        classified fake than real, ``frames`` is the list of annotated face
        images and ``all_confidences`` is the list of per-face result dicts,
        both collected from ``predict_frame``.

        NOTE: a tie, a video without any detected face, and a video that
        cannot be read all yield ``'real'`` with whatever was collected
        (possibly empty lists), so callers that care should inspect
        ``all_confidences``.

    Raises:
        ValueError: If ``frame_stride`` is smaller than 1.
    """
    if frame_stride < 1:
        raise ValueError("frame_stride must be >= 1")

    cap = cv2.VideoCapture(input_video)
    frames = []
    all_confidences = []
    try:
        frame_index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_index % frame_stride == 0:
                faces, confidences = predict_frame(frame)
                if faces:
                    frames.extend(faces)
                    all_confidences.extend(confidences)
            frame_index += 1
    finally:
        # Release the capture handle even if inference on a frame fails.
        cap.release()

    # Determine the final prediction based on the maximum occurrence of predictions
    fake_votes = sum(1 for conf in all_confidences if conf['prediction'] == 'fake')
    real_votes = sum(1 for conf in all_confidences if conf['prediction'] == 'real')
    final_prediction = 'fake' if fake_votes > real_votes else 'real'
    return final_prediction, frames, all_confidences
# Gradio Interface | |
def show_detected_faces(video):
    """Gradio callback: run ``predict_video`` on ``video`` and pass its result through.

    Returns the ``(prediction, frames, confidences)`` tuple unchanged, in the
    order the interface outputs are declared.
    """
    return predict_video(video)
# Build the UI with top-level Gradio components: the `gr.inputs` / `gr.outputs`
# namespaces are deprecated and absent from current Gradio releases.
# A Gallery shows the list of annotated face images, and a JSON viewer shows
# the list of per-face result dicts (a Label cannot render a list of dicts).
demo = gr.Interface(
    fn=show_detected_faces,
    inputs=[
        gr.Video(label="Input Video"),
    ],
    outputs=[
        gr.Label(label="Class"),
        gr.Gallery(label="Detected Faces"),
        gr.JSON(label="Confidences"),
    ],
    title="Deep Fake Video Detection",
    description="Detect whether the Video is fake or real and visualize the detected faces with confidence scores.",
)
demo.launch()