deepfake_detect / app.py
ssaad5678's picture
Create app.py
f96d7b3 verified
raw
history blame
4.26 kB
import gradio as gr
import torch
import torch.nn.functional as F
from facenet_pytorch import MTCNN, InceptionResnetV1
import cv2
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from pytorch_grad_cam.utils.image import show_cam_on_image
from PIL import Image
import numpy as np
import warnings
warnings.filterwarnings("ignore")
# Model setup: pick the compute device, build the face detector and the
# classifier, then restore the classifier weights from the local checkpoint.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Face detector, switched to inference mode.
mtcnn = MTCNN(select_largest=False, post_process=False, device=DEVICE)
mtcnn.eval()

# Single-logit classifier built on InceptionResnetV1.
model = InceptionResnetV1(
    pretrained="vggface2",
    classify=True,
    num_classes=1,
    device=DEVICE,
)

# The checkpoint is deserialized onto the CPU first; the model is moved to
# DEVICE only after the weights have been loaded.
checkpoint = torch.load(
    "resnetinceptionv1_epoch_32.pth",
    map_location=torch.device('cpu'),
)
model.load_state_dict(checkpoint['model_state_dict'])
model.to(DEVICE).eval()
# Model Inference
def predict_frame(frame):
    """Classify every face found in one video frame as real or fake.

    Args:
        frame: BGR image array, as returned by ``cv2.VideoCapture.read``.

    Returns:
        A ``(faces, confidences)`` tuple of parallel lists. ``faces`` holds one
        160x160 RGB uint8 image per usable face with the Grad-CAM heat map
        blended on top. ``confidences`` holds one dict per face with the keys
        ``'prediction'`` (``"real"`` or ``"fake"``) and ``'confidence'`` (the
        sigmoid score of the predicted label). Both lists are empty when no
        face is detected.
    """
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame_pil = Image.fromarray(frame)

    # Detect faces
    boxes, _ = mtcnn.detect(frame_pil)
    faces = []
    confidences = []
    if boxes is None:
        return faces, confidences

    height, width = frame.shape[:2]
    target_layers = [model.block8.branch1[-1]]
    targets = [ClassifierOutputTarget(0)]

    for box in boxes:
        # Detector boxes may extend past the image border. Clamp them so that
        # negative indices do not wrap around and empty crops never reach
        # cv2.resize (which raises on empty input).
        x1, y1, x2, y2 = box.astype(int)
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            continue

        # Preprocess the face: 160x160 crop -> (1, 3, 160, 160) float in [0, 1]
        face = cv2.resize(frame[y1:y2, x1:x2], (160, 160))
        face = torch.tensor(face).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0

        # Predict
        with torch.no_grad():
            fake_prediction = torch.sigmoid(model(face).squeeze()).item()
        real_prediction = 1 - fake_prediction
        prediction = "real" if fake_prediction < 0.5 else "fake"
        confidences.append({
            'prediction': prediction,
            'confidence': fake_prediction if prediction == 'fake' else real_prediction
        })

        # Visualize. The context manager removes the Grad-CAM hooks before the
        # next face is run under no_grad. `use_cuda` is not passed: model and
        # input already live on DEVICE, and recent pytorch-grad-cam releases
        # no longer accept that argument.
        with GradCAM(model=model, target_layers=target_layers) as cam:
            grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)[0, :]

        face_rgb = face.squeeze(0).permute(1, 2, 0).cpu().numpy()
        # show_cam_on_image already returns a uint8 image in [0, 255];
        # rescaling it by 255 again would overflow and corrupt the overlay.
        visualization = show_cam_on_image(face_rgb, grayscale_cam, use_rgb=True)
        face_uint8 = (face_rgb * 255).astype(np.uint8)
        face_with_mask = cv2.addWeighted(face_uint8, 1, visualization, 0.5, 0)
        faces.append(face_with_mask)

    return faces, confidences
def predict_video(input_video):
    """Run face-level deepfake detection on every frame of a video.

    Args:
        input_video: Video source handed to ``cv2.VideoCapture``.

    Returns:
        A ``(final_prediction, frames, all_confidences)`` tuple.
        ``final_prediction`` is ``'fake'`` only when strictly more faces were
        classified fake than real; ties, and videos in which no face was
        found, yield ``'real'``. ``frames`` and ``all_confidences`` are the
        concatenated per-frame results of ``predict_frame``.
    """
    cap = cv2.VideoCapture(input_video)
    frames = []
    all_confidences = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            faces, confidences = predict_frame(frame)
            frames.extend(faces)
            all_confidences.extend(confidences)
    finally:
        # Release the capture even when inference fails on some frame.
        cap.release()

    # Majority vote over all per-face predictions; every entry is either
    # 'fake' or 'real', so the real count is the remainder.
    fake_votes = sum(conf['prediction'] == 'fake' for conf in all_confidences)
    real_votes = len(all_confidences) - fake_votes
    final_prediction = 'fake' if fake_votes > real_votes else 'real'
    return final_prediction, frames, all_confidences
# Gradio Interface
def show_detected_faces(video):
    """Gradio callback: analyse ``video`` and return the three UI outputs.

    Returns the overall label, the list of annotated face images and the list
    of per-face confidence dicts, in that order, exactly as produced by
    ``predict_video``.
    """
    verdict, face_images, scores = predict_video(video)
    return (verdict, face_images, scores)
# Build the UI with the top-level Gradio components. The legacy
# gr.inputs / gr.outputs namespaces are deprecated (and absent from Gradio 4);
# a list of face images is shown with a Gallery and the list of per-face
# confidence dicts with a JSON viewer, since Image and Label cannot render
# those values.
demo = gr.Interface(
    fn=show_detected_faces,
    inputs=[
        gr.Video(label="Input Video"),
    ],
    outputs=[
        gr.Label(label="Class"),
        gr.Gallery(label="Detected Faces"),
        gr.JSON(label="Confidences"),
    ],
    title="Deep Fake Video Detection",
    description="Detect whether the Video is fake or real and visualize the detected faces with confidence scores.",
)
demo.launch()