Spaces:

ssaad5678
/

deepfake_detect

Runtime error

App Files Files Community

deepfake_detect / app.py

ssaad5678

Create app.py

f96d7b3 verified 7 months ago

raw

history blame

4.26 kB

	import gradio as gr
	import torch
	import torch.nn.functional as F
	from facenet_pytorch import MTCNN, InceptionResnetV1
	import cv2
	from pytorch_grad_cam import GradCAM
	from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
	from pytorch_grad_cam.utils.image import show_cam_on_image
	from PIL import Image
	import numpy as np
	import warnings

	# Suppress all warnings emitted while the app runs.
	warnings.filterwarnings("ignore")

	# Run on the GPU when one is available, otherwise on the CPU.
	DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

	# Face detector, switched to evaluation mode.
	mtcnn = MTCNN(select_largest=False, post_process=False, device=DEVICE)
	mtcnn.eval()

	# Classifier with a single output (num_classes=1), started from the
	# "vggface2" pretrained weights.
	model = InceptionResnetV1(pretrained="vggface2", classify=True, num_classes=1, device=DEVICE)

	# Replace the weights with the checkpoint stored next to this file. The
	# checkpoint is deserialised onto the CPU first and the model is moved to
	# DEVICE afterwards.
	checkpoint = torch.load(
	    "resnetinceptionv1_epoch_32.pth",
	    map_location=torch.device('cpu'),
	)
	model.load_state_dict(checkpoint['model_state_dict'])
	model.to(DEVICE).eval()

	# Model Inference
	def predict_frame(frame):
	    """Classify every face detected in one video frame as real or fake.

	    Args:
	        frame: Image array in OpenCV's BGR channel order (it is converted
	            to RGB here before detection), e.g. a frame returned by
	            ``cv2.VideoCapture.read``.

	    Returns:
	        A ``(faces, confidences)`` tuple of two lists with one entry per
	        usable face:

	        * ``faces`` - 160x160 uint8 RGB crops blended with their Grad-CAM
	          heat map.
	        * ``confidences`` - dicts with the keys ``'prediction'``
	          (``"real"`` or ``"fake"``) and ``'confidence'`` (the sigmoid
	          score of the predicted class).

	        Both lists are empty when no face is detected.
	    """
	    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

	    # Detect faces
	    boxes, _ = mtcnn.detect(Image.fromarray(frame))

	    faces = []
	    confidences = []
	    if boxes is None:
	        return faces, confidences

	    frame_height, frame_width = frame.shape[:2]

	    # The Grad-CAM helper does not depend on the individual face, so it is
	    # built once per frame instead of once per detected face.
	    # NOTE(review): newer pytorch-grad-cam releases appear to have dropped
	    # the `use_cuda` argument - confirm against the installed version.
	    cam = GradCAM(
	        model=model,
	        target_layers=[model.block8.branch1[-1]],
	        use_cuda=torch.cuda.is_available(),
	    )
	    targets = [ClassifierOutputTarget(0)]

	    for box in boxes:
	        # Detector boxes may reach beyond the image borders. Clamp them so
	        # that negative coordinates are not interpreted as "count from the
	        # end" by NumPy slicing, and skip boxes that end up empty (an empty
	        # crop would make cv2.resize raise).
	        x1, y1, x2, y2 = box.astype(int)
	        x1, y1 = max(x1, 0), max(y1, 0)
	        x2, y2 = min(x2, frame_width), min(y2, frame_height)
	        if x2 <= x1 or y2 <= y1:
	            continue

	        # Preprocess the face: resize to the 160x160 size fed to the model,
	        # then convert to a 1 x C x H x W float tensor scaled to [0, 1].
	        crop = cv2.resize(frame[y1:y2, x1:x2], (160, 160))
	        face = torch.tensor(crop).permute(2, 0, 1).unsqueeze(0).float().to(DEVICE) / 255.0

	        # Predict: the sigmoid of the single output is the "fake" score.
	        with torch.no_grad():
	            fake_score = torch.sigmoid(model(face).squeeze()).item()
	        prediction = "real" if fake_score < 0.5 else "fake"

	        confidences.append({
	            'prediction': prediction,
	            'confidence': fake_score if prediction == 'fake' else 1 - fake_score,
	        })

	        # Visualize (outside no_grad: Grad-CAM needs gradients).
	        grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)[0, :]
	        face_rgb = face.squeeze().permute(1, 2, 0).cpu().numpy()
	        # show_cam_on_image already returns a uint8 image in [0, 255];
	        # scaling it by 255 again would wrap around and corrupt the overlay.
	        visualization = show_cam_on_image(face_rgb, grayscale_cam, use_rgb=True)
	        face_with_mask = cv2.addWeighted(crop, 1, visualization, 0.5, 0)
	        faces.append(face_with_mask)

	    return faces, confidences

	def predict_video(input_video):
	    """Run face-level deepfake detection over every frame of a video.

	    Args:
	        input_video: Value handed to ``cv2.VideoCapture`` (e.g. the path of
	            a video file).

	    Returns:
	        A ``(final_prediction, frames, all_confidences)`` tuple:

	        * ``final_prediction`` - ``'fake'`` when strictly more faces were
	          predicted fake than real, otherwise ``'real'`` (this includes
	          ties and videos without any detected face).
	        * ``frames`` - the annotated face images of all frames, in order.
	        * ``all_confidences`` - the per-face dicts returned by
	          ``predict_frame``, in the same order as ``frames``.
	    """
	    cap = cv2.VideoCapture(input_video)

	    frames = []
	    all_confidences = []

	    # Release the capture even if inference fails part-way through.
	    try:
	        while True:
	            ret, frame = cap.read()
	            if not ret:
	                break

	            faces, confidences = predict_frame(frame)

	            if faces:
	                frames.extend(faces)
	                all_confidences.extend(confidences)
	    finally:
	        cap.release()

	    # Determine the final prediction by majority vote over all faces.
	    fake_votes = sum(1 for conf in all_confidences if conf['prediction'] == 'fake')
	    real_votes = sum(1 for conf in all_confidences if conf['prediction'] == 'real')
	    final_prediction = 'fake' if fake_votes > real_votes else 'real'

	    return final_prediction, frames, all_confidences

	# Gradio Interface
	def show_detected_faces(video):
	    """Gradio callback: analyse *video* and return its three outputs.

	    Delegates to ``predict_video`` and returns, in order, the overall
	    class, the list of face images and the list of per-face confidences.
	    """
	    overall_class, face_images, face_scores = predict_video(video)
	    return overall_class, face_images, face_scores

	# Build the UI from the top-level Gradio components. The legacy
	# `gr.inputs` / `gr.outputs` namespaces are deprecated (and absent from
	# current Gradio releases); a list of images is shown with a Gallery and
	# the list of per-face confidence dicts with a JSON viewer.
	demo = gr.Interface(
	    fn=show_detected_faces,
	    inputs=[
	        gr.Video(label="Input Video"),
	    ],
	    outputs=[
	        gr.Label(label="Class"),
	        gr.Gallery(label="Detected Faces"),
	        gr.JSON(label="Confidences"),
	    ],
	    title="Deep Fake Video Detection",
	    description="Detect whether the Video is fake or real and visualize the detected faces with confidence scores.",
	)
	demo.launch()