"""Gradio app that classifies a video as real or deepfake.

Faces are cropped from evenly spaced frames with MTCNN, each crop is scored by
a Keras model loaded from the ``p1`` directory, and the fraction of crops
scored as fake decides the verdict shown to the user.
"""

import os
import tempfile
import zipfile

import gradio as gr
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_addons
from facenet_pytorch import MTCNN
from PIL import Image
import moviepy.editor as mp

# One-off extraction of the zipped model, kept for reference:
# local_zip = "FINAL-EFFICIENTNETV2-B0.zip"
# zip_ref = zipfile.ZipFile(local_zip, 'r')
# zip_ref.extractall('FINAL-EFFICIENTNETV2-B0')
# zip_ref.close()

# Side length (pixels) of the square face crops handed to the classifier.
FACE_SIZE = (224, 224)

# Load face detector
mtcnn = MTCNN(margin=14, keep_all=True, factor=0.7, device='cpu')


# Face Detection function, Reference: (Timesler, 2020);
# Source link: https://www.kaggle.com/timesler/facial-recognition-model-in-pytorch
class DetectionPipeline:
    """Pipeline class for detecting faces in the frames of a video file."""

    def __init__(self, detector, n_frames=None, batch_size=60, resize=None):
        """Constructor for DetectionPipeline class.

        Arguments:
            detector -- Face detector exposing ``detect(frames)``, which
                returns ``(boxes, probs)`` with one entry per frame.

        Keyword Arguments:
            n_frames {int} -- Total number of frames to load. These will be
                evenly spaced throughout the video. If not specified
                (i.e., None), all frames will be loaded. (default: {None})
            batch_size {int} -- Batch size to use with the face detector.
                (default: {60})
            resize {float} -- Fraction by which to resize frames from original
                prior to face detection. A value less than 1 results in
                downsampling and a value greater than 1 results in upsampling.
                (default: {None})
        """
        self.detector = detector
        self.n_frames = n_frames
        self.batch_size = batch_size
        self.resize = resize

    def __call__(self, filename):
        """Load frames from a video and return one face crop per sampled frame.

        Arguments:
            filename {str} -- Path to video.

        Returns:
            list -- RGB face crops resized to ``FACE_SIZE``. A frame without a
                usable detection reuses the most recent crop; frames that come
                before the first detected face are skipped. The list is empty
                when the video has no frames or no face is ever found.
        """
        v_cap = cv2.VideoCapture(filename)
        try:
            v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
            if v_len <= 0:
                return []

            # Pick 'n_frames' evenly spaced frames to sample
            if self.n_frames is None:
                sample = np.arange(0, v_len)
            else:
                sample = np.linspace(0, v_len - 1, self.n_frames).astype(int)
            # A set makes the per-frame membership test O(1).
            wanted = set(sample.tolist())

            faces = []
            frames = []
            last_face = None
            for j in range(v_len):
                # grab() advances the stream cheaply; only sampled frames are
                # decoded with retrieve().
                v_cap.grab()
                if j not in wanted:
                    continue

                success, frame = v_cap.retrieve()
                if not success:
                    continue
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

                # Frames are NumPy arrays, so resize with OpenCV.
                if self.resize is not None:
                    height, width = frame.shape[:2]
                    new_size = (int(width * self.resize),
                                int(height * self.resize))
                    frame = cv2.resize(frame, new_size)
                frames.append(frame)

                # When batch is full, detect faces and reset frame list
                if len(frames) >= self.batch_size:
                    last_face = self._detect_batch(frames, faces, last_face)
                    frames = []

            # Flush the remainder here rather than on "last sampled index", so
            # pending frames survive a failed read of the final sampled frame.
            if frames:
                self._detect_batch(frames, faces, last_face)
            return faces
        finally:
            v_cap.release()

    def _detect_batch(self, frames, faces, last_face):
        """Run the detector on ``frames`` and append the crops to ``faces``.

        Returns the most recent valid crop so the caller can carry it into the
        next batch as the fallback for frames without a detection.
        """
        boxes, _ = self.detector.detect(frames)
        for frame, frame_boxes in zip(frames, boxes):
            crop = self._crop_first_face(frame, frame_boxes)
            if crop is not None:
                last_face = cv2.resize(crop, FACE_SIZE)
            # Nothing to fall back on until a first face has been seen.
            if last_face is not None:
                faces.append(last_face)
        return last_face

    @staticmethod
    def _crop_first_face(frame, frame_boxes):
        """Return the crop for the first box of a frame, or None if unusable."""
        if frame_boxes is None or len(frame_boxes) == 0:
            return None
        height, width = frame.shape[:2]
        x1, y1, x2, y2 = frame_boxes[0].astype(int)[:4]
        # Clamp to the frame: a negative coordinate would otherwise be read as
        # an index from the far edge and produce an empty or wrong crop.
        x1, x2 = max(x1, 0), min(x2, width)
        y1, y2 = max(y1, 0), min(y2, height)
        crop = frame[y1:y2, x1:x2]
        # Empty or all-black crops are treated as "no face", as before.
        if not crop.any():
            return None
        return crop


detection_pipeline = DetectionPipeline(detector=mtcnn, n_frames=20, batch_size=60)

model = tf.keras.models.load_model("p1")


def deepfakespredict(input_video):
    """Classify a video as real or fake from its detected faces.

    Arguments:
        input_video {str} -- Path to the video to analyse.

    Returns:
        tuple -- ``(verdict, confidence, video_path)``: the verdict sentence,
            the percentage of face crops scored as fake, and the path to an
            MP4 showing the face crops. When no face is detected the video
            path is None and the texts say so.
    """
    faces = detection_pipeline(input_video)
    if not faces:
        return ("No face could be detected in the video.",
                "Deepfakes Confidence: N/A",
                None)

    # Score every crop in one call; column 1 is the score compared against
    # the 0.5 "fake" threshold.
    scores = model.predict(np.stack(faces) / 255)
    fake = int(np.sum(scores[:, 1] > 0.5))
    fake_ratio = fake / len(faces)

    text2 = "Deepfakes Confidence: " + str(fake_ratio * 100) + "%"
    if fake_ratio >= 0.5:
        text = "The video is FAKE."
    else:
        text = "The video is REAL."

    face_frames = [Image.fromarray(face.astype('uint8'), 'RGB') for face in faces]

    # Write into a per-request directory so concurrent requests cannot
    # overwrite each other's output files.
    out_dir = tempfile.mkdtemp(prefix="deepfake_")
    gif_path = os.path.join(out_dir, "results.gif")
    video_path = os.path.join(out_dir, "video.mp4")
    face_frames[0].save(gif_path,
                        save_all=True,
                        append_images=face_frames[1:],
                        duration=250,
                        loop=100)
    clip = mp.VideoFileClip(gif_path)
    try:
        clip.write_videofile(video_path)
    finally:
        clip.close()
    # The GIF is only an intermediate for the MP4 conversion.
    os.remove(gif_path)
    return text, text2, video_path


title = "EfficientNetV2 Deepfakes Video Detector"

examples = [
    ['Video1-fake-1-ff.mp4'],
    ['Video6-real-1-ff.mp4'],
    ['Video3-fake-3-ff.mp4'],
    ['Video8-real-3-ff.mp4'],
    ['real-1.mp4'],
    ['fake-1.mp4'],
]

demo = gr.Interface(deepfakespredict,
                    inputs=["video"],
                    outputs=["text", "text", gr.Video(label="Detected face sequence")],
                    title=title,
                    examples=examples)

if __name__ == "__main__":
    demo.launch()