|
import warnings

import cv2
import dlib
import gradio as gr
import numpy as np
import torch
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from retinaface.pre_trained_models import get_model

from Scripts.ca_generator import get_augs
from Scripts.model import create_cam, create_model
from Scripts.preprocess import crop_face, extract_face, extract_frames
from Scripts.sbi_generator import (IoUfrom2bboxes, get_dlib_landmarks,
                                   get_retina_bbox, get_transforms,
                                   reorder_landmark, self_blending)
|
|
|
warnings.filterwarnings('ignore') |
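
# The demo runs entirely on CPU.
# 'sbcl' is the consistency-learning model; 'sbi' is the self-blended-images
# baseline ("Weights/FFc23.tar", presumably FaceForensics++ c23 weights).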
|
|
|
|
|
|
|
device = torch.device('cpu') |
|
|
|
sbcl = create_model("Weights/94_0.9485_val.tar") |
|
sbi = create_model("Weights/FFc23.tar") |
|
|
|
|
|
face_detector = get_model("resnet50_2020-07-20", max_size=1024, device=device) |
|
face_detector.eval() |
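
# Grad-CAM is created for each model; visualizations target the 'fake' logit
# (class index 1).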
|
|
|
|
|
cam_sbi = create_cam(sbi) |
|
cam_sbcl = create_cam(sbcl) |
|
targets = [ClassifierOutputTarget(1)] |
|
|
|
|
|
examples = ["Examples/Fake/fake1.png", "Examples/Real/real1.png", "Examples/Real/real2.png", "Examples/Fake/fake3.png", "Examples/Real/real3.png", |
|
"Examples/Fake/fake4.png", "Examples/Real/real4.png", "Examples/Fake/fake5.png", "Examples/Fake/fake6.png", "Examples/Fake/fake7.png", ] |
|
examples_videos = ['Examples/Fake1.mp4', 'Examples/Real1.mp4'] |
|
examples_sbi = ["Examples/Fake/fake1.png", "Examples/Real/real1.png", "Examples/Real/real2.png", "Examples/Fake/fake3.png", "Examples/Real/real3.png", |
|
"Examples/Fake/fake4.png", "Examples/Fake/fake5.png", ] |
|
|
|
|
|
dlib_face_detector = dlib.get_frontal_face_detector() |
|
dlib_face_predictor = dlib.shape_predictor( |
|
'Weights/shape_predictor_81_face_landmarks.dat') |
|
|
|
|
|
def generate_sbi(inp, blending_type, face_region): |
|
""" |
|
Visualizes the different steps in the self-blended image generation process for both RGB image and mask |
|
""" |
|
|
|
    detected = get_dlib_landmarks(inp, dlib_face_detector, dlib_face_predictor)
    if len(detected) == 0:
        raise Exception("No faces detected")
    landmark = detected[0]
|
bbox_lm = np.array([landmark[:, 0].min(), landmark[:, 1].min(), |
|
landmark[:, 0].max(), landmark[:, 1].max()]) |
|
bboxes = get_retina_bbox(inp, face_detector)[:2] |
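
    # Keep the RetinaFace box with the highest IoU against the landmark-derived box.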
|
|
|
|
|
iou_max = -1 |
|
for i in range(len(bboxes)): |
|
iou = IoUfrom2bboxes(bbox_lm, bboxes[i].flatten()) |
|
if iou_max < iou: |
|
bbox = bboxes[i] |
|
iou_max = iou |
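
    # Re-order the landmarks into the layout expected by the mask generator,
    # then crop the face with a margin.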
|
|
|
|
|
    # Keep a single name for the landmarks so the cropped-space coordinates
    # returned by crop_face are the ones used below.
    landmark = reorder_landmark(landmark)
    img, landmark, bbox, __ = crop_face(
        inp, landmark, bbox, margin=True, crop_by_bbox=False)
    cropped_input_face = img
|
|
|
|
|
    # Blend the (augmented) source face onto the target face, returning every
    # intermediate image and mask for visualization.
    (img_r_before_both_transforms, img_f_before_both_transforms, mask,
     mask_original, source_before_affine_transforms, _,
     source_after_affine_transforms, mask_after_affine_transforms) = self_blending(
        img.copy(), landmark.copy(), blending_type, face_region)
|
|
|
|
|
    transformed = get_transforms()(
        image=img_f_before_both_transforms.astype('uint8'),
        image1=img_r_before_both_transforms.astype('uint8'))
    img_f_after_both_transforms = transformed['image']
    img_r_after_both_transforms = transformed['image1']
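
    # Crop both images to the face bounding box and resize to 380x380, the
    # input size used by the models.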
|
|
|
|
|
    img_f, _, __, ___, y0_new, y1_new, x0_new, x1_new = crop_face(
        img_f_after_both_transforms, landmark, bbox, margin=False,
        crop_by_bbox=True, abs_coord=True, phase='train')
    img_r = img_r_after_both_transforms[y0_new:y1_new, x0_new:x1_new]
    img_f = cv2.resize(img_f, (380, 380), interpolation=cv2.INTER_LINEAR)
    img_r = cv2.resize(img_r, (380, 380), interpolation=cv2.INTER_LINEAR)
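
    # Convert the single-channel masks to RGB so Gradio can display them.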
|
|
|
|
|
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
    mask_original = cv2.cvtColor(mask_original, cv2.COLOR_GRAY2RGB)
    mask_after_affine_transforms = cv2.cvtColor(
        mask_after_affine_transforms, cv2.COLOR_GRAY2RGB)
|
    return (cropped_input_face, img_r_before_both_transforms, img_f_before_both_transforms,
            img_r_after_both_transforms, img_f_after_both_transforms, img_r, img_f,
            mask, mask_original, source_before_affine_transforms,
            source_after_affine_transforms, mask_after_affine_transforms)
|
|
|
|
|
def generate_ca(inp): |
|
""" |
|
    Applies consistency augmentations to the given input face.
|
""" |
|
    try:
        face = extract_face(inp, face_detector)[0].transpose(1, 2, 0)
    except IndexError:
        raise Exception("No faces detected")
    random_erasing = get_augs("REAlbu")
    random_cropping = get_augs("RandCropAlbu")
    dfdc_augs = get_augs("DFDCAlbu")
    return (face, random_erasing(image=face)['image'],
            random_cropping(image=face)['image'], dfdc_augs(image=face)['image'])
|
|
|
|
|
def predict_image(inp, model): |
|
""" |
|
Performs inference for a given input image and returns the prediction and CAM image. |
|
""" |
|
face_list = extract_face(inp, face_detector) |
|
|
|
if len(face_list) == 0: |
|
        return {'No face detected!': 1.0}, None
|
|
|
with torch.no_grad(): |
|
img = torch.tensor(face_list).to(device).float()/255 |
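
        # Fake probability from the selected model (index 1 of the softmax).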
|
|
|
if model == "Self-Blended Images": |
|
pred = sbi(img).softmax(1)[:, 1].cpu().data.numpy().tolist()[0] |
|
else: |
|
pred = sbcl(img).softmax(1)[:, 1].cpu().data.numpy().tolist()[0] |
|
|
|
confidences = {'Real': 1-pred, 'Fake': pred} |
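
    # Grad-CAM over the detected face; aug_smooth smooths the map with
    # test-time augmentation.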
|
|
|
if model == "Self-Blended Images": |
|
grayscale_cam = cam_sbi( |
|
input_tensor=img, targets=targets, aug_smooth=True) |
|
else: |
|
grayscale_cam = cam_sbcl( |
|
input_tensor=img, targets=targets, aug_smooth=True) |
|
grayscale_cam = grayscale_cam[0, :] |
|
cam_image = show_cam_on_image(face_list[0].transpose( |
|
1, 2, 0)/255, grayscale_cam, use_rgb=True) |
|
|
|
return confidences, cam_image |
|
|
|
|
|
def predict_video(inp, model): |
|
""" |
|
Performs inference for a given input video and returns the prediction and CAM image of the frame with the highest fake probability. |
|
""" |
|
face_list, idx_list = extract_frames(inp, 10, face_detector) |
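
    # extract_frames samples frames from the video; idx_list maps every face
    # crop back to the frame it came from.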
|
|
|
with torch.no_grad(): |
|
img = torch.tensor(face_list).to(device).float()/255 |
|
if model == "Self-Blended Images": |
|
pred = sbi(img).softmax(1)[:, 1] |
|
else: |
|
pred = sbcl(img).softmax(1)[:, 1] |
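
    # Aggregate per-face scores: take the max within each frame, then average
    # the per-frame maxima to get the video-level prediction.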
|
|
|
pred_list = [] |
|
idx_img = -1 |
|
for i in range(len(pred)): |
|
if idx_list[i] != idx_img: |
|
pred_list.append([]) |
|
idx_img = idx_list[i] |
|
pred_list[-1].append(pred[i].item()) |
|
pred_res = np.zeros(len(pred_list)) |
|
for i in range(len(pred_res)): |
|
pred_res[i] = max(pred_list[i]) |
|
pred = pred_res.mean() |
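
    # Show Grad-CAM for the frame with the highest fake probability.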
|
|
|
most_fake = np.argmax(pred_res) |
|
if model == "Self-Blended Images": |
|
grayscale_cam = cam_sbi(input_tensor=img[most_fake].unsqueeze( |
|
0), targets=targets, aug_smooth=True) |
|
else: |
|
grayscale_cam = cam_sbcl(input_tensor=img[most_fake].unsqueeze( |
|
0), targets=targets, aug_smooth=True) |
|
grayscale_cam = grayscale_cam[0, :] |
|
cam_image = show_cam_on_image(face_list[most_fake].transpose( |
|
1, 2, 0)/255, grayscale_cam, use_rgb=True) |
|
|
|
return {'Real': 1-pred, 'Fake': pred}, cam_image |
|
|
|
|
|
with gr.Blocks(title="Self-Blended Consistency Learning", css="#custom_header {min-height: 3rem} #custom_title {min-height: 3rem; text-align: center}") as demo: |
|
gr.Markdown("# Face Forgery Detector", elem_id="custom_title") |
|
gr.Markdown("Gradio Demo for 'Face Forgery Detection with Self-Blended Consistency Learning'. To use it, simply upload your image, or click one of the examples to load them. Paper to be available on ArXiv in the near future.", elem_id="custom_title") |
|
|
|
with gr.Tab("Image Inference"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
with gr.Box(): |
|
gr.Markdown("## Inputs", elem_id="custom_header") |
|
input_image = gr.Image(label="Input Image") |
|
input_image.style(height=240) |
|
                    model_selection = gr.Radio(
                        ['Self-Blended Images', 'Self-Blended Consistency Learning'],
                        type="value", value='Self-Blended Consistency Learning', label='Model')
|
btn = gr.Button(value="Submit") |
|
btn.style(full_width=True) |
|
with gr.Column(): |
|
with gr.Box(): |
|
gr.Markdown("## Outputs", elem_id="custom_header") |
|
output_image = gr.Image(label="GradCAM Image") |
|
output_image.style(height=240) |
|
                    label_probs = gr.Label()
|
gr.Examples( |
|
examples=examples, |
|
inputs=input_image, |
|
outputs=output_image, |
|
fn=predict_image, |
|
cache_examples=False, |
|
) |
|
with gr.Tab("Video Inference"): |
|
with gr.Row(): |
|
with gr.Column(): |
|
with gr.Box(): |
|
gr.Markdown("## Inputs", elem_id="custom_header") |
|
input_video = gr.Video(label="Input Video") |
|
input_video.style(height=240) |
|
                    model_selection_video = gr.Radio(
                        ['Self-Blended Images', 'Self-Blended Consistency Learning'],
                        type="value", value='Self-Blended Consistency Learning', label='Model')
|
btn_video = gr.Button(value="Submit") |
|
btn_video.style(full_width=True) |
|
|
|
with gr.Column(): |
|
with gr.Box(): |
|
gr.Markdown("## Outputs", elem_id="custom_header") |
|
output_image_video = gr.Image(label="GradCAM Image") |
|
output_image_video.style(height=240) |
|
                    label_probs_video = gr.Label()
|
gr.Examples( |
|
examples=examples_videos, |
|
inputs=input_video, |
|
outputs=output_image_video, |
|
fn=predict_video, |
|
cache_examples=False, |
|
) |
|
|
|
with gr.Tab("SBI Generator"): |
|
gr.Markdown("Input an image with a face to visualize the steps involved in the self-blended image (SBI) generation. Values for augmentations are randomly chosen. Blending type and face region can be varied. \ |
|
This process is a slightly modified version of the process from 'Detecting Deepfakes with Self-Blended Images (CVPR 2022)'", elem_id="custom_header") |
|
with gr.Row(): |
|
with gr.Column(): |
|
with gr.Box(): |
|
gr.Markdown("## Inputs", elem_id="custom_header") |
|
input_image_sbi = gr.Image(label="Input Image") |
|
input_image_sbi.style(height=240) |
|
btn_sbi = gr.Button(value="Submit") |
|
btn_sbi.style(full_width=True) |
|
with gr.Row(): |
|
blending_type = gr.Radio( |
|
["Poisson", "Mixup"], label="Blending Type", value="Poisson", interactive=True) |
|
face_region = gr.Radio( |
|
["1", "2", "3", "4"], label="Face Region", value="1", interactive=True) |
|
gr.Examples( |
|
examples=examples_sbi, |
|
inputs=input_image_sbi, |
|
fn=generate_sbi, |
|
cache_examples=False, |
|
) |
|
with gr.Row(): |
|
with gr.Box(): |
|
with gr.Column(): |
|
gr.Markdown("# Self-Blended Image Generation", |
|
elem_id="custom_header") |
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 1", elem_id="custom_header") |
|
                        gr.Markdown(
                            "Using the face detector and landmark predictor, obtain the face bounding box and facial landmarks to crop the face and produce the mask.", elem_id="custom_header")
|
with gr.Row(): |
|
cropped_input_face = gr.Image( |
|
label="Input face after cropping") |
|
cropped_input_face.style(height=240) |
|
mask_original = gr.Image(label="Original mask") |
|
mask_original.style(height=240) |
|
gr.Markdown("The cropped input face is duplicated to become a 'source' face and a 'target' face. Eventually, the source face will be blended onto the target face after augmentations done below.", elem_id="custom_header") |
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 2", elem_id="custom_header") |
|
gr.Markdown("Apply source-target augmentations", |
|
elem_id="custom_header") |
|
with gr.Row(): |
|
source_before_affine_transforms = gr.Image( |
|
label="Source face after source-target augmentations") |
|
source_before_affine_transforms.style(height=240) |
|
gr.Markdown("In this case, the source-target augmentations are applied to the source image for straight-forward visualization. In actual training,\ |
|
the augmentations are applied to either source or target face with 1:1 probability. Augmentations applied here \ |
|
include RGBShift, HueSaturationValue, RandomBrightnessContrast, RandomDownScale, Sharpen from Albumentations.") |
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 3", elem_id="custom_header") |
|
gr.Markdown( |
|
"Apply affine/elastic augmentations to augmented source image/mask", elem_id="custom_header") |
|
with gr.Row(): |
|
source_after_affine_transforms = gr.Image( |
|
label="Source face after affine augmentations") |
|
source_after_affine_transforms.style(height=240) |
|
|
|
mask_after_affine_transforms = gr.Image( |
|
label="Mask after elastic augmentations") |
|
mask_after_affine_transforms.style(height=240) |
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 4", elem_id="custom_header") |
|
gr.Markdown( |
|
"Apply smoothing augmentations to mask for gentle blending", elem_id="custom_header") |
|
mask = gr.Image(label="Mask after Gaussian smoothing") |
|
mask.style(height=240) |
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 5", elem_id="custom_header") |
|
gr.Markdown( |
|
"Perform blending (based on chosen blending option) to produce self-blended fake", elem_id="custom_header") |
|
with gr.Row(): |
|
img_r_before_both_transforms = gr.Image( |
|
label="Real face (for comparison)") |
|
img_r_before_both_transforms.style(height=240) |
|
|
|
img_f_before_both_transforms = gr.Image( |
|
label="Self-blended fake face") |
|
img_f_before_both_transforms.style(height=240) |
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 6", elem_id="custom_header") |
|
gr.Markdown( |
|
"Apply the same randomly chosen augmentations to both real and self-blended fake", elem_id="custom_header") |
|
with gr.Row(): |
|
img_r_after_both_transforms = gr.Image( |
|
label="Real face after augmentations") |
|
img_r_after_both_transforms.style(height=240) |
|
|
|
img_f_after_both_transforms = gr.Image( |
|
label="Self-blended fake face after augmentations") |
|
img_f_after_both_transforms.style(height=240) |
|
                        gr.Markdown(
                            "Augmentations applied here include RGBShift, HueSaturationValue, RandomBrightnessContrast, and ImageCompression from Albumentations.")
|
|
|
with gr.Box(): |
|
gr.Markdown("## Step 7", elem_id="custom_header") |
|
                        gr.Markdown(
                            "Crop the real and self-blended fake images to the face region", elem_id="custom_header")
|
with gr.Row(): |
|
output_r = gr.Image(label="Final real face") |
|
output_r.style(height=240) |
|
|
|
output_f = gr.Image(label="Final SBI face") |
|
output_f.style(height=240) |
|
gr.Markdown( |
|
"These are the images that are eventually fed into the model for training", elem_id="custom_header") |
|
|
|
with gr.Tab("Consistency Augmentations"): |
|
gr.Markdown("Input an image with a face to visualize the consistency augmentations. Values for augmentations are randomly chosen.", elem_id="custom_header") |
|
with gr.Row(): |
|
with gr.Box(): |
|
gr.Markdown("## Input", elem_id="custom_header") |
|
input_image_ca = gr.Image(label="Input Image") |
|
input_image_ca.style(height=240) |
|
btn_ca = gr.Button(value="Submit") |
|
btn_ca.style(full_width=True) |
|
gr.Examples( |
|
examples=examples_sbi, |
|
inputs=input_image_ca, |
|
fn=generate_ca, |
|
cache_examples=False, |
|
) |
|
with gr.Row(): |
|
with gr.Box(): |
|
with gr.Row(): |
|
og = gr.Image(label="Cropped Face (No augs)") |
|
og.style(height=240) |
|
re = gr.Image(label="Random Erasing") |
|
re.style(height=240) |
|
rc = gr.Image(label="Random Cropping") |
|
rc.style(height=240) |
|
dfdc = gr.Image(label="DFDC Selimsef") |
|
dfdc.style(height=240) |
|
|
|
btn.click(predict_image, inputs=[input_image, model_selection], outputs=[ |
|
label_probs, output_image]) |
|
btn_video.click(predict_video, inputs=[input_video, model_selection_video], outputs=[ |
|
label_probs_video, output_image_video]) |
|
btn_sbi.click(generate_sbi, inputs=[input_image_sbi, blending_type, face_region], outputs=[cropped_input_face, img_r_before_both_transforms, img_f_before_both_transforms, |
|
img_r_after_both_transforms, img_f_after_both_transforms, output_r, output_f, mask, |
|
mask_original, source_before_affine_transforms, source_after_affine_transforms, mask_after_affine_transforms]) |
|
btn_ca.click(generate_ca, inputs=[ |
|
input_image_ca], outputs=[og, re, rc, dfdc]) |
|
if __name__ == "__main__": |
|
demo.launch() |
|
|