import warnings
import cv2
import dlib
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
import gradio as gr
import numpy as np
import torch
from retinaface.pre_trained_models import get_model
from Scripts.model import create_cam, create_model
from Scripts.preprocess import crop_face, extract_face, extract_frames
from Scripts.ca_generator import get_augs
from Scripts.sbi_generator import (IoUfrom2bboxes, get_dlib_landmarks,
get_retina_bbox, get_transforms,
reorder_landmark, self_blending)
warnings.filterwarnings('ignore')
# Model Initialization
device = torch.device('cpu')
sbcl = create_model("Weights/94_0.9485_val.tar")  # Self-Blended Consistency Learning (SBCL) checkpoint
sbi = create_model("Weights/FFc23.tar")  # Self-Blended Images (SBI) baseline checkpoint
# Face Detector Initialization
face_detector = get_model("resnet50_2020-07-20", max_size=1024, device=device)
face_detector.eval()
# Grad-CAM Initialization
cam_sbi = create_cam(sbi)
cam_sbcl = create_cam(sbcl)
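# Class index 1 is the 'fake' class (see the confidence dicts below), so
# Grad-CAM highlights the regions driving the fake prediction.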
targets = [ClassifierOutputTarget(1)]
# Examples
examples = ["Examples/Fake/fake1.png", "Examples/Real/real1.png", "Examples/Real/real2.png",
            "Examples/Fake/fake3.png", "Examples/Real/real3.png", "Examples/Fake/fake4.png",
            "Examples/Real/real4.png", "Examples/Fake/fake5.png", "Examples/Fake/fake6.png",
            "Examples/Fake/fake7.png"]
examples_videos = ['Examples/Fake1.mp4', 'Examples/Real1.mp4']
examples_sbi = ["Examples/Fake/fake1.png", "Examples/Real/real1.png", "Examples/Real/real2.png",
                "Examples/Fake/fake3.png", "Examples/Real/real3.png", "Examples/Fake/fake4.png",
                "Examples/Fake/fake5.png"]
# dlib Models
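# The 81-point predictor extends dlib's standard 68 landmarks with forehead
# points, so the blending mask can cover the whole face.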
dlib_face_detector = dlib.get_frontal_face_detector()
dlib_face_predictor = dlib.shape_predictor(
'Weights/shape_predictor_81_face_landmarks.dat')
def generate_sbi(inp, blending_type, face_region):
"""
Visualizes the different steps in the self-blended image generation process for both RGB image and mask
"""
# Getting face bboxes and landmarks
landmark = get_dlib_landmarks(
inp, dlib_face_detector, dlib_face_predictor)[0]
bbox_lm = np.array([landmark[:, 0].min(), landmark[:, 1].min(),
landmark[:, 0].max(), landmark[:, 1].max()])
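    # bbox_lm: tight bounding box around the dlib landmarks, used below to
    # pick the best-matching RetinaFace detection by IoU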
bboxes = get_retina_bbox(inp, face_detector)[:2]
# Reducing bboxes to just one if multiple
iou_max = -1
for i in range(len(bboxes)):
iou = IoUfrom2bboxes(bbox_lm, bboxes[i].flatten())
if iou_max < iou:
bbox = bboxes[i]
iou_max = iou
# Input cropping
landmarks = reorder_landmark(landmark)
img, landmarks, bbox, __ = crop_face(
inp, landmarks, bbox, margin=True, crop_by_bbox=False)
cropped_input_face = img
# Blending
img_r_before_both_transforms, img_f_before_both_transforms, mask, mask_original, source_before_affine_transforms, _, source_after_affine_transforms, mask_after_affine_transforms = self_blending(
img.copy(), landmark.copy(), blending_type, face_region)
# Post-blending transforms
transformed = get_transforms()(image=img_f_before_both_transforms.astype(
'uint8'), image1=img_r_before_both_transforms.astype('uint8'))
img_f_after_both_transforms, img_r_after_both_transforms = transformed[
'image'], transformed['image1']
# Crop and resize the faces
img_f, _, __, ___, y0_new, y1_new, x0_new, x1_new = crop_face(
img_f_after_both_transforms, landmark, bbox, margin=False, crop_by_bbox=True, abs_coord=True, phase='train')
img_r = img_r_after_both_transforms[y0_new:y1_new, x0_new:x1_new]
img_f, img_r = cv2.resize(img_f, (380, 380), interpolation=cv2.INTER_LINEAR), cv2.resize(
img_r, (380, 380), interpolation=cv2.INTER_LINEAR)
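    # Both crops are resized to 380x380, the input resolution used by the models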
# Mask operations
mask, mask_original = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB), cv2.cvtColor(
mask_original, cv2.COLOR_GRAY2RGB)
mask_after_affine_transforms = cv2.cvtColor(
mask_after_affine_transforms, cv2.COLOR_GRAY2RGB)
return cropped_input_face, img_r_before_both_transforms, img_f_before_both_transforms, img_r_after_both_transforms, img_f_after_both_transforms,\
img_r, img_f, mask, mask_original, source_before_affine_transforms, source_after_affine_transforms, mask_after_affine_transforms
def generate_ca(inp):
"""
Applies consistency augmentations to the given input face
"""
    try:
        face = extract_face(inp, face_detector)[0].transpose(1, 2, 0)
    except IndexError as exc:  # extract_face returned no faces to index
        raise Exception("No faces detected") from exc
    random_erasing, random_cropping, dfdc_augs = get_augs(
        "REAlbu"), get_augs("RandCropAlbu"), get_augs("DFDCAlbu")
    return (face, random_erasing(image=face)['image'],
            random_cropping(image=face)['image'], dfdc_augs(image=face)['image'])
def predict_image(inp, model):
"""
Performs inference for a given input image and returns the prediction and CAM image.
"""
face_list = extract_face(inp, face_detector)
    if len(face_list) == 0:
        # Two return values to match the (label, image) outputs of the click handler
        return {'No face detected!': 1}, None
with torch.no_grad():
img = torch.tensor(face_list).to(device).float()/255
        # Select the network matching the chosen model
        net = sbi if model == "Self-Blended Images" else sbcl
        pred = net(img).softmax(1)[:, 1].cpu().data.numpy().tolist()[0]
    confidences = {'Real': 1 - pred, 'Fake': pred}
    # Grad-CAM runs outside torch.no_grad() so gradients are available
    cam = cam_sbi if model == "Self-Blended Images" else cam_sbcl
    grayscale_cam = cam(input_tensor=img, targets=targets, aug_smooth=True)
grayscale_cam = grayscale_cam[0, :]
cam_image = show_cam_on_image(face_list[0].transpose(
1, 2, 0)/255, grayscale_cam, use_rgb=True)
return confidences, cam_image
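# Example usage outside Gradio (hypothetical image path):
#   inp = cv2.cvtColor(cv2.imread("some_face.png"), cv2.COLOR_BGR2RGB)
#   confidences, cam_image = predict_image(inp, "Self-Blended Consistency Learning")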
def predict_video(inp, model):
"""
Performs inference for a given input video and returns the prediction and CAM image of the frame with the highest fake probability.
"""
face_list, idx_list = extract_frames(inp, 10, face_detector)
with torch.no_grad():
img = torch.tensor(face_list).to(device).float()/255
        # Select the network matching the chosen model
        net = sbi if model == "Self-Blended Images" else sbcl
        pred = net(img).softmax(1)[:, 1]
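    # extract_frames can return several faces per frame: group predictions by
    # frame index, keep the max fake probability within each frame, then
    # average across frames for the clip-level score.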
pred_list = []
idx_img = -1
for i in range(len(pred)):
if idx_list[i] != idx_img:
pred_list.append([])
idx_img = idx_list[i]
pred_list[-1].append(pred[i].item())
pred_res = np.zeros(len(pred_list))
for i in range(len(pred_res)):
pred_res[i] = max(pred_list[i])
    # Pick the single face with the highest fake probability; this index is
    # valid into face_list/img (np.argmax(pred_res) would be a frame index)
    most_fake = int(pred.argmax())
    pred = pred_res.mean()
    # Show Grad-CAM for the face with the highest fake probability
    cam = cam_sbi if model == "Self-Blended Images" else cam_sbcl
    grayscale_cam = cam(input_tensor=img[most_fake].unsqueeze(0),
                        targets=targets, aug_smooth=True)
grayscale_cam = grayscale_cam[0, :]
cam_image = show_cam_on_image(face_list[most_fake].transpose(
1, 2, 0)/255, grayscale_cam, use_rgb=True)
return {'Real': 1-pred, 'Fake': pred}, cam_image
with gr.Blocks(title="Self-Blended Consistency Learning", css="#custom_header {min-height: 3rem} #custom_title {min-height: 3rem; text-align: center}") as demo:
gr.Markdown("# Face Forgery Detector", elem_id="custom_title")
gr.Markdown("Gradio Demo for 'Face Forgery Detection with Self-Blended Consistency Learning'. To use it, simply upload your image, or click one of the examples to load them. Paper to be available on ArXiv in the near future.", elem_id="custom_title")
with gr.Tab("Image Inference"):
with gr.Row():
with gr.Column():
with gr.Box():
gr.Markdown("## Inputs", elem_id="custom_header")
input_image = gr.Image(label="Input Image")
input_image.style(height=240)
                    model_selection = gr.Radio(['Self-Blended Images', 'Self-Blended Consistency Learning'],
                                               value='Self-Blended Consistency Learning', label='Model')
btn = gr.Button(value="Submit")
btn.style(full_width=True)
with gr.Column():
with gr.Box():
gr.Markdown("## Outputs", elem_id="custom_header")
output_image = gr.Image(label="GradCAM Image")
output_image.style(height=240)
                    label_probs = gr.Label()
gr.Examples(
examples=examples,
inputs=input_image,
outputs=output_image,
fn=predict_image,
cache_examples=False,
)
with gr.Tab("Video Inference"):
with gr.Row():
with gr.Column():
with gr.Box():
gr.Markdown("## Inputs", elem_id="custom_header")
input_video = gr.Video(label="Input Video")
input_video.style(height=240)
                    model_selection_video = gr.Radio(
                        ['Self-Blended Images', 'Self-Blended Consistency Learning'], value='Self-Blended Consistency Learning', label='Model')
btn_video = gr.Button(value="Submit")
btn_video.style(full_width=True)
with gr.Column():
with gr.Box():
gr.Markdown("## Outputs", elem_id="custom_header")
output_image_video = gr.Image(label="GradCAM Image")
output_image_video.style(height=240)
                    label_probs_video = gr.Label()
gr.Examples(
examples=examples_videos,
inputs=input_video,
outputs=output_image_video,
fn=predict_video,
cache_examples=False,
)
with gr.Tab("SBI Generator"):
gr.Markdown("Input an image with a face to visualize the steps involved in the self-blended image (SBI) generation. Values for augmentations are randomly chosen. Blending type and face region can be varied. \
This process is a slightly modified version of the process from 'Detecting Deepfakes with Self-Blended Images (CVPR 2022)'", elem_id="custom_header")
with gr.Row():
with gr.Column():
with gr.Box():
gr.Markdown("## Inputs", elem_id="custom_header")
input_image_sbi = gr.Image(label="Input Image")
input_image_sbi.style(height=240)
btn_sbi = gr.Button(value="Submit")
btn_sbi.style(full_width=True)
with gr.Row():
blending_type = gr.Radio(
["Poisson", "Mixup"], label="Blending Type", value="Poisson", interactive=True)
face_region = gr.Radio(
["1", "2", "3", "4"], label="Face Region", value="1", interactive=True)
gr.Examples(
examples=examples_sbi,
inputs=input_image_sbi,
fn=generate_sbi,
cache_examples=False,
)
with gr.Row():
with gr.Box():
with gr.Column():
gr.Markdown("# Self-Blended Image Generation",
elem_id="custom_header")
with gr.Box():
gr.Markdown("## Step 1", elem_id="custom_header")
                        gr.Markdown(
                            "Using facial landmark models, obtain the face bounding box and facial landmarks to crop the face and produce the mask.", elem_id="custom_header")
with gr.Row():
cropped_input_face = gr.Image(
label="Input face after cropping")
cropped_input_face.style(height=240)
mask_original = gr.Image(label="Original mask")
mask_original.style(height=240)
gr.Markdown("The cropped input face is duplicated to become a 'source' face and a 'target' face. Eventually, the source face will be blended onto the target face after augmentations done below.", elem_id="custom_header")
with gr.Box():
gr.Markdown("## Step 2", elem_id="custom_header")
gr.Markdown("Apply source-target augmentations",
elem_id="custom_header")
with gr.Row():
source_before_affine_transforms = gr.Image(
label="Source face after source-target augmentations")
source_before_affine_transforms.style(height=240)
gr.Markdown("In this case, the source-target augmentations are applied to the source image for straight-forward visualization. In actual training,\
the augmentations are applied to either source or target face with 1:1 probability. Augmentations applied here \
include RGBShift, HueSaturationValue, RandomBrightnessContrast, RandomDownScale, Sharpen from Albumentations.")
with gr.Box():
gr.Markdown("## Step 3", elem_id="custom_header")
gr.Markdown(
"Apply affine/elastic augmentations to augmented source image/mask", elem_id="custom_header")
with gr.Row():
source_after_affine_transforms = gr.Image(
label="Source face after affine augmentations")
source_after_affine_transforms.style(height=240)
mask_after_affine_transforms = gr.Image(
label="Mask after elastic augmentations")
mask_after_affine_transforms.style(height=240)
with gr.Box():
gr.Markdown("## Step 4", elem_id="custom_header")
gr.Markdown(
"Apply smoothing augmentations to mask for gentle blending", elem_id="custom_header")
mask = gr.Image(label="Mask after Gaussian smoothing")
mask.style(height=240)
with gr.Box():
gr.Markdown("## Step 5", elem_id="custom_header")
gr.Markdown(
"Perform blending (based on chosen blending option) to produce self-blended fake", elem_id="custom_header")
with gr.Row():
img_r_before_both_transforms = gr.Image(
label="Real face (for comparison)")
img_r_before_both_transforms.style(height=240)
img_f_before_both_transforms = gr.Image(
label="Self-blended fake face")
img_f_before_both_transforms.style(height=240)
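                            # Blending sketch (assumed form, following the SBI paper): for 'Mixup',
                            # blended = mask * source + (1 - mask) * target with a random blend
                            # ratio; 'Poisson' instead seamlessly clones the source region into
                            # the target (cv2.seamlessClone-style).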
with gr.Box():
gr.Markdown("## Step 6", elem_id="custom_header")
gr.Markdown(
"Apply the same randomly chosen augmentations to both real and self-blended fake", elem_id="custom_header")
with gr.Row():
img_r_after_both_transforms = gr.Image(
label="Real face after augmentations")
img_r_after_both_transforms.style(height=240)
img_f_after_both_transforms = gr.Image(
label="Self-blended fake face after augmentations")
img_f_after_both_transforms.style(height=240)
                        gr.Markdown(
                            "Augmentations applied here include RGBShift, HueSaturationValue, RandomBrightnessContrast, and ImageCompression from Albumentations.")
with gr.Box():
gr.Markdown("## Step 7", elem_id="custom_header")
                        gr.Markdown(
                            "Crop the real and self-blended fake images to contain only the faces", elem_id="custom_header")
with gr.Row():
output_r = gr.Image(label="Final real face")
output_r.style(height=240)
output_f = gr.Image(label="Final SBI face")
output_f.style(height=240)
                        gr.Markdown(
                            "These are the images that are eventually fed into the model during training.", elem_id="custom_header")
with gr.Tab("Consistency Augmentations"):
gr.Markdown("Input an image with a face to visualize the consistency augmentations. Values for augmentations are randomly chosen.", elem_id="custom_header")
with gr.Row():
with gr.Box():
gr.Markdown("## Input", elem_id="custom_header")
input_image_ca = gr.Image(label="Input Image")
input_image_ca.style(height=240)
btn_ca = gr.Button(value="Submit")
btn_ca.style(full_width=True)
gr.Examples(
examples=examples_sbi,
inputs=input_image_ca,
fn=generate_ca,
cache_examples=False,
)
with gr.Row():
with gr.Box():
with gr.Row():
og = gr.Image(label="Cropped Face (No augs)")
og.style(height=240)
re = gr.Image(label="Random Erasing")
re.style(height=240)
rc = gr.Image(label="Random Cropping")
rc.style(height=240)
dfdc = gr.Image(label="DFDC Selimsef")
dfdc.style(height=240)
btn.click(predict_image, inputs=[input_image, model_selection], outputs=[
label_probs, output_image])
btn_video.click(predict_video, inputs=[input_video, model_selection_video], outputs=[
label_probs_video, output_image_video])
btn_sbi.click(generate_sbi, inputs=[input_image_sbi, blending_type, face_region], outputs=[cropped_input_face, img_r_before_both_transforms, img_f_before_both_transforms,
img_r_after_both_transforms, img_f_after_both_transforms, output_r, output_f, mask,
mask_original, source_before_affine_transforms, source_after_affine_transforms, mask_after_affine_transforms])
btn_ca.click(generate_ca, inputs=[
input_image_ca], outputs=[og, re, rc, dfdc])
if __name__ == "__main__":
demo.launch()