8testiaa4

Runtime error

File size: 19,297 Bytes

2f99bb4

import warnings
import cv2
import dlib
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
import gradio as gr
import numpy as np
import torch
from retinaface.pre_trained_models import get_model

from Scripts.model import create_cam, create_model
from Scripts.preprocess import crop_face, extract_face, extract_frames
from Scripts.ca_generator import get_augs
from Scripts.sbi_generator import (IoUfrom2bboxes, get_dlib_landmarks,
                                   get_retina_bbox, get_transforms,
                                   reorder_landmark, self_blending)

# Suppress every Python warning for the lifetime of the process.
warnings.filterwarnings('ignore')


# ---------------------------------------------------------------------------
# Classification models
# ---------------------------------------------------------------------------
# Tensors and the face detector are placed on the CPU.
device = torch.device('cpu')

# `sbcl` is used when the UI selects "Self-Blended Consistency Learning",
# `sbi` when it selects "Self-Blended Images" (see predict_image/predict_video).
sbcl = create_model("Weights/94_0.9485_val.tar")
sbi = create_model("Weights/FFc23.tar")

# ---------------------------------------------------------------------------
# RetinaFace face detector (switched to eval mode right after loading)
# ---------------------------------------------------------------------------
face_detector = get_model(
    "resnet50_2020-07-20",
    max_size=1024,
    device=device,
)
face_detector.eval()

# ---------------------------------------------------------------------------
# Grad-CAM helpers, one per classifier
# ---------------------------------------------------------------------------
cam_sbi = create_cam(sbi)
cam_sbcl = create_cam(sbcl)
# Class index 1 is the output the prediction functions report as 'Fake'.
targets = [ClassifierOutputTarget(1)]

# ---------------------------------------------------------------------------
# Example files offered in the UI
# ---------------------------------------------------------------------------
examples = [
    "Examples/Fake/fake1.png",
    "Examples/Real/real1.png",
    "Examples/Real/real2.png",
    "Examples/Fake/fake3.png",
    "Examples/Real/real3.png",
    "Examples/Fake/fake4.png",
    "Examples/Real/real4.png",
    "Examples/Fake/fake5.png",
    "Examples/Fake/fake6.png",
    "Examples/Fake/fake7.png",
]
examples_videos = [
    'Examples/Fake1.mp4',
    'Examples/Real1.mp4',
]
examples_sbi = [
    "Examples/Fake/fake1.png",
    "Examples/Real/real1.png",
    "Examples/Real/real2.png",
    "Examples/Fake/fake3.png",
    "Examples/Real/real3.png",
    "Examples/Fake/fake4.png",
    "Examples/Fake/fake5.png",
]

# ---------------------------------------------------------------------------
# dlib face detector and 81-point landmark predictor
# ---------------------------------------------------------------------------
dlib_face_detector = dlib.get_frontal_face_detector()
dlib_face_predictor = dlib.shape_predictor('Weights/shape_predictor_81_face_landmarks.dat')


def generate_sbi(inp, blending_type, face_region):
    """
    Visualizes the different steps in the self-blended image generation process for both RGB image and mask

    Args:
        inp: Input image containing a face.
        blending_type: Blending option forwarded unchanged to ``self_blending``
            (the UI offers "Poisson" and "Mixup").
        face_region: Face-region option forwarded unchanged to ``self_blending``
            (the UI offers "1" to "4").

    Returns:
        A 12-tuple of images, in the order the "SBI Generator" tab wires its
        outputs: cropped input face, real/fake before the shared transforms,
        real/fake after the shared transforms, final real/fake crops resized
        to 380x380, smoothed mask, original mask, source before affine
        transforms, source after affine transforms, mask after affine
        transforms. The three masks are converted from grayscale to RGB.

    Raises:
        Exception: "No faces detected" when no landmarks or no bounding box
            could be obtained for the input.
    """
    # Getting face bboxes and landmarks
    landmark_sets = get_dlib_landmarks(
        inp, dlib_face_detector, dlib_face_predictor)
    if landmark_sets is None or len(landmark_sets) == 0:
        # Fail with the same message generate_ca uses instead of an IndexError.
        raise Exception("No faces detected")
    landmark = landmark_sets[0]
    bbox_lm = np.array([landmark[:, 0].min(), landmark[:, 1].min(),
                        landmark[:, 0].max(), landmark[:, 1].max()])
    # Only the first two detector boxes are considered.
    bboxes = get_retina_bbox(inp, face_detector)[:2]

    # Reducing bboxes to just one if multiple: keep the box that overlaps
    # most with the landmark bounding box.
    bbox = None
    iou_max = -1
    for candidate in bboxes:
        iou = IoUfrom2bboxes(bbox_lm, candidate.flatten())
        if iou_max < iou:
            bbox = candidate
            iou_max = iou
    if bbox is None:
        # Previously `bbox` stayed unbound here and crop_face failed with a
        # NameError; report the actual problem instead.
        raise Exception("No faces detected")

    # Input cropping
    landmarks = reorder_landmark(landmark)
    img, landmarks, bbox, __ = crop_face(
        inp, landmarks, bbox, margin=True, crop_by_bbox=False)
    cropped_input_face = img

    # Blending
    # NOTE(review): self_blending and the final crop_face below receive
    # `landmark` (the array from before the crop above), not the `landmarks`
    # returned by crop_face -- confirm this is intended.
    (img_r_before_both_transforms, img_f_before_both_transforms, mask,
     mask_original, source_before_affine_transforms, _,
     source_after_affine_transforms,
     mask_after_affine_transforms) = self_blending(
        img.copy(), landmark.copy(), blending_type, face_region)

    # Post-blending transforms (applied jointly to the fake and the real image)
    transformed = get_transforms()(
        image=img_f_before_both_transforms.astype('uint8'),
        image1=img_r_before_both_transforms.astype('uint8'))
    img_f_after_both_transforms = transformed['image']
    img_r_after_both_transforms = transformed['image1']

    # Crop and resize the faces; the real image reuses the crop window
    # returned for the fake image so both crops line up.
    img_f, _, __, ___, y0_new, y1_new, x0_new, x1_new = crop_face(
        img_f_after_both_transforms, landmark, bbox, margin=False,
        crop_by_bbox=True, abs_coord=True, phase='train')
    img_r = img_r_after_both_transforms[y0_new:y1_new, x0_new:x1_new]
    img_f = cv2.resize(img_f, (380, 380), interpolation=cv2.INTER_LINEAR)
    img_r = cv2.resize(img_r, (380, 380), interpolation=cv2.INTER_LINEAR)

    # Mask operations: convert the grayscale masks to RGB for display
    mask = cv2.cvtColor(mask, cv2.COLOR_GRAY2RGB)
    mask_original = cv2.cvtColor(mask_original, cv2.COLOR_GRAY2RGB)
    mask_after_affine_transforms = cv2.cvtColor(
        mask_after_affine_transforms, cv2.COLOR_GRAY2RGB)

    return (cropped_input_face, img_r_before_both_transforms,
            img_f_before_both_transforms, img_r_after_both_transforms,
            img_f_after_both_transforms, img_r, img_f, mask, mask_original,
            source_before_affine_transforms, source_after_affine_transforms,
            mask_after_affine_transforms)


def generate_ca(inp):
    """
    Applies consistency augmentations to the given input face

    Args:
        inp: Input image containing a face.

    Returns:
        A 4-tuple: the first extracted face (axes reordered with
        ``transpose(1, 2, 0)``), followed by that face after the "REAlbu",
        "RandCropAlbu" and "DFDCAlbu" augmentations, in that order.

    Raises:
        Exception: "No faces detected" when face extraction fails; the
            original error is attached as ``__cause__``.
    """
    try:
        face = extract_face(inp, face_detector)[0].transpose(1, 2, 0)
    except Exception as err:
        # `except Exception` (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate; chaining keeps the root cause visible in logs.
        raise Exception("No faces detected") from err

    # Build all three augmentation pipelines first, then apply them in order.
    random_erasing = get_augs("REAlbu")
    random_cropping = get_augs("RandCropAlbu")
    dfdc = get_augs("DFDCAlbu")
    return (face,
            random_erasing(image=face)['image'],
            random_cropping(image=face)['image'],
            dfdc(image=face)['image'])


def predict_image(inp, model):
    """
    Performs inference for a given input image and returns the prediction and CAM image.

    Args:
        inp: Input image.
        model: Model name chosen in the UI. "Self-Blended Images" selects the
            SBI classifier; any other value selects the SBCL classifier.

    Returns:
        A 2-tuple ``(confidences, cam_image)``. ``confidences`` maps 'Real'
        and 'Fake' to probabilities for the first detected face and
        ``cam_image`` is the Grad-CAM overlay for that face. When no face is
        found the result is ``({'No face detected!': 1}, None)``.
    """
    face_list = extract_face(inp, face_detector)

    if len(face_list) == 0:
        # Exactly two values, matching the success path and the two output
        # components this handler is wired to.
        return {'No face detected!': 1}, None

    # Pick the classifier and its Grad-CAM helper once.
    if model == "Self-Blended Images":
        classifier, cam = sbi, cam_sbi
    else:
        classifier, cam = sbcl, cam_sbcl

    with torch.no_grad():
        img = torch.tensor(face_list).to(device).float()/255
        # Probability of class index 1 ('Fake') for the first face only.
        pred = classifier(img).softmax(1)[:, 1].cpu().data.numpy().tolist()[0]

    confidences = {'Real': 1-pred, 'Fake': pred}

    # Grad-CAM is invoked outside the no_grad block above.
    grayscale_cam = cam(input_tensor=img, targets=targets, aug_smooth=True)
    grayscale_cam = grayscale_cam[0, :]
    cam_image = show_cam_on_image(face_list[0].transpose(
        1, 2, 0)/255, grayscale_cam, use_rgb=True)

    return confidences, cam_image


def predict_video(inp, model):
    """
    Performs inference for a given input video and returns the prediction and CAM image of the frame with the highest fake probability.

    Args:
        inp: Input video.
        model: Model name chosen in the UI. "Self-Blended Images" selects the
            SBI classifier; any other value selects the SBCL classifier.

    Returns:
        A 2-tuple ``(confidences, cam_image)``. The 'Fake' score is the mean,
        over frames, of the highest per-face fake probability in each frame;
        'Real' is its complement. ``cam_image`` is the Grad-CAM overlay of the
        single face with the highest fake probability. When no face is found
        the result is ``({'No face detected!': 1}, None)``.
    """
    face_list, idx_list = extract_frames(inp, 10, face_detector)

    if len(face_list) == 0:
        # Same response as predict_image instead of failing on an empty batch.
        return {'No face detected!': 1}, None

    # Pick the classifier and its Grad-CAM helper once.
    if model == "Self-Blended Images":
        classifier, cam = sbi, cam_sbi
    else:
        classifier, cam = sbcl, cam_sbcl

    with torch.no_grad():
        img = torch.tensor(face_list).to(device).float()/255
        # One fake probability (class index 1) per extracted face.
        face_scores = classifier(img).softmax(1)[:, 1].cpu().tolist()

    # Collapse consecutive faces that share a frame index into one score per
    # frame: the maximum fake probability among that frame's faces.
    frame_scores = []
    current_frame = None
    for frame_idx, score in zip(idx_list, face_scores):
        if not frame_scores or frame_idx != current_frame:
            frame_scores.append(score)
            current_frame = frame_idx
        else:
            frame_scores[-1] = max(frame_scores[-1], score)
    pred = float(np.mean(frame_scores))

    # Index into the per-face arrays. The previous code took the argmax over
    # per-frame scores and used it to index per-face data, which selects the
    # wrong face as soon as any frame contributes more than one face.
    most_fake = int(np.argmax(face_scores))

    # Grad-CAM is invoked outside the no_grad block above.
    grayscale_cam = cam(input_tensor=img[most_fake].unsqueeze(0),
                        targets=targets, aug_smooth=True)
    grayscale_cam = grayscale_cam[0, :]
    cam_image = show_cam_on_image(face_list[most_fake].transpose(
        1, 2, 0)/255, grayscale_cam, use_rgb=True)

    return {'Real': 1-pred, 'Fake': pred}, cam_image


# Choices shown by the model selector on both inference tabs. The first entry
# is the value predict_image/predict_video compare against.
_MODEL_CHOICES = ['Self-Blended Images', 'Self-Blended Consistency Learning']


def _image(label):
    """Create a ``gr.Image`` with the given label, styled with height=240."""
    component = gr.Image(label=label)
    component.style(height=240)
    return component


def _submit_button():
    """Create a "Submit" button styled with full_width=True."""
    button = gr.Button(value="Submit")
    button.style(full_width=True)
    return button


def _model_radio():
    """Create the model selector, using ``gr.Radio`` like the SBI tab does."""
    return gr.Radio(_MODEL_CHOICES, type="value",
                    value='Self-Blended Consistency Learning', label='Model')


# UI layout. Components are created in the order they appear on the page;
# the event handlers are wired at the bottom of the Blocks context.
with gr.Blocks(title="Self-Blended Consistency Learning", css="#custom_header {min-height: 3rem} #custom_title {min-height: 3rem; text-align: center}") as demo:
    gr.Markdown("# Face Forgery Detector", elem_id="custom_title")
    gr.Markdown("Gradio Demo for 'Face Forgery Detection with Self-Blended Consistency Learning'. To use it, simply upload your image, or click one of the examples to load them. Paper to be available on ArXiv in the near future.", elem_id="custom_title")

    with gr.Tab("Image Inference"):
        with gr.Row():
            with gr.Column():
                with gr.Box():
                    gr.Markdown("## Inputs", elem_id="custom_header")
                    input_image = _image("Input Image")
                    model_selection = _model_radio()
                    btn = _submit_button()
            with gr.Column():
                with gr.Box():
                    gr.Markdown("## Outputs", elem_id="custom_header")
                    output_image = _image("GradCAM Image")
                    label_probs = gr.Label()
        gr.Examples(
            examples=examples,
            inputs=input_image,
            outputs=output_image,
            fn=predict_image,
            cache_examples=False,
        )
    with gr.Tab("Video Inference"):
        with gr.Row():
            with gr.Column():
                with gr.Box():
                    gr.Markdown("## Inputs", elem_id="custom_header")
                    input_video = gr.Video(label="Input Video")
                    input_video.style(height=240)
                    model_selection_video = _model_radio()
                    btn_video = _submit_button()

            with gr.Column():
                with gr.Box():
                    gr.Markdown("## Outputs", elem_id="custom_header")
                    output_image_video = _image("GradCAM Image")
                    label_probs_video = gr.Label()
        gr.Examples(
            examples=examples_videos,
            inputs=input_video,
            outputs=output_image_video,
            fn=predict_video,
            cache_examples=False,
        )

    with gr.Tab("SBI Generator"):
        gr.Markdown("Input an image with a face to visualize the steps involved in the self-blended image (SBI) generation. Values for augmentations are randomly chosen. Blending type and face region can be varied. \
                    This process is a slightly modified version of the process from 'Detecting Deepfakes with Self-Blended Images (CVPR 2022)'", elem_id="custom_header")
        with gr.Row():
            with gr.Column():
                with gr.Box():
                    gr.Markdown("## Inputs", elem_id="custom_header")
                    input_image_sbi = _image("Input Image")
                    btn_sbi = _submit_button()
                    with gr.Row():
                        blending_type = gr.Radio(
                            ["Poisson", "Mixup"], label="Blending Type", value="Poisson", interactive=True)
                        face_region = gr.Radio(
                            ["1", "2", "3", "4"], label="Face Region", value="1", interactive=True)
                    gr.Examples(
                        examples=examples_sbi,
                        inputs=input_image_sbi,
                        fn=generate_sbi,
                        cache_examples=False,
                    )
        with gr.Row():
            with gr.Box():
                with gr.Column():
                    gr.Markdown("# Self-Blended Image Generation",
                                elem_id="custom_header")

                    with gr.Box():
                        gr.Markdown("## Step 1", elem_id="custom_header")
                        gr.Markdown(
                            "Using facial landmarks models, obtain face bounding box and facial landmarks to crop face and produce mask.", elem_id="custom_header")
                        with gr.Row():
                            cropped_input_face = _image(
                                "Input face after cropping")
                            mask_original = _image("Original mask")
                        gr.Markdown("The cropped input face is duplicated to become a 'source' face and a 'target' face. Eventually, the source face will be blended onto the target face after augmentations done below.", elem_id="custom_header")

                    with gr.Box():
                        gr.Markdown("## Step 2", elem_id="custom_header")
                        gr.Markdown("Apply source-target augmentations",
                                    elem_id="custom_header")
                        with gr.Row():
                            source_before_affine_transforms = _image(
                                "Source face after source-target augmentations")
                        gr.Markdown("In this case, the source-target augmentations are applied to the source image for straight-forward visualization. In actual training,\
                                     the augmentations are applied to either source or target face with 1:1 probability. Augmentations applied here \
                                     include RGBShift, HueSaturationValue, RandomBrightnessContrast, RandomDownScale, Sharpen from Albumentations.")

                    with gr.Box():
                        gr.Markdown("## Step 3", elem_id="custom_header")
                        gr.Markdown(
                            "Apply affine/elastic augmentations to augmented source image/mask", elem_id="custom_header")
                        with gr.Row():
                            source_after_affine_transforms = _image(
                                "Source face after affine augmentations")
                            mask_after_affine_transforms = _image(
                                "Mask after elastic augmentations")

                    with gr.Box():
                        gr.Markdown("## Step 4", elem_id="custom_header")
                        gr.Markdown(
                            "Apply smoothing augmentations to mask for gentle blending", elem_id="custom_header")
                        mask = _image("Mask after Gaussian smoothing")

                    with gr.Box():
                        gr.Markdown("## Step 5", elem_id="custom_header")
                        gr.Markdown(
                            "Perform blending (based on chosen blending option) to produce self-blended fake", elem_id="custom_header")
                        with gr.Row():
                            img_r_before_both_transforms = _image(
                                "Real face (for comparison)")
                            img_f_before_both_transforms = _image(
                                "Self-blended fake face")

                    with gr.Box():
                        gr.Markdown("## Step 6", elem_id="custom_header")
                        gr.Markdown(
                            "Apply the same randomly chosen augmentations to both real and self-blended fake", elem_id="custom_header")
                        with gr.Row():
                            img_r_after_both_transforms = _image(
                                "Real face after augmentations")
                            img_f_after_both_transforms = _image(
                                "Self-blended fake face after augmentations")
                        gr.Markdown(
                            "Augmentations applied here include RGBShift, HueSaturationValue, RandomBrightnessContrast, ImageCompression from Albumentations.")

                    with gr.Box():
                        gr.Markdown("## Step 7", elem_id="custom_header")
                        gr.Markdown(
                            "Crop real and self-blended fake to only have the faces", elem_id="custom_header")
                        with gr.Row():
                            output_r = _image("Final real face")
                            output_f = _image("Final SBI face")
                        gr.Markdown(
                            "These are the images that are eventually fed into the model for training", elem_id="custom_header")

    with gr.Tab("Consistency Augmentations"):
        gr.Markdown("Input an image with a face to visualize the consistency augmentations. Values for augmentations are randomly chosen.", elem_id="custom_header")
        with gr.Row():
            with gr.Box():
                gr.Markdown("## Input", elem_id="custom_header")
                input_image_ca = _image("Input Image")
                btn_ca = _submit_button()
                gr.Examples(
                    examples=examples_sbi,
                    inputs=input_image_ca,
                    fn=generate_ca,
                    cache_examples=False,
                )
        with gr.Row():
            with gr.Box():
                with gr.Row():
                    og = _image("Cropped Face (No augs)")
                    re = _image("Random Erasing")
                    rc = _image("Random Cropping")
                    dfdc = _image("DFDC Selimsef")

    # Event wiring. Each outputs list must stay in the same order as the
    # values returned by the corresponding handler.
    btn.click(predict_image, inputs=[input_image, model_selection], outputs=[
              label_probs, output_image])
    btn_video.click(predict_video, inputs=[input_video, model_selection_video], outputs=[
                    label_probs_video, output_image_video])
    btn_sbi.click(generate_sbi, inputs=[input_image_sbi, blending_type, face_region], outputs=[
        cropped_input_face, img_r_before_both_transforms, img_f_before_both_transforms,
        img_r_after_both_transforms, img_f_after_both_transforms, output_r, output_f, mask,
        mask_original, source_before_affine_transforms, source_after_affine_transforms,
        mask_after_affine_transforms])
    btn_ca.click(generate_ca, inputs=[
                 input_image_ca], outputs=[og, re, rc, dfdc])
if __name__ == "__main__":
    demo.launch()