import os
import zipfile

import cv2
import gradio as gr
import numpy as np
import torch
import torch.nn.functional as F
from facenet_pytorch import MTCNN, InceptionResnetV1
from PIL import Image
from pytorch_grad_cam import GradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
from transformers import pipeline

# Optional custom Seafoam theme, kept for reference. Using it would require:
#   from collections.abc import Iterable
#   from gradio.themes.base import Base
#   from gradio.themes.utils import colors, fonts, sizes
'''
class Seafoam(Base):
    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.blue,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        super().set(
            body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
            body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
            button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
            button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
            button_primary_text_color="white",
            button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
            slider_color="*secondary_300",
            slider_color_dark="*secondary_600",
            block_title_text_weight="600",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
            button_shadow="*shadow_drop_lg",
            button_large_padding="32px",
        )


my_theme = Seafoam()
'''

# Active Gradio theme
my_theme = gr.themes.Monochrome()

# Pretrained ViT classifier for AI-generated-image detection
pipe = pipeline("image-classification", model="nightfury/AI-picture-detector")

def image_classifier(image):
    """Classify an image as AI-generated vs. human-made."""
    outputs = pipe(image)
    # Map each predicted label to its confidence score
    results = {}
    for result in outputs:
        results[result['label']] = result['score']
    return results
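
# Illustrative helper, not wired into the UI: run the AI-image detector on a
# file from disk. The label names in the docstring example are assumptions
# about this checkpoint's output, not confirmed by the code above.
def classify_file(path: str) -> dict:
    """e.g. classify_file("some_image.png") -> {'artificial': 0.97, 'human': 0.03}"""
    with Image.open(path) as img:
        return image_classifier(img.convert("RGB"))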

# Unpack the bundled example images into ./examples
with zipfile.ZipFile("examples.zip", "r") as zip_ref:
    zip_ref.extractall(".")

# Prefer the GPU when available (e.g. 'cuda:0'); otherwise fall back to CPU
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Face detector; post_process=False keeps the crop in raw [0, 255] pixels,
# which predict() later rescales to [0, 1]
mtcnn = MTCNN(
    select_largest=False,
    post_process=False,
    device=DEVICE
).eval()

# Binary deepfake classifier: an InceptionResnetV1 backbone pretrained on
# VGGFace2 with a single-logit head
model = InceptionResnetV1(
    pretrained="vggface2",
    classify=True,
    num_classes=1,
    device=DEVICE
)

# Load fine-tuned weights (CPU map_location keeps this working on CPU-only hosts)
checkpoint = torch.load("resnetinceptionv1_epoch_32.pth", map_location=torch.device('cpu'))
model.load_state_dict(checkpoint['model_state_dict'])
model.to(DEVICE)
model.eval()
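
# A minimal sketch, for reference only (not used by the app), of the detection
# path without the Grad-CAM overlay: crop the face, rescale to [0, 1], and map
# the single logit through a sigmoid. It mirrors the logic of predict() below.
def fake_probability(img):
    """Return P(fake) for the detected face, or None if no face is found."""
    face = mtcnn(img)
    if face is None:
        return None  # no photoreal face found
    face = F.interpolate(face.unsqueeze(0), size=(256, 256),
                         mode='bilinear', align_corners=False)
    face = face.to(DEVICE, dtype=torch.float32) / 255.0
    with torch.no_grad():
        return torch.sigmoid(model(face).squeeze(0)).item()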

# Collect example images; each filename encodes its ground-truth label as the
# prefix before the first underscore (the list is not yet wired into the UI)
EXAMPLES_FOLDER = 'examples'
examples = []
for example_name in os.listdir(EXAMPLES_FOLDER):
    examples.append({
        'path': os.path.join(EXAMPLES_FOLDER, example_name),
        'label': example_name.split('_')[0]
    })
np.random.shuffle(examples)
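
# Hedged sketch (left inactive): `examples` could be surfaced in the UI, since
# gr.Interface accepts an `examples` list of input rows matching `inputs`:
#
#     example_rows = [[e['path'], e['label']] for e in examples]
#     # ... then pass examples=example_rows to gr.Interface(...) below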

def predict(input_image: Image.Image, true_label: str):
    """Predict whether the face in input_image is real or a deepfake."""
    face = mtcnn(input_image)
    if face is None:
        # Gradio surfaces the exception message to the user
        raise Exception("No photoreal face detected")
    face = face.unsqueeze(0)
    face = F.interpolate(face, size=(256, 256), mode='bilinear', align_corners=False)

    # Keep an unnormalized uint8 copy of the crop for the visualization overlay
    prev_face = face.squeeze(0).permute(1, 2, 0).cpu().detach().int().numpy()
    prev_face = prev_face.astype('uint8')

    face = face.to(DEVICE)
    face = face.to(torch.float32)
    face = face / 255.0
    # show_cam_on_image expects a float image in [0, 1]
    face_image_to_plot = face.squeeze(0).permute(1, 2, 0).cpu().detach().numpy()

    # Grad-CAM over the last branch of the final Inception block highlights the
    # regions that drive the prediction
    target_layers = [model.block8.branch1[-1]]
    cam = GradCAM(model=model, target_layers=target_layers)
    targets = [ClassifierOutputTarget(0)]
    grayscale_cam = cam(input_tensor=face, targets=targets, eigen_smooth=True)
    grayscale_cam = grayscale_cam[0, :]
    visualization = show_cam_on_image(face_image_to_plot, grayscale_cam, use_rgb=True)
    face_with_mask = cv2.addWeighted(prev_face, 1, visualization, 0.5, 0)

    with torch.no_grad():
        # The single logit maps through a sigmoid to the probability of "fake"
        output = torch.sigmoid(model(face).squeeze(0))

    confidences = {
        'real': 1 - output.item(),
        'fake': output.item()
    }
    return confidences, true_label, face_with_mask
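
# Hypothetical smoke test, kept commented out so the app starts cleanly
# (assumes the examples folder is non-empty):
#
#     sample = examples[0]
#     scores, label, overlay = predict(Image.open(sample['path']), sample['label'])
#     print(label, scores)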

title1 = "Deepfake Image Detection"
description1 = "AI/ML implementation for detecting real vs. fake (deepfake) face images."
article1 = "<p style='text-align: center'>...</p>"

interface1 = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(label="Input Image", type="pil"),
        "text"
    ],
    outputs=[
        gr.Label(label="Prediction Model - % of Fake or Real image detection"),
        "text",
        gr.Image(label="Face with Explainability", type="pil")
    ],
    theme=my_theme,
    title=title1,
    description=description1,
    article=article1
)

title2 = "AI Generated Image Detection"
description2 = "AI/ML implementation for detecting AI-generated images, including output of older models such as VQGAN+CLIP."
article2 = """
NOTE:
- To detect pictures generated using older models such as VQGAN+CLIP, please use the updated version of this detector instead.
- This model uses a ViT to predict whether an artistic image was generated with AI.
- The training dataset did not include samples generated by Midjourney 5, SDXL, or DALL-E 3, but it was trained on outputs of their predecessors.
- The scope of this tool is 'artistic images'; it is not a deepfake photo detector, and general computer imagery (webcams, screenshots, etc.) may throw it off.
- This tool is intended to serve as an indicator of whether an image was AI-generated.
- Images scoring as very probably artificial (e.g. 90% or higher) could be referred to a human expert for further investigation, if needed.
"""

interface2 = gr.Interface(
    fn=image_classifier,
    inputs=gr.Image(type="pil"),
    outputs="label",
    theme=my_theme,
    title=title2,
    description=description2,
    article=article2
)

# Serve both detectors as tabs of a single Gradio app
gr.TabbedInterface(
    [interface1, interface2], ["Deepfake Image Detection", "AI Image Detection"]
).launch()