# Sumsub-ffs-demo / app.py
# kalaidin's picture
# Update app.py
# 70bcea5
import gradio as gr
from torch.nn import functional as F
from model_loader import ModelType, type_to_transforms, type_to_loaded_model
def get_y(model_type, model, image):
    """Run ``model`` on a single image and return its raw output.

    A leading axis of size 1 is added to ``image`` before the forward pass.
    For ``ModelType.SYNTHETIC_DETECTOR_V2`` the batched input is additionally
    moved to the CPU.

    Args:
        model_type: Identifier compared against
            ``ModelType.SYNTHETIC_DETECTOR_V2`` to pick the input preparation.
        model: Object exposing ``forward``.
        image: Transformed image; presumably a torch tensor -- TODO confirm.

    Returns:
        Whatever ``model.forward`` returns for the one-item batch.
    """
    is_v2 = model_type == ModelType.SYNTHETIC_DETECTOR_V2
    if is_v2:
        # NOTE(review): only this detector gets an explicit CPU transfer;
        # the reason is not visible in this file.
        batch = image.unsqueeze(0).to("cpu")
    else:
        batch = image[None, ...]
    return model.forward(batch)
def predict(raw_image, model_name):
    """Score an image as AI-generated vs. human-made with the chosen detector.

    Args:
        raw_image: Image to classify; handed unchanged to the transform
            registered for the model in ``type_to_transforms``. ``None``
            means no image was supplied.
        model_name: Detector name; must be one of ``ModelType.get_list()``.

    Returns:
        A dict for the result label. On success it maps
        ``'created by AI'`` to the softmax score of class index 1 and
        ``'created by human'`` to the softmax score of class index 0, each
        as a list with one entry per batch item. When no image is given or
        the model name is unknown, ``{'error': [0.]}`` is returned.
    """
    # Gradio passes None when the image input is empty; report it the same
    # way as an unknown model instead of failing inside the transform.
    if raw_image is None:
        return {'error': [0.]}
    if model_name not in ModelType.get_list():
        return {'error': [0.]}

    # Local import: the file only imports torch.nn.functional at module level.
    import torch

    model_type = ModelType[str(model_name).upper()].value
    model = type_to_loaded_model[model_type]
    tfm = type_to_transforms[model_type]
    image = tfm(raw_image)

    # Inference only, so skip autograd bookkeeping; the softmax is computed
    # once and both class columns are read from the same result.
    with torch.no_grad():
        y = get_y(model_type, model, image)
        probs = F.softmax(y, dim=1)

    y_1 = probs[:, 1].cpu().detach().numpy()
    y_2 = probs[:, 0].cpu().detach().numpy()
    return {'created by AI': y_1.tolist(),
            'created by human': y_2.tolist()}
# Sample images for the "Examples" tab. Each row is a one-element list
# holding the value for the single image input.
general_examples = [
    [path]
    for path in (
        "images/general/img_1.jpg",
        "images/general/img_2.jpg",
        "images/general/img_3.jpg",
        "images/general/img_4.jpg",
        "images/general/img_5.jpg",
        "images/general/img_6.jpg",
        "images/general/img_7.jpg",
        "images/general/img_8.jpg",
        "images/general/img_9.jpg",
        "images/general/img_10.jpg",
    )
]
# Sample images from the "optic" folder, one single-element row per image.
# The only tab that would show them is currently commented out in the layout.
optic_examples = [
    [path]
    for path in (
        "images/optic/img_1.jpg",
        "images/optic/img_2.jpg",
        "images/optic/img_3.jpg",
        "images/optic/img_4.jpg",
        "images/optic/img_5.jpg",
    )
]
# Sample images for the "Widely known deepfakes" tab, one single-element
# row per image. Note that the last file is a .webp, not a .jpg.
famous_deepfake_examples = [
    [path]
    for path in (
        "images/famous_deepfakes/img_1.jpg",
        "images/famous_deepfakes/img_2.jpg",
        "images/famous_deepfakes/img_3.jpg",
        "images/famous_deepfakes/img_4.webp",
    )
]
# Page layout: intro text, an input column (image, model selector, buttons)
# beside an output column (result label), example tabs, and a footer with
# model details. The Markdown bodies are kept flush-left on purpose so that
# no line can be read as an indented Markdown code block.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header and short description with links to the model cards.
    gr.Markdown(
        """
<h1 style="text-align: center;">For Fake's Sake: a set of models for detecting generated and synthetic images</h1>
This is a demo space for synthetic image detectors:
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a> (Aug, 2023),
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a> (Aug, 2023),
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a> (Aug, 2023),
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a> (Aug, 2023),
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-2.0'>synthetic_detector_v2</a> (Sep, 2023).
<br>We provide several detectors for images generated by popular tools, such as Midjourney and Stable Diffusion.<br>
Please refer to model cards for evaluation metrics and limitations.
"""
    )
    with gr.Row():
        # Left column: inputs and action buttons.
        with gr.Column():
            image_input = gr.Image(type="pil")
            # NOTE(review): the default is the enum member while the choices
            # come from ModelType.get_list(); confirm the two compare equal.
            drop_down = gr.Dropdown(ModelType.get_list(), type="value", label="Model", value=ModelType.SYNTHETIC_DETECTOR_V2)
            with gr.Row():
                gr.ClearButton(components=[image_input])
                submit_button = gr.Button("Submit", variant="primary")
        # Right column: prediction output.
        with gr.Column():
            result_score = gr.Label(label='result', num_top_classes=2)
    # Clickable sample images that populate the image input.
    with gr.Tab("Examples"):
        gr.Examples(examples=general_examples, inputs=image_input)
    # with gr.Tab("More examples"):
    #     gr.Examples(examples=optic_examples, inputs=image_input)
    with gr.Tab("Widely known deepfakes"):
        gr.Examples(examples=famous_deepfake_examples, inputs=image_input)
    # Submit runs predict on the current image and selected model.
    submit_button.click(predict, inputs=[image_input, drop_down], outputs=result_score)
    # Footer: backbone details, model cards, license, and limitations.
    gr.Markdown(
        """
<h3>Models</h3>
<p><code>*_200M</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with image size <code>640x640</code></p>
<p><code>*_5M</code> models are based on <code>tf_mobilenetv3_large_100.in1k</code> with image size <code>224x224</code></p>
<p><code>synthetic_detector_2.0</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with image size <code>384x384</code></p>
<h3>Details</h3>
<li>Model cards: <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>,
<a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-2.0'>synthetic_detector_v2</a>.
</li>
<li>License: CC-By-SA-3.0</li>
<h3>Limitations</h3>
The model output should only be used as an indication that an image may have been (but not definitely) artificially generated.
Current models may face challenges in accurately predicting the class for real-world examples that are extremely vibrant and of exceptionally high quality.
In such cases, the richness of colors and fine details may lead to misclassifications due to the complexity of the input. This could potentially cause the model to focus on visual aspects that are not necessarily indicative of the true class.
"""
    )
# Start the web server for the demo.
demo.launch()