RomanShnurov committed
Commit f3b2c5b
1 Parent(s): 295487b

add new synthetic detector

Files changed (6)
  1. .gitignore +160 -0
  2. app.py +32 -82
  3. model_classes.py +51 -0
  4. model_loader.py +59 -0
  5. model_transforms.py +25 -0
  6. models/synthetic_detector_v2.pt +3 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
app.py CHANGED
@@ -1,93 +1,40 @@
-import os
-os.system("python -m pip install --upgrade pip")
-os.system("pip install git+https://github.com/rwightman/pytorch-image-models")
-os.system("pip install git+https://github.com/huggingface/huggingface_hub")
-
 import gradio as gr
-import timm
-import torch
-from torch import nn
 from torch.nn import functional as F
-import torchvision
-
-
-class Model200M(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.model = timm.create_model('convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384', pretrained=False,
-                                       num_classes=0)
-
-        self.clf = nn.Sequential(
-            nn.Linear(1536, 128),
-            nn.ReLU(inplace=True),
-            nn.Linear(128, 2))
-
-    def forward(self, image):
-        image_features = self.model(image)
-        return self.clf(image_features)
-
-
-class Model5M(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.model = timm.create_model('timm/tf_mobilenetv3_large_100.in1k', pretrained=False, num_classes=0)
-
-        self.clf = nn.Sequential(
-            nn.Linear(1280, 128),
-            nn.ReLU(inplace=True),
-            nn.Linear(128, 2))
-
-    def forward(self, image):
-        image_features = self.model(image)
-        return self.clf(image_features)
-
-def load_model(name: str):
-    model = Model200M() if "200M" in name else Model5M()
-    ckpt = torch.load(name, map_location=torch.device('cpu'))
-    model.load_state_dict(ckpt)
-    model.eval()
-    return model
 
-model_list = {
-    'midjourney_200M': load_model('models/midjourney200M.pt'),
-    'diffusions_200M': load_model('models/diffusions200M.pt'),
-    'midjourney_5M': load_model('models/midjourney5M.pt'),
-    'diffusions_5M': load_model('models/diffusions5M.pt')
-}
-
-tfm = torchvision.transforms.Compose([
-    torchvision.transforms.Resize((640, 640)),
-    torchvision.transforms.ToTensor(),
-    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                     std=[0.229, 0.224, 0.225]),
-])
-
-tfm_small = torchvision.transforms.Compose([
-    torchvision.transforms.Resize((224, 224)),
-    torchvision.transforms.ToTensor(),
-    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
-                                     std=[0.229, 0.224, 0.225]),
-])
-
-
-def predict_from_model(model, img_1):
-    y = model.forward(img_1[None, ...])
+from model_loader import ModelType, type_to_transforms, type_to_loaded_model
+
+def predict_from_model(model_type, raw_image):
+    tfm = type_to_transforms[model_type]
+    model = type_to_loaded_model[model_type]
+    img = tfm(raw_image)
+    y = None
+    if model_type == ModelType.SYNTHETIC_DETECTOR_V2:
+        y = model.forward(img.unsqueeze(0).to("cpu"))
+    else:
+        y = model.forward(img[None, ...])
     y_1 = F.softmax(y, dim=1)[:, 1].cpu().detach().numpy()
     y_2 = F.softmax(y, dim=1)[:, 0].cpu().detach().numpy()
     return {'created by AI': y_1.tolist(),
             'created by human': y_2.tolist()}
 
+def get_y(model_type, model, image):
+    if model_type == ModelType.SYNTHETIC_DETECTOR_V2:
+        return model.forward(image.unsqueeze(0).to("cpu"))
+    return model.forward(image[None, ...])
 
 def predict(raw_image, model_name):
-    img_1 = tfm(raw_image)
-    img_2 = tfm_small(raw_image)
-
-    if model_name not in model_list:
+    if model_name not in ModelType.get_list():
         return {'error': [0.]}
 
-    model = model_list[model_name]
-    img = img_1 if "200M" in model_name else img_2
-    return predict_from_model(model, img)
+    model_type = ModelType[str(model_name).upper()].value
+    model = type_to_loaded_model[model_type]
+    tfm = type_to_transforms[model_type]
+    image = tfm(raw_image)
+    y = get_y(model_type, model, image)
+    y_1 = F.softmax(y, dim=1)[:, 1].cpu().detach().numpy()
+    y_2 = F.softmax(y, dim=1)[:, 0].cpu().detach().numpy()
+    return {'created by AI': y_1.tolist(),
+            'created by human': y_2.tolist()}
 
 general_examples = [
     ["images/general/img_1.jpg"],
@@ -125,8 +72,9 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
         <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
         <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
-        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>.<br>
-        We provide several detectors for images generated by popular tools, such as Midjourney and Stable Diffusion.<br>
+        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>,
+        <a href=''>synthetic_detector_v2</a>.
+        <br>We provide several detectors for images generated by popular tools, such as Midjourney and Stable Diffusion.<br>
        Please refer to model cards for evaluation metrics and limitations.
        """
    )
@@ -134,7 +82,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="pil")
-            drop_down = gr.Dropdown(model_list.keys(), type="value", label="Model", value="diffusions_200M")
+            drop_down = gr.Dropdown(ModelType.get_list(), type="value", label="Model", value=ModelType.SYNTHETIC_DETECTOR_V2)
             with gr.Row():
                 gr.ClearButton(components=[image_input])
                 submit_button = gr.Button("Submit", variant="primary")
@@ -154,12 +102,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
            <h3>Models</h3>
            <p><code>*_200M</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with image size <code>640x640</code></p>
            <p><code>*_5M</code> models are based on <code>tf_mobilenetv3_large_100.in1k</code> with image size <code>224x224</code></p>
+           <p><code>synthetic_detector_2.0</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with image size <code>384x384</code></p>
 
            <h3>Details</h3>
            <li>Model cards: <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
            <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
            <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
-           <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>.
+           <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>,
+           <a href=''>synthetic_detector_v2</a>.
            </li>
            <li>License: CC-By-SA-3.0</li>
            """
model_classes.py ADDED
@@ -0,0 +1,51 @@
+import timm
+import torch
+from torch import nn
+import pytorch_lightning as pl
+from pytorch_lightning.core.mixins import HyperparametersMixin
+
+class Model200M(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = timm.create_model('convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384', pretrained=False,
+                                       num_classes=0)
+
+        self.clf = nn.Sequential(
+            nn.Linear(1536, 128),
+            nn.ReLU(inplace=True),
+            nn.Linear(128, 2))
+
+    def forward(self, image):
+        image_features = self.model(image)
+        return self.clf(image_features)
+
+
+class Model5M(torch.nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.model = timm.create_model('timm/tf_mobilenetv3_large_100.in1k', pretrained=False, num_classes=0)
+
+        self.clf = nn.Sequential(
+            nn.Linear(1280, 128),
+            nn.ReLU(inplace=True),
+            nn.Linear(128, 2))
+
+    def forward(self, image):
+        image_features = self.model(image)
+        return self.clf(image_features)
+
+
+class SyntheticV2(pl.LightningModule, HyperparametersMixin):
+    def __init__(self):
+        super().__init__()
+        self.model = timm.create_model('convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384', pretrained=False,
+                                       num_classes=0)
+
+        self.clf = nn.Sequential(
+            nn.Linear(1536, 128),
+            nn.ReLU(inplace=True),
+            nn.Linear(128, 2))
+
+    def forward(self, image):
+        image_features = self.model(image)
+        return self.clf(image_features)
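
All three classes share the same two-logit head over pooled backbone features; SyntheticV2 differs only in subclassing pl.LightningModule, matching how its checkpoint was trained. A quick shape-check sketch (weights are random here, since pretrained=False and no checkpoint is loaded):

```python
import torch

from model_classes import SyntheticV2

model = SyntheticV2()
model.eval()

# One dummy 384x384 RGB image, the input size synthetic_detector_v2 expects.
dummy = torch.randn(1, 3, 384, 384)
with torch.no_grad():
    logits = model(dummy)
print(logits.shape)  # torch.Size([1, 2]); index 1 = "AI", index 0 = "human" in app.py
```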
model_loader.py ADDED
@@ -0,0 +1,59 @@
+from enum import Enum
+import torch
+
+from model_classes import Model200M, Model5M, SyntheticV2
+from model_transforms import transform_200M, transform_5M, transform_synthetic
+
+class ModelType(str, Enum):
+    MIDJOURNEY_200M = "midjourney_200M"
+    DIFFUSIONS_200M = "diffusions_200M"
+    MIDJOURNEY_5M = "midjourney_5M"
+    DIFFUSIONS_5M = "diffusions_5M"
+    SYNTHETIC_DETECTOR_V2 = "synthetic_detector_v2"
+
+    def __str__(self):
+        return str(self.value)
+
+    @staticmethod
+    def get_list():
+        return [model_type.value for model_type in ModelType]
+
+def load_model(value: ModelType):
+    model = type_to_class[value]
+    path = type_to_path[value]
+    ckpt = torch.load(path, map_location=torch.device('cpu'))
+    model.load_state_dict(ckpt)
+    model.eval()
+    return model
+
+type_to_class = {
+    ModelType.MIDJOURNEY_200M: Model200M(),
+    ModelType.DIFFUSIONS_200M: Model200M(),
+    ModelType.MIDJOURNEY_5M: Model5M(),
+    ModelType.DIFFUSIONS_5M: Model5M(),
+    ModelType.SYNTHETIC_DETECTOR_V2: SyntheticV2(),
+}
+
+type_to_path = {
+    ModelType.MIDJOURNEY_200M: 'models/midjourney200M.pt',
+    ModelType.DIFFUSIONS_200M: 'models/diffusions200M.pt',
+    ModelType.MIDJOURNEY_5M: 'models/midjourney5M.pt',
+    ModelType.DIFFUSIONS_5M: 'models/diffusions5M.pt',
+    ModelType.SYNTHETIC_DETECTOR_V2: 'models/synthetic_detector_v2.pt',
+}
+
+type_to_loaded_model = {
+    ModelType.MIDJOURNEY_200M: load_model(ModelType.MIDJOURNEY_200M),
+    ModelType.DIFFUSIONS_200M: load_model(ModelType.DIFFUSIONS_200M),
+    ModelType.MIDJOURNEY_5M: load_model(ModelType.MIDJOURNEY_5M),
+    ModelType.DIFFUSIONS_5M: load_model(ModelType.DIFFUSIONS_5M),
+    ModelType.SYNTHETIC_DETECTOR_V2: load_model(ModelType.SYNTHETIC_DETECTOR_V2)
+}
+
+type_to_transforms = {
+    ModelType.MIDJOURNEY_200M: transform_200M,
+    ModelType.DIFFUSIONS_200M: transform_200M,
+    ModelType.MIDJOURNEY_5M: transform_5M,
+    ModelType.DIFFUSIONS_5M: transform_5M,
+    ModelType.SYNTHETIC_DETECTOR_V2: transform_synthetic
+}
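
Because ModelType mixes in str, the dropdown string, the enum member, and the member's .value all compare and hash alike, which is what lets predict() in app.py index the type_to_* dictionaries with ModelType[...].value. A sketch of that round trip (note the design choice: importing model_loader builds type_to_loaded_model, so all five checkpoints are loaded from models/ at import time):

```python
from model_loader import ModelType, type_to_path

name = "synthetic_detector_v2"        # as received from the Gradio dropdown
model_type = ModelType[name.upper()]  # ModelType.SYNTHETIC_DETECTOR_V2

# str-mixin Enum: the member equals (and hashes like) its string value,
# so either form works as a dictionary key.
assert model_type == name
print(type_to_path[model_type])       # models/synthetic_detector_v2.pt
```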
model_transforms.py ADDED
@@ -0,0 +1,25 @@
+import timm
+import torchvision
+
+data_config = {'input_size': (3, 384, 384),
+               'interpolation': 'bicubic',
+               'mean': (0.48145466, 0.4578275, 0.40821073),
+               'std': (0.26862954, 0.26130258, 0.27577711),
+               'crop_pct': 1.0,
+               'crop_mode': 'squash'}
+
+transform_synthetic = timm.data.create_transform(**data_config, is_training=False)
+
+transform_200M = torchvision.transforms.Compose([
+    torchvision.transforms.Resize((640, 640)),
+    torchvision.transforms.ToTensor(),
+    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                     std=[0.229, 0.224, 0.225]),
+])
+
+transform_5M = torchvision.transforms.Compose([
+    torchvision.transforms.Resize((224, 224)),
+    torchvision.transforms.ToTensor(),
+    torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
+                                     std=[0.229, 0.224, 0.225]),
+])
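
transform_synthetic is resolved by timm from a CLIP-style data config (bicubic interpolation, CLIP mean/std, crop_pct=1.0 with 'squash' crop mode), so the whole image is squashed to 384x384 rather than center-cropped, while the older transforms keep ImageNet statistics. A sketch of the resulting tensor shapes:

```python
from PIL import Image

from model_transforms import transform_200M, transform_5M, transform_synthetic

img = Image.new("RGB", (512, 300))     # dummy, deliberately non-square input
print(transform_synthetic(img).shape)  # torch.Size([3, 384, 384])
print(transform_200M(img).shape)       # torch.Size([3, 640, 640])
print(transform_5M(img).shape)         # torch.Size([3, 224, 224])
```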
models/synthetic_detector_v2.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89a955ec54bddab759228757e437d300b6b86bbba9f45cfd5ecd0e3d7dec83a2
+size 795263437