Spaces:

neggles
/

pi-tagger

Runtime error

App Files Files

neggles committed on Mar 6, 2024

Commit

2b6048b

1 Parent(s): 7534598

init

Browse files

Files changed (16) hide show

.editorconfig +34 -0
.gitattributes +1 -0
.gitignore +253 -0
.pre-commit-config.yaml +27 -0
.vscode/settings.json +94 -0
LICENSE.md +25 -0
README.md +13 -4
app.py +168 -0
data/selected_tags.csv +0 -0
examples/img-01.png +3 -0
examples/img-02.png +3 -0
pyproject.toml +87 -0
requirements.txt +9 -0
tagger/__init__.py +10 -0
tagger/common.py +103 -0
tagger/model.py +36 -0

.editorconfig ADDED Viewed

	@@ -0,0 +1,34 @@

+# http://editorconfig.org
+root = true
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+[*.{json,jsonc}]
+indent_style = space
+indent_size = 2
+[.vscode/*.{json,jsonc}]
+indent_style = space
+indent_size = 4
+[*.{yml,yaml,toml}]
+indent_style = space
+indent_size = 2
+[*.md]
+trim_trailing_whitespace = false
+[Makefile]
+indent_style = tab
+indent_size = 8

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,253 @@

+# Created by https://www.toptal.com/developers/gitignore/api/linux,windows,macos,visualstudiocode,python
+# Edit at https://www.toptal.com/developers/gitignore?templates=linux,windows,macos,visualstudiocode,python
+### Linux ###
+*~
+# temporary files which can be created if a process still has a handle open of a deleted file
+.fuse_hidden*
+# KDE directory preferences
+.directory
+# Linux trash folder which might appear on any partition or disk
+.Trash-*
+# .nfs files are created when an open file is removed but is still being accessed
+.nfs*
+### macOS ###
+# General
+.DS_Store
+.AppleDouble
+.LSOverride
+# Icon must end with two \r
+Icon
+# Thumbnails
+._*
+# Files that might appear in the root of a volume
+.DocumentRevisions-V100
+.fseventsd
+.Spotlight-V100
+.TemporaryItems
+.Trashes
+.VolumeIcon.icns
+.com.apple.timemachine.donotpresent
+# Directories potentially created on remote AFP share
+.AppleDB
+.AppleDesktop
+Network Trash Folder
+Temporary Items
+.apdisk
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+### VisualStudioCode ###
+.vscode/*
+!.vscode/settings.json
+!.vscode/tasks.json
+!.vscode/launch.json
+!.vscode/extensions.json
+*.code-workspace
+# Local History for Visual Studio Code
+.history/
+### VisualStudioCode Patch ###
+# Ignore all local history of files
+.history
+.ionide
+### Windows ###
+# Windows thumbnail cache files
+Thumbs.db
+Thumbs.db:encryptable
+ehthumbs.db
+ehthumbs_vista.db
+# Dump file
+*.stackdump
+# Folder config file
+[Dd]esktop.ini
+# Recycle Bin used on file shares
+$RECYCLE.BIN/
+# Windows Installer files
+*.cab
+*.msi
+*.msix
+*.msm
+*.msp
+# Windows shortcuts
+*.lnk
+# End of https://www.toptal.com/developers/gitignore/api/linux,windows,macos,visualstudiocode,python
+# temp and misc
+/misc/
+/temp/
+# outputs and such
+/logs/
+/cache/
+# direnv
+.envrc
+.envrc.*
+# dotenv
+.env
+.env.*
+# temp files
+**/tmp_*.*
+**/*.tmp.*
+# but keep examples
+!*.example

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,27 @@

+# See https://pre-commit.com for more information
+ci:
+  autofix_prs: true
+  autoupdate_branch: "main"
+  autoupdate_commit_msg: "[pre-commit.ci] pre-commit autoupdate"
+  autoupdate_schedule: weekly
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.2.0
+    hooks:
+      # Run the linter.
+      - id: ruff
+        types_or: [python, pyi, jupyter]
+        args: [--fix, --exit-non-zero-on-fix]
+      # Run the formatter.
+      - id: ruff-format
+        types_or: [python, pyi, jupyter]
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: trailing-whitespace
+        exclude_types:
+          - "markdown"
+      - id: end-of-file-fixer
+      - id: check-yaml

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,94 @@

+{
+    "editor.insertSpaces": true,
+    "editor.tabSize": 4,
+    "files.trimTrailingWhitespace": true,
+    "editor.rulers": [100, 120],
+    "files.associations": {
+        "*.yaml": "yaml"
+    },
+    "files.exclude": {
+        "**/.git": true,
+        "**/.svn": true,
+        "**/.hg": true,
+        "**/CVS": true,
+        "**/.DS_Store": true,
+        "**/Thumbs.db": true,
+        "**/.ruff_cache": true,
+        "**/__pycache__": true,
+        "**/*.egg-info": true
+    },
+    "[shellscript]": {
+        "files.eol": "\n",
+        "editor.tabSize": 4,
+        "editor.detectIndentation": false
+    },
+    "[python]": {
+        "editor.wordBasedSuggestions": "off",
+        "editor.formatOnSave": true,
+        "editor.defaultFormatter": "charliermarsh.ruff",
+        "editor.codeActionsOnSave": {
+            "source.organizeImports": "always"
+        }
+    },
+    "ruff.format.args": ["--line-length", "110"],
+    "[json]": {
+        "editor.defaultFormatter": "esbenp.prettier-vscode",
+        "editor.detectIndentation": false,
+        "editor.formatOnSaveMode": "file",
+        "editor.formatOnSave": true,
+        "editor.tabSize": 2
+    },
+    "[jsonc]": {
+        "editor.defaultFormatter": "esbenp.prettier-vscode",
+        "editor.detectIndentation": false,
+        "editor.formatOnSaveMode": "file",
+        "editor.formatOnSave": true,
+        "editor.tabSize": 2
+    },
+    "[toml]": {
+        "editor.tabSize": 2,
+        "editor.detectIndentation": false,
+        "editor.formatOnSave": true,
+        "editor.formatOnSaveMode": "file",
+        "editor.defaultFormatter": "tamasfe.even-better-toml",
+        "editor.rulers": [80, 100]
+    },
+    "evenBetterToml.formatter.columnWidth": 88,
+    "[yaml]": {
+        "editor.detectIndentation": false,
+        "editor.tabSize": 2,
+        "editor.formatOnSave": true,
+        "editor.formatOnSaveMode": "file",
+        "diffEditor.ignoreTrimWhitespace": false,
+        "editor.defaultFormatter": "redhat.vscode-yaml"
+    },
+    "yaml.format.bracketSpacing": true,
+    "yaml.format.proseWrap": "preserve",
+    "yaml.format.singleQuote": false,
+    "yaml.format.printWidth": 110,
+    "[hcl]": {
+        "editor.detectIndentation": false,
+        "editor.formatOnSave": true,
+        "editor.formatOnSaveMode": "file",
+        "editor.defaultFormatter": "fredwangwang.vscode-hcl-format"
+    },
+    "[markdown]": {
+        "files.trimTrailingWhitespace": false
+    },
+    "css.lint.validProperties": ["dock", "content-align", "content-justify"],
+    "[css]": {
+        "editor.formatOnSave": true
+    },
+    "remote.autoForwardPorts": false,
+    "remote.autoForwardPortsSource": "process"
+}

LICENSE.md ADDED Viewed

	@@ -0,0 +1,25 @@

+The MIT License (MIT)
+=====================
+Copyright © 2024 Andi Powers-Holmes <aholmes@omnom.net>
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the “Software”), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.

README.md CHANGED Viewed

@@ -1,12 +1,21 @@
 ---
-title: Pi Tagger
 emoji: 🌖
-colorFrom: purple
-colorTo: purple
 sdk: gradio
 sdk_version: 4.19.2
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: pi-chan tagger
 emoji: 🌖
+colorFrom: indigo
+colorTo: pink
 sdk: gradio
 sdk_version: 4.19.2
 app_file: app.py
 pinned: false
+short_description: A WD Tagger Space for pi-chan to use
+preload_from_hub:
+  - SmilingWolf/wd-v1-4-moat-tagger-v2 model.onnx
+  - SmilingWolf/wd-v1-4-swinv2-tagger-v2 model.onnx
+  - SmilingWolf/wd-v1-4-convnext-tagger-v2 model.onnx
+  - SmilingWolf/wd-v1-4-convnextv2-tagger-v2 model.onnx
+  - SmilingWolf/wd-v1-4-vit-tagger-v2 model.onnx
 ---
+# pi-chan tagger
+WD Tagger space for a prompt inspector to use as a backend.

app.py ADDED Viewed

	@@ -0,0 +1,168 @@

+from os import getenv
+from pathlib import Path
+from typing import Optional
+import gradio as gr
+import numpy as np
+import onnxruntime as rt
+from PIL import Image
+from tagger.common import LabelData, load_labels, preprocess_image
+from tagger.model import create_session
+# Optional Hugging Face access token, read from the environment; None when HF_TOKEN is unset.
+HF_TOKEN = getenv("HF_TOKEN", None)
+# Absolute path of the process working directory; the examples folder is looked up beneath it.
+WORK_DIR = Path.cwd().resolve()
+# Maps the variant name shown in the UI to the Hub repository the model is loaded from.
+MODEL_VARIANTS: dict[str, str] = {
+    "MOAT": "SmilingWolf/wd-v1-4-moat-tagger-v2",
+    "SwinV2": "SmilingWolf/wd-v1-4-swinv2-tagger-v2",
+    "ConvNeXT": "SmilingWolf/wd-v1-4-convnext-tagger-v2",
+    "ConvNeXTv2": "SmilingWolf/wd-v1-4-convnextv2-tagger-v2",
+    "ViT": "SmilingWolf/wd-v1-4-vit-tagger-v2",
+}
+# allowed extensions (compared against the lower-cased file suffix when collecting examples)
+IMAGE_EXTENSIONS = [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff", ".tif"]
+# model input shape
+# NOTE(review): not referenced elsewhere in this module as far as visible; predict() reads the
+# input size from the ONNX session instead - confirm whether this constant is still needed.
+IMAGE_SIZE = 448
+# Sorted paths (relative to WORK_DIR) of every image file directly inside ./examples.
+# iterdir() raises if the examples directory does not exist.
+example_images = sorted(
+    [
+        str(x.relative_to(WORK_DIR))
+        for x in WORK_DIR.joinpath("examples").iterdir()
+        if x.is_file() and x.suffix.lower() in IMAGE_EXTENSIONS
+    ]
+)
+# Per-variant cache of inference sessions; entries start as None and are filled in by load_model().
+loaded_models: dict[str, Optional[rt.InferenceSession]] = {k: None for k, _ in MODEL_VARIANTS.items()}
+def load_model(variant: str) -> rt.InferenceSession:
+    global loaded_models
+    # resolve the repo name
+    model_repo = MODEL_VARIANTS.get(variant, None)
+    if model_repo is None:
+        raise ValueError(f"Unknown model variant: {variant}")
+    if loaded_models.get(variant, None) is None:
+        # save model to cache
+        loaded_models[variant] = create_session(model_repo, token=HF_TOKEN)
+    return loaded_models[variant]
+def predict(
+    image: Image.Image,
+    variant: str,
+    general_threshold: float = 0.35,
+    character_threshold: float = 0.85,
+):
+    # Load model
+    model: rt.InferenceSession = load_model(variant)
+    # load labels
+    labels: LabelData = load_labels()
+    # get input size and name
+    _, h, w, _ = model.get_inputs()[0].shape
+    input_name = model.get_inputs()[0].name
+    output_name = model.get_outputs()[0].name
+    # preprocess image
+    image = preprocess_image(image, (h, w))
+    # turn into BGR24 numpy array of N,H,W,C since thats what these want
+    inputs = image.convert("RGB").convert("BGR;24")
+    inputs = np.array(inputs).astype(np.float32)
+    inputs = np.expand_dims(inputs, axis=0)
+    # Run the ONNX model
+    probs = model.run([output_name], {input_name: inputs})
+    # Convert indices+probs to labels
+    probs = list(zip(labels.names, probs[0][0].astype(float)))
+    # First 4 labels are actually ratings
+    rating_labels = dict([probs[i] for i in labels.rating])
+    # General labels, pick any where prediction confidence > threshold
+    gen_labels = [probs[i] for i in labels.general]
+    gen_labels = dict([x for x in gen_labels if x[1] > general_threshold])
+    gen_labels = dict(sorted(gen_labels.items(), key=lambda item: item[1], reverse=True))
+    # Character labels, pick any where prediction confidence > threshold
+    char_labels = [probs[i] for i in labels.character]
+    char_labels = dict([x for x in char_labels if x[1] > character_threshold])
+    char_labels = dict(sorted(char_labels.items(), key=lambda item: item[1], reverse=True))
+    # Combine general and character labels, sort by confidence
+    combined_names = [x for x in gen_labels]
+    combined_names.extend([x for x in char_labels])
+    # Convert to a string suitable for use as a training caption
+    caption = ", ".join(combined_names)
+    booru = caption.replace("_", " ").replace("(", "\(").replace(")", "\)")
+    return image, caption, booru, rating_labels, char_labels, gen_labels
+with gr.Blocks(title="pi-chan's tagger") as demo:
+    with gr.Row(equal_height=False):
+        with gr.Column():
+            img_input = gr.Image(
+                label="Input",
+                type="pil",
+                image_mode="RGB",
+                sources=["upload", "clipboard"],
+            )
+            variant = gr.Radio(choices=list(MODEL_VARIANTS.keys()), label="Model Variant", value="MOAT")
+            gen_thresh = gr.Slider(0.0, 1.0, value=0.35, label="General Tag Threshold")
+            char_thresh = gr.Slider(0.0, 1.0, value=0.85, label="Character Tag Threshold")
+            show_processed = gr.Checkbox(label="Show Preprocessed", value=False)
+            with gr.Row():
+                submit = gr.Button(value="Submit", variant="primary", size="lg")
+                clear = gr.ClearButton(
+                    components=[],
+                    variant="secondary",
+                    size="lg",
+                )
+            with gr.Row():
+                examples = gr.Examples(
+                    examples=[
+                        [imgpath, var, 0.35, 0.85]
+                        for imgpath in example_images
+                        for var in ["MOAT", "ConvNeXTv2"]
+                    ],
+                    inputs=[img_input, variant, gen_thresh, char_thresh],
+                )
+        with gr.Column():
+            img_output = gr.Image(label="Preprocessed", type="pil", image_mode="RGB", scale=1, visible=False)
+            with gr.Group():
+                tags_string = gr.Textbox(
+                    label="Caption", placeholder="Caption will appear here", show_copy_button=True
+                )
+                tags_booru = gr.Textbox(
+                    label="Tags", placeholder="Tag string will appear here", show_copy_button=True
+                )
+            rating = gr.Label(label="Rating")
+            character = gr.Label(label="Character")
+            general = gr.Label(label="General")
+    # tell clear button which components to clear
+    clear.add([img_input, img_output, tags_string, rating, character, general])
+    # show/hide processed image
+    def on_select_show_processed(evt: gr.SelectData):
+        return gr.update(visible=evt.selected)
+    show_processed.select(on_select_show_processed, inputs=[], outputs=[img_output])
+    submit.click(
+        predict,
+        inputs=[img_input, variant, gen_thresh, char_thresh],
+        outputs=[img_output, tags_string, tags_booru, rating, character, general],
+        api_name="predict",
+    )
+# Script entry point: enable request queueing (at most 10 waiting requests) and start the server.
+if __name__ == "__main__":
+    demo.queue(max_size=10)
+    # binds to all interfaces on a fixed port
+    # NOTE(review): Hugging Face Spaces conventionally serve Gradio apps on port 7860 - confirm
+    # that 7871 is intended for this deployment.
+    demo.launch(server_name="0.0.0.0", server_port=7871)

data/selected_tags.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

examples/img-01.png ADDED Viewed

Git LFS Details

SHA256: 37a2bec1c653272457c6b6e5fec6da8ac4676d973f7cd87c545a6e1ab6be288c
Pointer size: 132 Bytes
Size of remote file: 1.53 MB

examples/img-02.png ADDED Viewed

Git LFS Details

SHA256: 90ee6035ce0caec46bbda3a9d48bdcd2cd7384487781615c4251301ab5422d45
Pointer size: 131 Bytes
Size of remote file: 434 kB

pyproject.toml ADDED Viewed

	@@ -0,0 +1,87 @@

+[project]
+name = "pi-tagger-space"
+version = "0.1.0"
+authors = [
+  { name = "Andi Powers-Holmes", email = "aholmes@omnom.net" },
+]
+maintainers = [
+  { name = "Andi Powers-Holmes", email = "aholmes@omnom.net" },
+]
+description = "pi-tagger Gradio Space"
+readme = "README.md"
+# tagger/common.py evaluates an `int | tuple[int, int]` annotation at import time (PEP 604),
+# which fails on Python 3.9, so 3.10 is the real minimum.
+requires-python = ">=3.10, <3.11"
+keywords = [
+  "deep-learning",
+  "machine-learning",
+  "pytorch",
+]
+license = { file = "LICENSE.md" }
+classifiers = [
+  "Programming Language :: Python :: 3",
+  "License :: OSI Approved :: MIT License",
+]
+dependencies = [
+  "gradio >=4.19.2, < 5.0.0",
+  "huggingface-hub >= 0.14.0", # imported by tagger/model.py; same bound as requirements.txt
+  "numpy >= 1.23.5",
+  "onnxruntime-gpu >= 1.14.1",
+  "pandas >= 2.0.0",
+  "Pillow >= 9.5.0",
+  "PyYAML",
+  "safetensors",
+  "simple-parsing >= 0.1.0",
+]
+[project.urls]
+Repository = "https://huggingface.co/spaces/neggles/pi-tagger"
+[project.optional-dependencies]
+dev = [
+  "ruff >=0.0.289",
+  "setuptools-scm >= 8.0.0",
+  "pre-commit >= 3.0.0",     # remember to run `pre-commit install` after installing
+  "tabulate >= 0.8.9",       # for inductor log prettyprinting
+]
+all = [
+  "pi-tagger-space[dev]",
+]
+[build-system]
+build-backend = "setuptools.build_meta"
+requires = ["setuptools>=64", "wheel"]
+[tool.setuptools.packages.find]
+namespaces = true
+where = ["."]
+# the importable package in this repository is `tagger`; "pi-tagger" matched nothing
+include = ["tagger*"]
+[tool.ruff]
+line-length = 110
+target-version = "py310"
+extend-exclude = ["/usr/lib/*"]
+[tool.ruff.lint]
+ignore = [
+  "F841", # local variable assigned but never used
+  "F842", # local variable annotated but never used
+  "E501", # line too long - will be fixed in format
+]
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+line-ending = "auto"
+skip-magic-trailing-comma = false
+docstring-code-format = true
+[tool.ruff.lint.isort]
+combine-as-imports = true
+force-wrap-aliases = true
+# module names as they appear in import statements ("pi-tagger" is not an importable name)
+known-local-folder = ["tagger"]
+known-first-party = ["tagger"]
+[tool.pyright]
+include = ["src/**"]
+exclude = ["/usr/lib/**"]
+stubPath = "./typings"

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+gradio >=4.19.2, < 5.0.0
+numpy >= 1.23.5
+onnxruntime-gpu >= 1.14.1
+pandas >= 2.0.0
+Pillow >= 9.5.0
+safetensors
+simple-parsing >= 0.1.0
+huggingface-hub >= 0.14.0
+hf-transfer

tagger/__init__.py ADDED Viewed

	@@ -0,0 +1,10 @@

+from .common import LabelData, load_labels, preprocess_image
+from .model import create_session, download_onnx
+# Public API of the package: the names re-exported from the .common and .model submodules.
+__all__ = [
+    "create_session",
+    "download_onnx",
+    "LabelData",
+    "load_labels",
+    "preprocess_image",
+]

tagger/common.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import json
+from dataclasses import asdict, dataclass
+from functools import lru_cache
+from os import PathLike
+from pathlib import Path
+from typing import Any
+import numpy as np
+import pandas as pd
+from PIL import Image
+class DictJsonMixin:
+    def asdict(self, *args, **kwargs) -> dict[str, Any]:
+        return asdict(self, *args, **kwargs)
+    def asjson(self, *args, **kwargs):
+        return json.dumps(asdict(self, *args, **kwargs))
+@dataclass
+class LabelData(DictJsonMixin):
+    """Tag names plus the row indices of each tag category, as built by ``load_labels``."""
+    # every tag name, in CSV row order
+    names: list[str]
+    # row indices of tags whose category is 9
+    rating: list[np.int64]
+    # row indices of tags whose category is 0
+    general: list[np.int64]
+    # row indices of tags whose category is 4
+    character: list[np.int64]
+@dataclass
+class ImageLabels(DictJsonMixin):
+    """Container for one image's tagging result.
+    NOTE(review): not used by the code visible in this commit; the field meanings below are
+    inferred from the values app.predict() returns - confirm before relying on them.
+    """
+    # presumably the comma-separated tag caption
+    caption: str
+    # presumably the caption with underscores replaced and parentheses escaped
+    booru: str
+    # presumably tag name -> confidence, one dict per tag category
+    rating: dict[str, float]
+    general: dict[str, float]
+    character: dict[str, float]
+@lru_cache(maxsize=5)
+def load_labels(csv_path: PathLike = "data/selected_tags.csv") -> LabelData:
+    csv_path = Path(csv_path).resolve()
+    if not csv_path.is_file():
+        raise FileNotFoundError("No selected_tags.csv found")
+    df: pd.DataFrame = pd.read_csv(csv_path, usecols=["name", "category"])
+    tag_data = LabelData(
+        names=df["name"].tolist(),
+        rating=list(np.where(df["category"] == 9)[0]),
+        general=list(np.where(df["category"] == 0)[0]),
+        character=list(np.where(df["category"] == 4)[0]),
+    )
+    return tag_data
+def pil_ensure_rgb(image: Image.Image) -> Image.Image:
+    # convert to RGB/RGBA if not already (deals with palette images etc.)
+    if image.mode not in ["RGB", "RGBA"]:
+        image = image.convert("RGBA") if "transparency" in image.info else image.convert("RGB")
+    # convert RGBA to RGB with white background
+    if image.mode == "RGBA":
+        canvas = Image.new("RGBA", image.size, (255, 255, 255))
+        canvas.alpha_composite(image)
+        image = canvas.convert("RGB")
+    return image
+def pil_pad_square(
+    image: Image.Image,
+    fill: tuple[int, int, int] = (255, 255, 255),
+) -> Image.Image:
+    w, h = image.size
+    # get the largest dimension so we can pad to a square
+    px = max(image.size)
+    # pad to square with white background
+    canvas = Image.new("RGB", (px, px), fill)
+    canvas.paste(image, ((px - w) // 2, (px - h) // 2))
+    return canvas
+def preprocess_image(
+    image: Image.Image,
+    size_px: int | tuple[int, int],
+    upscale: bool = True,
+) -> Image.Image:
+    """
+    Preprocess an image to be square and centered on a white background.
+    """
+    if isinstance(size_px, int):
+        size_px = (size_px, size_px)
+    # ensure RGB and pad to square
+    image = pil_ensure_rgb(image)
+    image = pil_pad_square(image)
+    # resize to target size
+    if image.size[0] < size_px[0] or image.size[1] < size_px[1]:
+        if upscale is False:
+            raise ValueError("Image is smaller than target size, and upscaling is disabled")
+        image = image.resize(size_px, Image.LANCZOS)
+    if image.size[0] > size_px[0] or image.size[1] > size_px[1]:
+        image.thumbnail(size_px, Image.BICUBIC)
+    return image

tagger/model.py ADDED Viewed

	@@ -0,0 +1,36 @@

+from pathlib import Path
+from typing import Optional
+import onnxruntime as rt
+from huggingface_hub import hf_hub_download
+def download_onnx(
+    repo_id: str,
+    filename: str = "model.onnx",
+    revision: Optional[str] = None,
+    token: Optional[str] = None,
+) -> Path:
+    if not filename.endswith(".onnx"):
+        filename += ".onnx"
+    model_path = hf_hub_download(repo_id=repo_id, filename=filename, revision=revision, token=token)
+    return Path(model_path).resolve()
+def create_session(
+    repo_id: str,
+    revision: Optional[str] = None,
+    token: Optional[str] = None,
+) -> rt.InferenceSession:
+    model_path = download_onnx(repo_id, revision=revision, token=token)
+    if not model_path.is_file():
+        model_path = model_path.joinpath("model.onnx")
+    if not model_path.is_file():
+        raise FileNotFoundError(f"Model not found: {model_path}")
+    model = rt.InferenceSession(
+        str(model_path),
+        providers=[("CUDAExecutionProvider", {}), "CPUExecutionProvider"],
+    )
+    return model