p1atdev committed on
Commit cf72c4b
1 Parent(s): 5043faf

initial commit

Files changed (8)
  1. .gitignore +176 -0
  2. README.md +3 -3
  3. app.py +185 -0
  4. diffusion.py +71 -0
  5. output.py +16 -0
  6. requirements.txt +5 -0
  7. utils.py +44 -0
  8. v2.py +254 -0
.gitignore ADDED
@@ -0,0 +1,176 @@
+ # Created by https://www.toptal.com/developers/gitignore/api/python
+ # Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+ ### Python ###
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ ### Python Patch ###
+ # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+ poetry.toml
+
+ # ruff
+ .ruff_cache/
+
+ # LSP config files
+ pyrightconfig.json
+
+ # End of https://www.toptal.com/developers/gitignore/api/python
README.md CHANGED
@@ -1,8 +1,8 @@
  ---
  title: Danbooru Tags Transformer V2
- emoji: 🐢
- colorFrom: purple
- colorTo: pink
+ emoji: 📦
+ colorFrom: yellow
+ colorTo: yellow
  sdk: gradio
  sdk_version: 4.28.3
  app_file: app.py
app.py ADDED
@@ -0,0 +1,185 @@
+ from typing import Callable
+ from PIL import Image
+
+ import gradio as gr
+
+ from v2 import V2UI
+ from diffusion import ImageGenerator
+ from output import UpsamplingOutput
+ from utils import QUALITY_TAGS, NEGATIVE_PROMPT, IMAGE_SIZE_OPTIONS, IMAGE_SIZES
+
+
+ def animagine_xl_v3_1(output: UpsamplingOutput):
+     return ", ".join(
+         [
+             part.strip()
+             for part in [
+                 output.character_tags,
+                 output.copyright_tags,
+                 output.general_tags,
+                 output.upsampled_tags,
+                 (
+                     output.rating_tag
+                     if output.rating_tag not in ["<|rating:sfw|>", "<|rating:general|>"]
+                     else ""
+                 ),
+             ]
+             if part.strip() != ""
+         ]
+     )
+
+
+ def elapsed_time_format(elapsed_time: float) -> str:
+     return f"Elapsed: {elapsed_time:.2f} seconds"
+
+
+ def parse_upsampling_output(
+     upsampler: Callable[..., UpsamplingOutput],
+     image_generator: Callable[..., Image.Image],
+ ):
+     def _parse_upsampling_output(
+         generate_image: bool, *args
+     ) -> tuple[str, str, Image.Image | None]:
+         output = upsampler(*args)
+
+         print(output)
+
+         if not generate_image:
+             return (
+                 animagine_xl_v3_1(output),
+                 elapsed_time_format(output.elapsed_time),
+                 None,
+             )
+
+         # generate image
+         [
+             image_size_option,
+             quality_tags,
+             negative_prompt,
+             num_inference_steps,
+             guidance_scale,
+         ] = args[
+             7:
+         ]  # remove the first 7 arguments for upsampler
+         width, height = IMAGE_SIZES[image_size_option]
+         image = image_generator(
+             ", ".join([animagine_xl_v3_1(output), quality_tags]),
+             negative_prompt,
+             height,
+             width,
+             num_inference_steps,
+             guidance_scale,
+         )
+
+         return (
+             animagine_xl_v3_1(output),
+             elapsed_time_format(output.elapsed_time),
+             image,
+         )
+
+     return _parse_upsampling_output
+
+
+ def toggle_visible_output_image(generate_image: bool):
+     return gr.update(
+         visible=generate_image,
+     )
+
+
+ def image_generation_config_ui():
+     with gr.Accordion(label="Image generation config", open=True) as accordion:
+         image_size = gr.Radio(
+             label="Image size",
+             choices=list(IMAGE_SIZE_OPTIONS.keys()),
+             value=list(IMAGE_SIZE_OPTIONS.keys())[3],  # tall
+         )
+
+         quality_tags = gr.Textbox(
+             label="Quality tags",
+             placeholder=QUALITY_TAGS["default"],
+             value=QUALITY_TAGS["default"],
+         )
+         negative_prompt = gr.Textbox(
+             label="Negative prompt",
+             placeholder=NEGATIVE_PROMPT["default"],
+             value=NEGATIVE_PROMPT["default"],
+         )
+
+         num_inference_steps = gr.Slider(
+             label="Num inference steps",
+             minimum=20,
+             maximum=30,
+             step=1,
+             value=25,
+         )
+         guidance_scale = gr.Slider(
+             label="Guidance scale",
+             minimum=0.0,
+             maximum=10.0,
+             step=0.5,
+             value=7.0,
+         )
+
+     return accordion, [
+         image_size,
+         quality_tags,
+         negative_prompt,
+         num_inference_steps,
+         guidance_scale,
+     ]
+
+
+ def main():
+
+     v2 = V2UI()
+
+     print("Loading diffusion model...")
+     image_generator = ImageGenerator()
+     print("Loaded.")
+
+     with gr.Blocks() as ui:
+         with gr.Row():
+             with gr.Column():
+                 v2.ui()
+
+                 generate_image_check = gr.Checkbox(
+                     label="Also generate image", value=True
+                 )
+
+                 accordion, image_generation_config_components = (
+                     image_generation_config_ui()
+                 )
+
+             with gr.Column():
+                 output_text = gr.TextArea(label="Output tags", interactive=False)
+
+                 elapsed_time_md = gr.Markdown(label="Elapsed time", value="")
+
+                 output_image = gr.Gallery(
+                     label="Output image",
+                     columns=1,
+                     preview=True,
+                     show_label=False,
+                     visible=True,
+                 )
+
+         v2.get_generate_btn().click(
+             parse_upsampling_output(v2.on_generate, image_generator.generate),
+             inputs=[
+                 generate_image_check,
+                 *v2.get_inputs(),
+                 *image_generation_config_components,
+             ],
+             outputs=[output_text, elapsed_time_md, output_image],
+         )
+         generate_image_check.change(
+             toggle_visible_output_image,
+             inputs=[generate_image_check],
+             outputs=[output_image],
+         )
+
+     ui.launch()
+
+
+ if __name__ == "__main__":
+     main()
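
Note on the argument wiring above: `_parse_upsampling_output` receives one flat tuple built from the `inputs` list in `main()` (the checkbox first, then the seven `V2UI` inputs, then the five image-generation controls), and `args[7:]` peels off the image-generation part. A minimal sketch of that ordering, with illustrative placeholder values that are not taken from the app:

    # Hypothetical flat argument tuple as Gradio would pass it to
    # parse_upsampling_output(upsampler, image_generator)(generate_image, *args).
    args = (
        "dart-v2-llama-100m-sft",   # 0: model name        -- read by v2.on_generate
        "vocaloid",                 # 1: copyright tags
        "hatsune miku",             # 2: character tags
        "1girl",                    # 3: general tags
        "general",                  # 4: rating option
        "long",                     # 5: length option
        "lax",                      # 6: identity option
        "832x1216",                 # 7: image size (on_generate's image_size, and start of the slice)
        "(masterpiece), best quality",  # 8: quality tags
        "nsfw, (low quality, worst quality:1.2)",  # 9: negative prompt
        25,                         # 10: num inference steps
        7.0,                        # 11: guidance scale
    )
    image_generation_args = args[7:]  # what the slice in _parse_upsampling_output yields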
diffusion.py ADDED
@@ -0,0 +1,71 @@
+ from PIL import Image
+
+ import torch
+ from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import (
+     StableDiffusionXLPipeline,
+ )
+ from diffusers.schedulers.scheduling_euler_ancestral_discrete import (
+     EulerAncestralDiscreteScheduler,
+ )
+
+ try:
+     import spaces
+ except ImportError:
+
+     class spaces:
+         def GPU(*args, **kwargs):
+             return lambda x: x
+
+
+ from utils import NEGATIVE_PROMPT
+
+
+ class ImageGenerator:
+     pipe: StableDiffusionXLPipeline
+
+     def __init__(self, model_name: str = "cagliostrolab/animagine-xl-3.1"):
+         self.pipe = StableDiffusionXLPipeline.from_pretrained(
+             model_name,
+             torch_dtype=torch.float16,
+             custom_pipeline="lpw_stable_diffusion_xl",
+             use_safetensors=True,
+             add_watermarker=False,
+         )
+         self.pipe.scheduler = (
+             EulerAncestralDiscreteScheduler.from_pretrained(
+                 model_name,
+                 subfolder="scheduler",
+             )
+         )
+
+         # xformers
+         self.pipe.enable_xformers_memory_efficient_attention()
+
+         self.pipe.to("cuda")
+
+     @torch.no_grad()
+     @spaces.GPU(duration=30)
+     def generate(
+         self,
+         prompt: str,
+         negative_prompt: str = NEGATIVE_PROMPT["default"],  # Light v3.1
+         height: int = 1152,
+         width: int = 896,
+         num_inference_steps: int = 25,
+         guidance_scale: float = 7.0,
+     ) -> Image.Image:
+         print("prompt", prompt)
+         print("negative_prompt", negative_prompt)
+         print("height", height)
+         print("width", width)
+         print("num_inference_steps", num_inference_steps)
+         print("guidance_scale", guidance_scale)
+
+         return self.pipe(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             height=height,
+             width=width,
+             num_inference_steps=num_inference_steps,
+             guidance_scale=guidance_scale,
+         ).images[0]
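
For context, a minimal sketch of how `ImageGenerator` could be exercised on its own, assuming a CUDA GPU and the dependencies above are installed (the output file name is made up):

    # Hypothetical standalone usage of ImageGenerator, outside the Gradio app.
    from diffusion import ImageGenerator

    generator = ImageGenerator()  # loads cagliostrolab/animagine-xl-3.1 and moves it to CUDA
    image = generator.generate(
        prompt="1girl, hatsune miku, vocaloid, masterpiece, best quality",
        num_inference_steps=25,
        guidance_scale=7.0,
    )
    image.save("sample.png")  # generate() returns a single PIL.Image.Image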
output.py ADDED
@@ -0,0 +1,16 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class UpsamplingOutput:
+     upsampled_tags: str
+
+     copyright_tags: str
+     character_tags: str
+     general_tags: str
+     rating_tag: str
+     aspect_ratio_tag: str
+     length_tag: str
+     identity_tag: str
+
+     elapsed_time: float = 0.0
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ torch==2.2.0
+ accelerate==0.29.2
+ transformers==4.38.2
+ optimum[onnxruntime]==1.19.1
+ spaces==0.26.2
utils.py ADDED
@@ -0,0 +1,44 @@
+ # from https://huggingface.co/spaces/cagliostrolab/animagine-xl-3.1/blob/main/config.py
+ QUALITY_TAGS = {
+     "default": "(masterpiece), best quality, very aesthetic, perfect face",
+ }
+ NEGATIVE_PROMPT = {
+     "default": "nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn",
+ }
+
+
+ IMAGE_SIZE_OPTIONS = {
+     "1536x640": "<|aspect_ratio:ultra_wide|>",
+     "1216x832": "<|aspect_ratio:wide|>",
+     "1024x1024": "<|aspect_ratio:square|>",
+     "832x1216": "<|aspect_ratio:tall|>",
+     "640x1536": "<|aspect_ratio:ultra_tall|>",
+ }
+ IMAGE_SIZES = {
+     "1536x640": (1536, 640),
+     "1216x832": (1216, 832),
+     "1024x1024": (1024, 1024),
+     "832x1216": (832, 1216),
+     "640x1536": (640, 1536),
+ }
+
+ RATING_OPTIONS = {
+     "sfw": "<|rating:sfw|>",
+     "general": "<|rating:general|>",
+     "sensitive": "<|rating:sensitive|>",
+     "nsfw": "<|rating:nsfw|>",
+     "questionable": "<|rating:questionable|>",
+     "explicit": "<|rating:explicit|>",
+ }
+ LENGTH_OPTIONS = {
+     "very_short": "<|length:very_short|>",
+     "short": "<|length:short|>",
+     "medium": "<|length:medium|>",
+     "long": "<|length:long|>",
+     "very_long": "<|length:very_long|>",
+ }
+ IDENTITY_OPTIONS = {
+     "none": "<|identity:none|>",
+     "lax": "<|identity:lax|>",
+     "strict": "<|identity:strict|>",
+ }
v2.py ADDED
@@ -0,0 +1,254 @@
+ import time
+
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer, PreTrainedTokenizerBase
+
+ import gradio as gr
+ from gradio.components import Component
+
+ try:
+     import spaces
+ except ImportError:
+
+     class spaces:
+         def GPU(*args, **kwargs):
+             return lambda x: x
+
+
+ from output import UpsamplingOutput
+ from utils import IMAGE_SIZE_OPTIONS, RATING_OPTIONS, LENGTH_OPTIONS, IDENTITY_OPTIONS
+
+ ALL_MODELS = {
+     "dart-v2-llama-100m-sft": {
+         "repo": "p1atdev/dart-v2-llama-100m-sft",
+         "type": "sft",
+     },
+     "dart-v2-mistral-100m-sft": {
+         "repo": "p1atdev/dart-v2-mistral-100m-sft",
+         "type": "sft",
+     },
+     "dart-v2-mixtral-160m-sft": {
+         "repo": "p1atdev/dart-v2-mixtral-160m-sft",
+         "type": "sft",
+     },
+ }
+
+
+ def prepare_models(model_name: str):
+     tokenizer = AutoTokenizer.from_pretrained(model_name)
+     model = AutoModelForCausalLM.from_pretrained(
+         model_name,
+         torch_dtype=torch.bfloat16,
+         device_map="auto",
+     )
+
+     return {
+         "tokenizer": tokenizer,
+         "model": model,
+     }
+
+
+ def normalize_tags(tokenizer: PreTrainedTokenizerBase, tags: str):
+     """Just remove unk tokens."""
+     return ", ".join(
+         tokenizer.batch_decode(
+             [
+                 token
+                 for token in tokenizer.encode_plus(
+                     tags,
+                     return_tensors="pt",
+                 ).input_ids[0]
+                 if int(token) != tokenizer.unk_token_id
+             ],
+             skip_special_tokens=True,
+         )
+     )
+
+
+ def compose_prompt(
+     copyright: str = "",
+     character: str = "",
+     general: str = "",
+     rating: str = "<|rating:sfw|>",
+     aspect_ratio: str = "<|aspect_ratio:tall|>",
+     length: str = "<|length:long|>",
+     identity: str = "<|identity:none|>",
+ ):
+     prompt = (
+         f"<|bos|>"
+         f"<copyright>{copyright.strip()}</copyright>"
+         f"<character>{character.strip()}</character>"
+         f"{rating}{aspect_ratio}{length}"
+         f"<general>{general.strip()}{identity}<|input_end|>"
+     )
+
+     return prompt
+
+
+ @torch.no_grad()
+ @spaces.GPU(duration=5)
+ def generate_tags(
+     model,
+     tokenizer: PreTrainedTokenizerBase,
+     prompt: str,
+ ):
+     print(  # debug
+         tokenizer.tokenize(
+             prompt,
+             add_special_tokens=False,
+         )
+     )
+     input_ids = tokenizer.encode_plus(prompt, return_tensors="pt").input_ids
+     output = model.generate(
+         input_ids.to(model.device),
+         do_sample=True,
+         temperature=1,
+         top_p=0.9,
+         top_k=100,
+         num_beams=1,
+         num_return_sequences=1,
+         max_length=256,
+     )
+
+     # remove input tokens
+     pure_output_ids = output[0][len(input_ids[0]) :]
+
+     return ", ".join(
+         [
+             token
+             for token in tokenizer.batch_decode(
+                 pure_output_ids, skip_special_tokens=True
+             )
+             if token.strip() != ""
+         ]
+     )
+
+
+ class V2UI:
+     model_name: str | None = None
+     model: AutoModelForCausalLM
+     tokenizer: PreTrainedTokenizerBase
+
+     input_components: list[Component] = []
+     generate_btn: gr.Button
+
+     def on_generate(
+         self,
+         model_name: str,
+         copyright_tags: str,
+         character_tags: str,
+         general_tags: str,
+         rating_option: str,
+         # aspect_ratio_option: str,
+         length_option: str,
+         identity_option: str,
+         image_size: str,  # this is from image generation config
+         *args,
+     ) -> UpsamplingOutput:
+         if self.model_name is None or self.model_name != model_name:
+             models = prepare_models(ALL_MODELS[model_name]["repo"])
+             self.model = models["model"]
+             self.tokenizer = models["tokenizer"]
+             self.model_name = model_name
+
+         # normalize tags
+         copyright_tags = normalize_tags(self.tokenizer, copyright_tags)
+         character_tags = normalize_tags(self.tokenizer, character_tags)
+         general_tags = normalize_tags(self.tokenizer, general_tags)
+
+         rating_tag = RATING_OPTIONS[rating_option]
+         aspect_ratio_tag = IMAGE_SIZE_OPTIONS[image_size]
+         length_tag = LENGTH_OPTIONS[length_option]
+         identity_tag = IDENTITY_OPTIONS[identity_option]
+
+         prompt = compose_prompt(
+             copyright=copyright_tags,
+             character=character_tags,
+             general=general_tags,
+             rating=rating_tag,
+             aspect_ratio=aspect_ratio_tag,
+             length=length_tag,
+             identity=identity_tag,
+         )
+
+         start = time.time()
+         upsampled_tags = generate_tags(
+             self.model,
+             self.tokenizer,
+             prompt,
+         )
+         elapsed_time = time.time() - start
+
+         return UpsamplingOutput(
+             upsampled_tags=upsampled_tags,
+             copyright_tags=copyright_tags,
+             character_tags=character_tags,
+             general_tags=general_tags,
+             rating_tag=rating_tag,
+             aspect_ratio_tag=aspect_ratio_tag,
+             length_tag=length_tag,
+             identity_tag=identity_tag,
+             elapsed_time=elapsed_time,
+         )
+
+     def ui(self):
+         input_copyright = gr.Textbox(
+             label="Copyright tags",
+             placeholder="vocaloid",
+         )
+         input_character = gr.Textbox(
+             label="Character tags",
+             placeholder="hatsune miku",
+         )
+         input_general = gr.TextArea(
+             label="General tags",
+             lines=4,
+             placeholder="1girl, ...",
+             value="1girl",
+         )
+
+         input_rating = gr.Radio(
+             label="Rating",
+             choices=list(RATING_OPTIONS.keys()),
+             value="general",
+         )
+         # input_aspect_ratio = gr.Radio(
+         #     label="Aspect ratio",
+         #     choices=["ultra_wide", "wide", "square", "tall", "ultra_tall"],
+         #     value="tall",
+         # )
+         input_length = gr.Radio(
+             label="Length",
+             choices=list(LENGTH_OPTIONS.keys()),
+             value="long",
+         )
+         input_identity = gr.Radio(
+             label="Identity",
+             choices=list(IDENTITY_OPTIONS.keys()),
+             value="lax",
+         )
+
+         model_name = gr.Dropdown(
+             label="Model",
+             choices=list(ALL_MODELS.keys()),
+             value=list(ALL_MODELS.keys())[0],
+         )
+
+         self.generate_btn = gr.Button(value="Generate", variant="primary")
+
+         self.input_components = [
+             model_name,
+             input_copyright,
+             input_character,
+             input_general,
+             input_rating,
+             # input_aspect_ratio,
+             input_length,
+             input_identity,
+         ]
+
+     def get_generate_btn(self) -> gr.Button:
+         return self.generate_btn
+
+     def get_inputs(self) -> list[Component]:
+         return self.input_components
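
For reference, a small sketch of the prompt string `compose_prompt` assembles for the Dart v2 models, using the same kind of values `on_generate` passes in (the inputs are illustrative; the expected string follows directly from the f-string in the function):

    # Illustrative call to compose_prompt with hypothetical tag values.
    prompt = compose_prompt(
        copyright="vocaloid",
        character="hatsune miku",
        general="1girl",
        rating="<|rating:general|>",
        aspect_ratio="<|aspect_ratio:tall|>",
        length="<|length:long|>",
        identity="<|identity:lax|>",
    )
    # prompt ==
    # "<|bos|><copyright>vocaloid</copyright><character>hatsune miku</character>"
    # "<|rating:general|><|aspect_ratio:tall|><|length:long|>"
    # "<general>1girl<|identity:lax|><|input_end|>"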