John6666 committed
Commit ab4fc1e • 0 Parent(s)

Super-squash branch 'main' using huggingface_hub

Files changed (14)
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. app.py +164 -0
  4. character_series_dict.csv +0 -0
  5. danbooru_e621.csv +0 -0
  6. fl2sd3longcap.py +74 -0
  7. output.py +16 -0
  8. pre-requirements.txt +1 -0
  9. requirements.txt +13 -0
  10. tag_group.csv +0 -0
  11. tagger.py +450 -0
  12. utils.py +45 -0
  13. v2.py +214 -0
  14. z3de621conv.py +68 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
+ ---
+ title: Danbooru Tags Transformer V2 with WD Tagger & Florence 2 SD3 Captioner
+ emoji: 📦🏃
+ colorFrom: yellow
+ colorTo: yellow
+ sdk: gradio
+ sdk_version: 4.37.2
+ header: mini
+ app_file: app.py
+ pinned: false
+ license: apache-2.0
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,164 @@
+ from PIL import Image
+ import gradio as gr
+
+ from v2 import (
+     V2UI,
+     parse_upsampling_output,
+     V2_ALL_MODELS,
+ )
+ from utils import (
+     gradio_copy_text,
+     COPY_ACTION_JS,
+     V2_ASPECT_RATIO_OPTIONS,
+     V2_RATING_OPTIONS,
+     V2_LENGTH_OPTIONS,
+     V2_IDENTITY_OPTIONS,
+ )
+ from tagger import (
+     predict_tags_wd,
+     convert_danbooru_to_e621_prompt,
+     remove_specific_prompt,
+     insert_recom_prompt,
+     compose_prompt_to_copy,
+     translate_prompt,
+     sort_tags,
+ )
+ from z3de621conv import (
+     predict_tags_e621,
+ )
+ from fl2sd3longcap import (
+     predict_tags_fl2_sd3,
+ )
+
+
+ def description_ui():
+     gr.Markdown(
+         """
+ ## Danbooru Tags Transformer V2 Demo with WD Tagger
+ (Image =>) Prompt => Upsampled longer prompt
+ - Mod of p1atdev's [Danbooru Tags Transformer V2 Demo](https://huggingface.co/spaces/p1atdev/danbooru-tags-transformer-v2) and [WD Tagger with 🤗 transformers](https://huggingface.co/spaces/p1atdev/wd-tagger-transformers).
+ - Models: p1atdev's [wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf), [dart-v2-moe-sft](https://huggingface.co/p1atdev/dart-v2-moe-sft), [dart-v2-sft](https://huggingface.co/p1atdev/dart-v2-sft), toynya's [Z3D-E621-Convnext](https://huggingface.co/toynya/Z3D-E621-Convnext), gokaygokay's [Florence-2-SD3-Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner)
+ """
+     )
+
+
+ def main():
+     v2 = V2UI()
+
+     with gr.Blocks() as ui:
+         description_ui()
+
+         with gr.Row():
+             with gr.Column(scale=2):
+                 with gr.Group():
+                     input_image = gr.Image(label="Input image", type="pil", sources=["upload", "clipboard"], height=256)
+                     with gr.Accordion(label="Advanced options", open=False):
+                         general_threshold = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.01, interactive=True)
+                         character_threshold = gr.Slider(label="Character threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.01, interactive=True)
+                         e621_threshold = gr.Slider(label="Threshold (Z3D-E621-Convnext)", minimum=0.0, maximum=1.0, value=0.5, step=0.01, interactive=True)
+                         input_tag_type = gr.Radio(label="Convert tags to", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="danbooru")
+                         recom_prompt = gr.Radio(label="Insert recommended prompt", choices=["None", "Animagine", "Pony"], value="None", interactive=True)
+                         image_algorithms = gr.CheckboxGroup(["Use WD Tagger", "Use Z3D-E621-Convnext", "Use Florence-2-SD3-Long-Captioner"], label="Algorithms", value=["Use WD Tagger"])
+                         keep_tags = gr.Radio(label="Remove tags leaving only the following", choices=["body", "dress", "all"], value="all")
+                     generate_from_image_btn = gr.Button(value="GENERATE TAGS FROM IMAGE", size="lg", variant="primary")
+
+                 with gr.Group():
+                     input_character = gr.Textbox(label="Character tags", placeholder="hatsune miku")
+                     input_copyright = gr.Textbox(label="Copyright tags", placeholder="vocaloid")
+                     input_general = gr.TextArea(label="General tags", lines=4, placeholder="1girl, ...", value="")
+                     input_tags_to_copy = gr.Textbox(value="", visible=False)
+                     copy_input_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+                     translate_input_prompt_button = gr.Button(value="Translate prompt to English", size="sm", variant="secondary")
+                     tag_type = gr.Radio(label="Output tag conversion", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="e621", visible=False)
+                     input_rating = gr.Radio(label="Rating", choices=list(V2_RATING_OPTIONS), value="explicit")
+                     with gr.Accordion(label="Advanced options", open=False):
+                         input_aspect_ratio = gr.Radio(label="Aspect ratio", info="The aspect ratio of the image.", choices=list(V2_ASPECT_RATIO_OPTIONS), value="square")
+                         input_length = gr.Radio(label="Length", info="The total length of the tags.", choices=list(V2_LENGTH_OPTIONS), value="very_long")
+                         input_identity = gr.Radio(label="Keep identity", info="How strictly to keep the identity of the character or subject. If you specify the details of the subject in the prompt, choose `strict`. Otherwise, choose `none` or `lax`. `none` is very creative but sometimes ignores the input prompt.", choices=list(V2_IDENTITY_OPTIONS), value="lax")
+                         input_ban_tags = gr.Textbox(label="Ban tags", info="Tags to ban from the output.", placeholder="alternate costume, ...", value="censored")
+                         model_name = gr.Dropdown(label="Model", choices=list(V2_ALL_MODELS.keys()), value=list(V2_ALL_MODELS.keys())[0])
+                     dummy_np = gr.Textbox(label="Negative prompt", value="", visible=False)
+                     recom_animagine = gr.Textbox(label="Animagine recommended prompt", value="Animagine", visible=False)
+                     recom_pony = gr.Textbox(label="Pony recommended prompt", value="Pony", visible=False)
+
+                 generate_btn = gr.Button(value="GENERATE TAGS", size="lg", variant="primary")
+
+                 with gr.Group():
+                     output_text = gr.TextArea(label="Output tags", interactive=False, show_copy_button=True)
+                     copy_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+                     elapsed_time_md = gr.Markdown(label="Elapsed time", value="", visible=False)
+
+                 with gr.Group():
+                     output_text_pony = gr.TextArea(label="Output tags (Pony e621 style)", interactive=False, show_copy_button=True)
+                     copy_btn_pony = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+
+         v2.input_components = [
+             model_name,
+             input_copyright,
+             input_character,
+             input_general,
+             input_rating,
+             input_aspect_ratio,
+             input_length,
+             input_identity,
+             input_ban_tags,
+         ]
+
+         translate_input_prompt_button.click(translate_prompt, inputs=[input_general], outputs=[input_general])
+         translate_input_prompt_button.click(translate_prompt, inputs=[input_character], outputs=[input_character])
+         translate_input_prompt_button.click(translate_prompt, inputs=[input_copyright], outputs=[input_copyright])
+
+         generate_from_image_btn.click(
+             predict_tags_wd,
+             inputs=[input_image, input_general, image_algorithms, general_threshold, character_threshold],
+             outputs=[
+                 input_copyright,
+                 input_character,
+                 input_general,
+                 copy_input_btn,
+             ],
+         ).then(
+             predict_tags_e621,
+             inputs=[input_image, input_general, image_algorithms, e621_threshold],
+             outputs=[input_general],
+         ).then(
+             predict_tags_fl2_sd3,
+             inputs=[input_image, input_general, image_algorithms],
+             outputs=[input_general],
+         ).then(
+             remove_specific_prompt, inputs=[input_general, keep_tags], outputs=[input_general],
+         ).then(
+             convert_danbooru_to_e621_prompt, inputs=[input_general, input_tag_type], outputs=[input_general],
+         ).then(
+             sort_tags, inputs=[input_general], outputs=[input_general],
+         ).then(
+             insert_recom_prompt, inputs=[input_general, dummy_np, recom_prompt], outputs=[input_general, dummy_np],
+         )
+         copy_input_btn.click(compose_prompt_to_copy, inputs=[input_character, input_copyright, input_general], outputs=[input_tags_to_copy]).then(
+             gradio_copy_text, inputs=[input_tags_to_copy], js=COPY_ACTION_JS,
+         )
+
+         generate_btn.click(
+             parse_upsampling_output(v2.on_generate),
+             inputs=[
+                 *v2.input_components,
+             ],
+             outputs=[output_text, elapsed_time_md, copy_btn, copy_btn_pony],
+         ).then(
+             sort_tags, inputs=[output_text], outputs=[output_text],
+         ).then(
+             convert_danbooru_to_e621_prompt, inputs=[output_text, tag_type], outputs=[output_text_pony],
+         ).then(
+             insert_recom_prompt, inputs=[output_text, dummy_np, recom_animagine], outputs=[output_text, dummy_np],
+         ).then(
+             insert_recom_prompt, inputs=[output_text_pony, dummy_np, recom_pony], outputs=[output_text_pony, dummy_np],
+         )
+         copy_btn.click(gradio_copy_text, inputs=[output_text], js=COPY_ACTION_JS)
+         copy_btn_pony.click(gradio_copy_text, inputs=[output_text_pony], js=COPY_ACTION_JS)
+
+     ui.launch()
+
+
+ if __name__ == "__main__":
+     main()
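For reference, the chained `.then()` callbacks on `generate_from_image_btn` amount to a fixed image-to-tags pipeline. A minimal sketch of the same flow as a plain function, using the modules this app imports (the image path is hypothetical):

```python
from PIL import Image
from tagger import (predict_tags_wd, remove_specific_prompt,
                    convert_danbooru_to_e621_prompt, sort_tags)
from z3de621conv import predict_tags_e621
from fl2sd3longcap import predict_tags_fl2_sd3

image = Image.open("sample.png")  # hypothetical file
algos = ["Use WD Tagger"]  # any subset of the three algorithm checkboxes

# Each step is a no-op unless its algorithm is selected in `algos`
series, characters, general, _ = predict_tags_wd(image, "", algos)
general = predict_tags_e621(image, general, algos)
general = predict_tags_fl2_sd3(image, general, algos)
general = remove_specific_prompt(general, "all")               # keep_tags
general = convert_danbooru_to_e621_prompt(general, "danbooru")  # input_tag_type
print(sort_tags(general))
```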
character_series_dict.csv ADDED
The diff for this file is too large to render. See raw diff
 
danbooru_e621.csv ADDED
The diff for this file is too large to render. See raw diff
 
fl2sd3longcap.py ADDED
@@ -0,0 +1,74 @@
+ from transformers import AutoProcessor, AutoModelForCausalLM
+ import spaces
+ import re
+ from PIL import Image
+
+ # Runtime install of flash-attn (Spaces workaround); the CUDA build step is skipped
+ import subprocess
+ subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+
+ fl_model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).eval()
+ fl_processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
+
+
+ def fl_modify_caption(caption: str) -> str:
+     """
+     Removes specific prefixes from captions if present, otherwise returns the original caption.
+     Args:
+         caption (str): A string containing a caption.
+     Returns:
+         str: The caption with the prefix removed if it was present, or the original caption.
+     """
+     # Define the prefixes to remove
+     prefix_substrings = [
+         ('captured from ', ''),
+         ('captured at ', '')
+     ]
+
+     # Create a regex pattern to match any of the prefixes
+     pattern = '|'.join([re.escape(opening) for opening, _ in prefix_substrings])
+     replacers = {opening.lower(): replacer for opening, replacer in prefix_substrings}
+
+     # Function to replace a matched prefix with its corresponding replacement
+     def replace_fn(match):
+         return replacers[match.group(0).lower()]
+
+     # Apply the regex to the caption
+     modified_caption = re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE)
+
+     # If the caption was modified, return the modified version; otherwise, return the original
+     return modified_caption if modified_caption != caption else caption
+
+
+ @spaces.GPU
+ def fl_run_example(image):
+     task_prompt = "<DESCRIPTION>"
+     prompt = task_prompt + "Describe this image in great detail."
+
+     # Ensure the image is in RGB mode
+     if image.mode != "RGB":
+         image = image.convert("RGB")
+
+     inputs = fl_processor(text=prompt, images=image, return_tensors="pt")
+     generated_ids = fl_model.generate(
+         input_ids=inputs["input_ids"],
+         pixel_values=inputs["pixel_values"],
+         max_new_tokens=1024,
+         num_beams=3
+     )
+     generated_text = fl_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
+     parsed_answer = fl_processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
+     return fl_modify_caption(parsed_answer["<DESCRIPTION>"])
+
+
+ def predict_tags_fl2_sd3(image: Image.Image, input_tags: str, algo: list[str]):
+     def to_list(s):
+         # Split on commas and drop empty items
+         return [x.strip() for x in s.split(",") if x.strip() != ""]
+
+     def list_uniq(l):
+         return sorted(set(l), key=l.index)
+
+     if "Use Florence-2-SD3-Long-Captioner" not in algo:
+         return input_tags
+     tag_list = list_uniq(to_list(input_tags) + to_list(fl_run_example(image) + ", "))
+     # to_list() already drops empty items, so no stray "" needs removing here
+     return ", ".join(tag_list)
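A quick usage sketch (the image file is hypothetical; caption content depends on the model):

```python
from PIL import Image

img = Image.open("sample.png")  # hypothetical file
# Merge the Florence-2 caption into an existing tag string
print(predict_tags_fl2_sd3(img, "1girl, solo", ["Use Florence-2-SD3-Long-Captioner"]))
# If the algorithm is not selected, the input tags pass through unchanged
print(predict_tags_fl2_sd3(img, "1girl, solo", ["Use WD Tagger"]))
```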
output.py ADDED
@@ -0,0 +1,16 @@
+ from dataclasses import dataclass
+
+
+ @dataclass
+ class UpsamplingOutput:
+     upsampled_tags: str
+
+     copyright_tags: str
+     character_tags: str
+     general_tags: str
+     rating_tag: str
+     aspect_ratio_tag: str
+     length_tag: str
+     identity_tag: str
+
+     elapsed_time: float = 0.0
pre-requirements.txt ADDED
@@ -0,0 +1 @@
+ pip>=23.0.0
requirements.txt ADDED
@@ -0,0 +1,13 @@
+ torch
+ torchvision
+ accelerate
+ transformers
+ optimum[onnxruntime]
+ spaces
+ dartrs
+ httpx==0.13.3
+ httpcore
+ googletrans==4.0.0rc1
+ numpy
+ onnxruntime-gpu
+ timm
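Note: the `httpx==0.13.3` pin appears to exist for `googletrans==4.0.0rc1`, which relies on the old httpx/httpcore API; the `httpcore.SyncHTTPTransport` shim in `tagger.py` works around the same incompatibility.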
tag_group.csv ADDED
The diff for this file is too large to render. See raw diff
 
tagger.py ADDED
@@ -0,0 +1,450 @@
+ from PIL import Image
+ import torch
+ import gradio as gr
+ import spaces  # ZERO GPU
+ import re
+
+ from transformers import (
+     AutoImageProcessor,
+     AutoModelForImageClassification,
+ )
+
+ WD_MODEL_NAMES = ["p1atdev/wd-swinv2-tagger-v3-hf"]
+ WD_MODEL_NAME = WD_MODEL_NAMES[0]
+
+ wd_model = AutoModelForImageClassification.from_pretrained(WD_MODEL_NAME, trust_remote_code=True)
+ wd_model.to("cuda" if torch.cuda.is_available() else "cpu")
+ wd_processor = AutoImageProcessor.from_pretrained(WD_MODEL_NAME, trust_remote_code=True)
+
+
+ def _people_tag(noun: str, minimum: int = 1, maximum: int = 5):
+     # e.g. "1girl", "2girls", ..., "5girls", "6+girls"
+     return (
+         [f"1{noun}"]
+         + [f"{num}{noun}s" for num in range(minimum + 1, maximum + 1)]
+         + [f"{maximum+1}+{noun}s"]
+     )
+
+
+ PEOPLE_TAGS = (
+     _people_tag("girl") + _people_tag("boy") + _people_tag("other") + ["no humans"]
+ )
+
+
+ RATING_MAP = {
+     "general": "safe",
+     "sensitive": "sensitive",
+     "questionable": "nsfw",
+     "explicit": "explicit, nsfw",
+ }
+ DANBOORU_TO_E621_RATING_MAP = {
+     "safe": "rating_safe",
+     "sensitive": "rating_safe",
+     "nsfw": "rating_explicit",
+     "explicit, nsfw": "rating_explicit",
+     "explicit": "rating_explicit",
+     "rating:safe": "rating_safe",
+     "rating:general": "rating_safe",
+     "rating:sensitive": "rating_safe",
+     "rating:questionable, nsfw": "rating_explicit",
+     "rating:explicit, nsfw": "rating_explicit",
+ }
+
+
+ def load_dict_from_csv(filename):
+     # Build a {key: value} mapping from a two-column CSV file
+     with open(filename, 'r', encoding="utf-8") as f:
+         lines = f.readlines()
+     d = {}
+     for line in lines:
+         parts = line.strip().split(',')
+         d[parts[0]] = parts[1]
+     return d
+
+
+ anime_series_dict = load_dict_from_csv('character_series_dict.csv')
+
+
+ def character_list_to_series_list(character_list):
+     output_series_tag = []
+     series_tag = ""
+     series_dict = anime_series_dict
+     for tag in character_list:
+         series_tag = series_dict.get(tag, "")
+         if tag.endswith(")"):
+             # Extract the series from a "character (series)" style tag
+             tags = tag.split("(")
+             character_tag = "(".join(tags[:-1])
+             if character_tag.endswith(" "):
+                 character_tag = character_tag[:-1]
+             series_tag = tags[-1].replace(")", "")
+
+         if series_tag:
+             output_series_tag.append(series_tag)
+
+     return output_series_tag
+
+
+ def danbooru_to_e621(dtag, e621_dict):
+     def d_to_e(match, e621_dict):
+         dtag = match.group(0)
+         etag = e621_dict.get(dtag.strip().replace("_", " "), "")
+         if etag:
+             return etag
+         else:
+             return dtag
+
+     # Replace up to two word-like spans with their e621 equivalents
+     tag = re.sub(r'[\w ]+', lambda wrapper: d_to_e(wrapper, e621_dict), dtag, 2)
+
+     return tag
+
+
+ danbooru_to_e621_dict = load_dict_from_csv('danbooru_e621.csv')
+
+
+ def convert_danbooru_to_e621_prompt(input_prompt: str = "", prompt_type: str = "danbooru"):
+     if prompt_type == "danbooru": return input_prompt
+     tags = input_prompt.split(",") if input_prompt else []
+     people_tags: list[str] = []
+     other_tags: list[str] = []
+     rating_tags: list[str] = []
+
+     e621_dict = danbooru_to_e621_dict
+     for tag in tags:
+         tag = tag.strip().replace("_", " ")
+         tag = danbooru_to_e621(tag, e621_dict)
+         if tag in PEOPLE_TAGS:
+             people_tags.append(tag)
+         elif tag in DANBOORU_TO_E621_RATING_MAP:
+             rating_tags.append(DANBOORU_TO_E621_RATING_MAP[tag])
+         else:
+             other_tags.append(tag)
+
+     # Deduplicate and keep only the first rating tag
+     rating_tags = sorted(set(rating_tags), key=rating_tags.index)
+     rating_tags = [rating_tags[0]] if rating_tags else []
+     rating_tags = ["explicit, nsfw"] if rating_tags and rating_tags[0] == "explicit" else rating_tags
+
+     output_prompt = ", ".join(people_tags + other_tags + rating_tags)
+
+     return output_prompt
+
+
+ def translate_prompt(prompt: str = ""):
+     def translate_to_english(prompt):
+         import httpcore
+         # Shim for googletrans 4.0.0rc1 on newer httpcore versions
+         setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
+         from googletrans import Translator
+         translator = Translator()
+         try:
+             translated_prompt = translator.translate(prompt, src='auto', dest='en').text
+             return translated_prompt
+         except Exception:
+             return prompt
+
+     def is_japanese(s):
+         import unicodedata
+         for ch in s:
+             name = unicodedata.name(ch, "")
+             if "CJK UNIFIED" in name or "HIRAGANA" in name or "KATAKANA" in name:
+                 return True
+         return False
+
+     def to_list(s):
+         return [x.strip() for x in s.split(",")]
+
+     prompts = to_list(prompt)
+     outputs = []
+     for p in prompts:
+         p = translate_to_english(p) if is_japanese(p) else p
+         outputs.append(p)
+
+     return ", ".join(outputs)
+
+
+ def translate_prompt_to_ja(prompt: str = ""):
+     def translate_to_japanese(prompt):
+         import httpcore
+         # Shim for googletrans 4.0.0rc1 on newer httpcore versions
+         setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')
+         from googletrans import Translator
+         translator = Translator()
+         try:
+             translated_prompt = translator.translate(prompt, src='en', dest='ja').text
+             return translated_prompt
+         except Exception:
+             return prompt
+
+     def is_japanese(s):
+         import unicodedata
+         for ch in s:
+             name = unicodedata.name(ch, "")
+             if "CJK UNIFIED" in name or "HIRAGANA" in name or "KATAKANA" in name:
+                 return True
+         return False
+
+     def to_list(s):
+         return [x.strip() for x in s.split(",")]
+
+     prompts = to_list(prompt)
+     outputs = []
+     for p in prompts:
+         p = translate_to_japanese(p) if not is_japanese(p) else p
+         outputs.append(p)
+
+     return ", ".join(outputs)
+
+
+ def tags_to_ja(itag, dic):
+     def t_to_j(match, dic):
+         tag = match.group(0)
+         ja = dic.get(tag.strip().replace("_", " "), "")
+         if ja:
+             return ja
+         else:
+             return tag
+
+     tag = re.sub(r'[\w ]+', lambda wrapper: t_to_j(wrapper, dic), itag, 2)
+
+     return tag
+
+
+ def convert_tags_to_ja(input_prompt: str = ""):
+     tags = input_prompt.split(",") if input_prompt else []
+     out_tags = []
+
+     tags_to_ja_dict = load_dict_from_csv('all_tags_ja_ext.csv')
+     dic = tags_to_ja_dict
+     for tag in tags:
+         tag = tag.strip().replace("_", " ")
+         tag = tags_to_ja(tag, dic)
+         out_tags.append(tag)
+
+     return ", ".join(out_tags)
+
+
+ def insert_recom_prompt(prompt: str = "", neg_prompt: str = "", type: str = "None"):
+     def to_list(s):
+         # Split on commas and drop empty items
+         return [x.strip() for x in s.split(",") if x.strip() != ""]
+
+     def list_sub(a, b):
+         return [e for e in a if e not in b]
+
+     def list_uniq(l):
+         return sorted(set(l), key=l.index)
+
+     animagine_ps = to_list("anime artwork, anime style, key visual, vibrant, studio anime, highly detailed, masterpiece, best quality, very aesthetic, absurdres")
+     animagine_nps = to_list("lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract]")
+     pony_ps = to_list("source_anime, score_9, score_8_up, score_7_up, masterpiece, best quality, very aesthetic, absurdres")
+     pony_nps = to_list("source_pony, source_furry, source_cartoon, score_6, score_5, score_4, busty, ugly face, mutated hands, low res, blurry face, black and white, the simpsons, overwatch, apex legends")
+     prompts = to_list(prompt)
+     neg_prompts = to_list(neg_prompt)
+
+     # Strip any previously inserted recommended tags before re-inserting
+     prompts = list_sub(prompts, animagine_ps + pony_ps)
+     neg_prompts = list_sub(neg_prompts, animagine_nps + pony_nps)
+
+     last_empty_p = [""] if not prompts and type != "None" else []
+     last_empty_np = [""] if not neg_prompts and type != "None" else []
+
+     if type == "Animagine":
+         prompts = prompts + animagine_ps
+         neg_prompts = neg_prompts + animagine_nps
+     elif type == "Pony":
+         prompts = prompts + pony_ps
+         neg_prompts = neg_prompts + pony_nps
+
+     prompt = ", ".join(list_uniq(prompts) + last_empty_p)
+     neg_prompt = ", ".join(list_uniq(neg_prompts) + last_empty_np)
+
+     return prompt, neg_prompt
+
+
+ tag_group_dict = load_dict_from_csv('tag_group.csv')
+
+
+ def remove_specific_prompt(input_prompt: str = "", keep_tags: str = "all"):
+     def is_dressed(tag):
+         p = re.compile(r'dress|cloth|uniform|costume|vest|sweater|coat|shirt|jacket|blazer|apron|leotard|hood|sleeve|skirt|shorts|pant|loafer|ribbon|necktie|bow|collar|glove|sock|shoe|boots|wear|emblem')
+         return p.search(tag)
+
+     def is_background(tag):
+         p = re.compile(r'background|outline|light|sky|build|day|screen|tree|city')
+         return p.search(tag)
+
+     un_tags = ['solo']
+     group_list = ['groups', 'body_parts', 'attire', 'posture', 'objects', 'creatures', 'locations', 'disambiguation_pages', 'commonly_misused_tags', 'phrases', 'verbs_and_gerunds', 'subjective', 'nudity', 'sex_objects', 'sex', 'sex_acts', 'image_composition', 'artistic_license', 'text', 'year_tags', 'metatags']
+     keep_group_dict = {
+         "body": ['groups', 'body_parts'],
+         "dress": ['groups', 'body_parts', 'attire'],
+         "all": group_list,
+     }
+
+     def is_necessary(tag, keep_tags, group_dict):
+         if keep_tags == "all":
+             return True
+         elif tag in un_tags or group_dict.get(tag, "") in explicit_group:
+             return False
+         elif keep_tags == "body" and is_dressed(tag):
+             return False
+         elif is_background(tag):
+             return False
+         else:
+             return True
+
+     if keep_tags == "all": return input_prompt
+     keep_group = keep_group_dict.get(keep_tags, keep_group_dict["body"])
+     explicit_group = list(set(group_list) ^ set(keep_group))
+
+     tags = input_prompt.split(",") if input_prompt else []
+     people_tags: list[str] = []
+     other_tags: list[str] = []
+
+     group_dict = tag_group_dict
+     for tag in tags:
+         tag = tag.strip().replace("_", " ")
+         if tag in PEOPLE_TAGS:
+             people_tags.append(tag)
+         elif is_necessary(tag, keep_tags, group_dict):
+             other_tags.append(tag)
+
+     output_prompt = ", ".join(people_tags + other_tags)
+
+     return output_prompt
+
+
+ def sort_taglist(tags: list[str]):
+     if not tags: return []
+     character_tags: list[str] = []
+     series_tags: list[str] = []
+     people_tags: list[str] = []
+     group_list = ['groups', 'body_parts', 'attire', 'posture', 'objects', 'creatures', 'locations', 'disambiguation_pages', 'commonly_misused_tags', 'phrases', 'verbs_and_gerunds', 'subjective', 'nudity', 'sex_objects', 'sex', 'sex_acts', 'image_composition', 'artistic_license', 'text', 'year_tags', 'metatags']
+     group_tags = {}
+     other_tags: list[str] = []
+     rating_tags: list[str] = []
+
+     group_dict = tag_group_dict
+     group_set = set(group_dict.keys())
+     character_set = set(anime_series_dict.keys())
+     series_set = set(anime_series_dict.values())
+     rating_set = set(DANBOORU_TO_E621_RATING_MAP.keys()) | set(DANBOORU_TO_E621_RATING_MAP.values())
+
+     for tag in tags:
+         tag = tag.strip().replace("_", " ")
+         if tag in PEOPLE_TAGS:
+             people_tags.append(tag)
+         elif tag in rating_set:
+             rating_tags.append(tag)
+         elif tag in group_set:
+             elem = group_dict[tag]
+             group_tags[elem] = group_tags[elem] + [tag] if elem in group_tags else [tag]
+         elif tag in character_set:
+             character_tags.append(tag)
+         elif tag in series_set:
+             series_tags.append(tag)
+         else:
+             other_tags.append(tag)
+
+     output_group_tags: list[str] = []
+     for k in group_list:
+         output_group_tags.extend(group_tags.get(k, []))
+
+     rating_tags = [rating_tags[0]] if rating_tags else []
+     rating_tags = ["explicit, nsfw"] if rating_tags and rating_tags[0] == "explicit" else rating_tags
+
+     output_tags = character_tags + series_tags + people_tags + output_group_tags + other_tags + rating_tags
+
+     return output_tags
+
+
+ def sort_tags(tags: str):
+     if not tags: return ""
+     taglist: list[str] = []
+     for tag in tags.split(","):
+         taglist.append(tag.strip())
+     taglist = list(filter(lambda x: x != "", taglist))
+     return ", ".join(sort_taglist(taglist))
+
+
+ def postprocess_results(results: dict[str, float], general_threshold: float, character_threshold: float):
+     results = {
+         k: v for k, v in sorted(results.items(), key=lambda item: item[1], reverse=True)
+     }
+
+     rating = {}
+     character = {}
+     general = {}
+
+     for k, v in results.items():
+         if k.startswith("rating:"):
+             rating[k.replace("rating:", "")] = v
+             continue
+         elif k.startswith("character:"):
+             character[k.replace("character:", "")] = v
+             continue
+
+         general[k] = v
+
+     character = {k: v for k, v in character.items() if v >= character_threshold}
+     general = {k: v for k, v in general.items() if v >= general_threshold}
+
+     return rating, character, general
+
+
+ def gen_prompt(rating: list[str], character: list[str], general: list[str]):
+     people_tags: list[str] = []
+     other_tags: list[str] = []
+     rating_tag = RATING_MAP[rating[0]]  # currently unused in the returned prompt
+
+     for tag in general:
+         if tag in PEOPLE_TAGS:
+             people_tags.append(tag)
+         else:
+             other_tags.append(tag)
+
+     all_tags = people_tags + other_tags
+
+     return ", ".join(all_tags)
+
+
+ @spaces.GPU()
+ def predict_tags(image: Image.Image, general_threshold: float = 0.3, character_threshold: float = 0.8):
+     inputs = wd_processor.preprocess(image, return_tensors="pt")
+
+     outputs = wd_model(**inputs.to(wd_model.device, wd_model.dtype))
+     logits = torch.sigmoid(outputs.logits[0])  # take the first logits
+
+     # get probabilities
+     results = {
+         wd_model.config.id2label[i]: float(logit.float()) for i, logit in enumerate(logits)
+     }
+
+     # rating, character, general
+     rating, character, general = postprocess_results(
+         results, general_threshold, character_threshold
+     )
+
+     prompt = gen_prompt(
+         list(rating.keys()), list(character.keys()), list(general.keys())
+     )
+
+     output_series_list = character_list_to_series_list(character.keys())
+     output_series_tag = output_series_list[0] if output_series_list else ""
+
+     return output_series_tag, ", ".join(character.keys()), prompt, gr.update(interactive=True)
+
+
+ def predict_tags_wd(image: Image.Image, input_tags: str, algo: list[str], general_threshold: float = 0.3, character_threshold: float = 0.8):
+     if "Use WD Tagger" not in algo and len(algo) != 0:
+         return "", "", input_tags, gr.update(interactive=True)
+     return predict_tags(image, general_threshold, character_threshold)
+
+
+ def compose_prompt_to_copy(character: str, series: str, general: str):
+     characters = character.split(",") if character else []
+     serieses = series.split(",") if series else []
+     generals = general.split(",") if general else []
+     tags = characters + serieses + generals
+     cprompt = ",".join(tags) if tags else ""
+     return cprompt
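A brief usage sketch of the conversion and sorting helpers; the inputs are hypothetical and the exact outputs depend on the bundled CSV mappings (danbooru_e621.csv, tag_group.csv, character_series_dict.csv):

```python
prompt = "hatsune miku, vocaloid, 1girl, twintails, explicit"

print(convert_danbooru_to_e621_prompt(prompt, "e621"))
# e.g. "1girl, ..., rating_explicit" (mapping-dependent)

print(sort_tags(prompt))
# reordered: character / series / people / grouped / other / rating

print(insert_recom_prompt("1girl", "", "Animagine"))
# ("1girl, anime artwork, anime style, ...", "lowres, (bad), ...")
```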
utils.py ADDED
@@ -0,0 +1,45 @@
+ import gradio as gr
+ from dartrs.v2 import AspectRatioTag, LengthTag, RatingTag, IdentityTag
+
+
+ V2_ASPECT_RATIO_OPTIONS: list[AspectRatioTag] = [
+     "ultra_wide",
+     "wide",
+     "square",
+     "tall",
+     "ultra_tall",
+ ]
+ V2_RATING_OPTIONS: list[RatingTag] = [
+     "sfw",
+     "general",
+     "sensitive",
+     "nsfw",
+     "questionable",
+     "explicit",
+ ]
+ V2_LENGTH_OPTIONS: list[LengthTag] = [
+     "very_short",
+     "short",
+     "medium",
+     "long",
+     "very_long",
+ ]
+ V2_IDENTITY_OPTIONS: list[IdentityTag] = [
+     "none",
+     "lax",
+     "strict",
+ ]
+
+
+ # ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
+ def gradio_copy_text(_text: str | None = None):
+     # The clipboard write happens client-side in COPY_ACTION_JS; this only shows a toast
+     gr.Info("Copied!")
+
+
+ COPY_ACTION_JS = """\
+ (inputs, _outputs) => {
+   // inputs is the string value of the input_text
+   if (inputs.trim() !== "") {
+     navigator.clipboard.writeText(inputs);
+   }
+ }"""
v2.py ADDED
@@ -0,0 +1,214 @@
+ import time
+ import os
+ import torch
+ from typing import Callable
+
+ from dartrs.v2 import (
+     V2Model,
+     MixtralModel,
+     MistralModel,
+     compose_prompt,
+     LengthTag,
+     AspectRatioTag,
+     RatingTag,
+     IdentityTag,
+ )
+ from dartrs.dartrs import DartTokenizer
+ from dartrs.utils import get_generation_config
+
+
+ import gradio as gr
+ from gradio.components import Component
+
+ try:
+     import spaces
+ except ImportError:
+     # Fallback stub so the module also runs outside ZERO GPU Spaces
+     class spaces:
+         def GPU(*args, **kwargs):
+             return lambda x: x
+
+
+ from output import UpsamplingOutput
+
+
+ HF_TOKEN = os.getenv("HF_TOKEN", None)
+
+ V2_ALL_MODELS = {
+     "dart-v2-moe-sft": {
+         "repo": "p1atdev/dart-v2-moe-sft",
+         "type": "sft",
+         "class": MixtralModel,
+     },
+     "dart-v2-sft": {
+         "repo": "p1atdev/dart-v2-sft",
+         "type": "sft",
+         "class": MistralModel,
+     },
+ }
+
+
+ def prepare_models(model_config: dict):
+     model_name = model_config["repo"]
+     tokenizer = DartTokenizer.from_pretrained(model_name, auth_token=HF_TOKEN)
+     model = model_config["class"].from_pretrained(model_name, auth_token=HF_TOKEN)
+
+     return {
+         "tokenizer": tokenizer,
+         "model": model,
+     }
+
+
+ def normalize_tags(tokenizer: DartTokenizer, tags: str):
+     """Just remove unk tokens."""
+     return ", ".join([tag for tag in tokenizer.tokenize(tags) if tag != "<|unk|>"])
+
+
+ @torch.no_grad()
+ def generate_tags(
+     model: V2Model,
+     tokenizer: DartTokenizer,
+     prompt: str,
+     ban_token_ids: list[int],
+ ):
+     output = model.generate(
+         get_generation_config(
+             prompt,
+             tokenizer=tokenizer,
+             temperature=1,
+             top_p=0.9,
+             top_k=100,
+             max_new_tokens=256,
+             ban_token_ids=ban_token_ids,
+         ),
+     )
+
+     return output
+
+
+ def _people_tag(noun: str, minimum: int = 1, maximum: int = 5):
+     return (
+         [f"1{noun}"]
+         + [f"{num}{noun}s" for num in range(minimum + 1, maximum + 1)]
+         + [f"{maximum+1}+{noun}s"]
+     )
+
+
+ PEOPLE_TAGS = (
+     _people_tag("girl") + _people_tag("boy") + _people_tag("other") + ["no humans"]
+ )
+
+
+ def gen_prompt_text(output: UpsamplingOutput):
+     # separate people tags (e.g. 1girl)
+     people_tags = []
+     other_general_tags = []
+
+     for tag in output.general_tags.split(","):
+         tag = tag.strip()
+         if tag in PEOPLE_TAGS:
+             people_tags.append(tag)
+         else:
+             other_general_tags.append(tag)
+
+     return ", ".join(
+         [
+             part.strip()
+             for part in [
+                 *people_tags,
+                 output.character_tags,
+                 output.copyright_tags,
+                 *other_general_tags,
+                 output.upsampled_tags,
+                 output.rating_tag,
+             ]
+             if part.strip() != ""
+         ]
+     )
+
+
+ def elapsed_time_format(elapsed_time: float) -> str:
+     return f"Elapsed: {elapsed_time:.2f} seconds"
+
+
+ def parse_upsampling_output(
+     upsampler: Callable[..., UpsamplingOutput],
+ ):
+     def _parse_upsampling_output(*args) -> tuple[str, str, dict, dict]:
+         output = upsampler(*args)
+
+         return (
+             gen_prompt_text(output),
+             elapsed_time_format(output.elapsed_time),
+             gr.update(interactive=True),
+             gr.update(interactive=True),
+         )
+
+     return _parse_upsampling_output
+
+
+ class V2UI:
+     model_name: str | None = None
+     model: V2Model
+     tokenizer: DartTokenizer
+
+     input_components: list[Component] = []
+     generate_btn: gr.Button
+
+     def on_generate(
+         self,
+         model_name: str,
+         copyright_tags: str,
+         character_tags: str,
+         general_tags: str,
+         rating_tag: RatingTag,
+         aspect_ratio_tag: AspectRatioTag,
+         length_tag: LengthTag,
+         identity_tag: IdentityTag,
+         ban_tags: str,
+         *args,
+     ) -> UpsamplingOutput:
+         # (Re)load the model only when the selection changes
+         if self.model_name is None or self.model_name != model_name:
+             models = prepare_models(V2_ALL_MODELS[model_name])
+             self.model = models["model"]
+             self.tokenizer = models["tokenizer"]
+             self.model_name = model_name
+
+         # normalize tags
+         # copyright_tags = normalize_tags(self.tokenizer, copyright_tags)
+         # character_tags = normalize_tags(self.tokenizer, character_tags)
+         # general_tags = normalize_tags(self.tokenizer, general_tags)
+
+         ban_token_ids = self.tokenizer.encode(ban_tags.strip())
+
+         prompt = compose_prompt(
+             prompt=general_tags,
+             copyright=copyright_tags,
+             character=character_tags,
+             rating=rating_tag,
+             aspect_ratio=aspect_ratio_tag,
+             length=length_tag,
+             identity=identity_tag,
+         )
+
+         start = time.time()
+         upsampled_tags = generate_tags(
+             self.model,
+             self.tokenizer,
+             prompt,
+             ban_token_ids,
+         )
+         elapsed_time = time.time() - start
+
+         return UpsamplingOutput(
+             upsampled_tags=upsampled_tags,
+             copyright_tags=copyright_tags,
+             character_tags=character_tags,
+             general_tags=general_tags,
+             rating_tag=rating_tag,
+             aspect_ratio_tag=aspect_ratio_tag,
+             length_tag=length_tag,
+             identity_tag=identity_tag,
+             elapsed_time=elapsed_time,
+         )
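A sketch of driving the upsampler without the UI. The model and tokenizer are downloaded on first use; the option strings come from the lists in utils.py, and the tag values are hypothetical:

```python
v2 = V2UI()
out = v2.on_generate(
    "dart-v2-moe-sft",   # model_name, a key of V2_ALL_MODELS
    "vocaloid",          # copyright tags
    "hatsune miku",      # character tags
    "1girl, twintails",  # general tags
    "sfw",               # rating   (V2_RATING_OPTIONS)
    "square",            # aspect ratio (V2_ASPECT_RATIO_OPTIONS)
    "long",              # length   (V2_LENGTH_OPTIONS)
    "lax",               # identity (V2_IDENTITY_OPTIONS)
    "",                  # ban tags
)
print(out.upsampled_tags, out.elapsed_time)
```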
z3de621conv.py ADDED
@@ -0,0 +1,68 @@
+ import huggingface_hub
+ from PIL import Image
+ from pathlib import Path
+ import csv
+ import numpy as np
+ import spaces
+
+ import onnxruntime as rt
+ e621_model_path = Path(huggingface_hub.snapshot_download('toynya/Z3D-E621-Convnext'))
+ e621_model_session = rt.InferenceSession(str(e621_model_path / 'model.onnx'), providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
+ with open(e621_model_path / 'tags-selected.csv', mode='r', encoding='utf-8') as file:
+     csv_reader = csv.DictReader(file)
+     e621_model_tags = [row['name'].strip() for row in csv_reader]
+
+
+ def prepare_image_e621(image: Image.Image, target_size: int):
+     # Pad image to square
+     image_shape = image.size
+     max_dim = max(image_shape)
+     pad_left = (max_dim - image_shape[0]) // 2
+     pad_top = (max_dim - image_shape[1]) // 2
+
+     padded_image = Image.new("RGB", (max_dim, max_dim), (255, 255, 255))
+     padded_image.paste(image, (pad_left, pad_top))
+
+     # Resize
+     if max_dim != target_size:
+         padded_image = padded_image.resize((target_size, target_size), Image.BICUBIC)
+
+     # Convert to numpy array
+     # Based on the ONNX graph, the model appears to expect inputs in the range of 0-255
+     image_array = np.asarray(padded_image, dtype=np.float32)
+
+     # Convert PIL-native RGB to BGR
+     image_array = image_array[:, :, ::-1]
+
+     return np.expand_dims(image_array, axis=0)
+
+
+ @spaces.GPU
+ def predict_e621(image: Image.Image, threshold: float = 0.3):
+     image_array = prepare_image_e621(image, 448)
+     input_name = 'input_1:0'
+     output_name = 'predictions_sigmoid'
+
+     result = e621_model_session.run([output_name], {input_name: image_array})
+     result = result[0][0]
+
+     scores = {e621_model_tags[i]: result[i] for i in range(len(result))}
+     predicted_tags = [tag for tag, score in scores.items() if score > threshold]
+     tag_string = ', '.join(predicted_tags).replace("_", " ")
+
+     return tag_string
+
+
+ def predict_tags_e621(image: Image.Image, input_tags: str, algo: list[str], threshold: float = 0.3):
+     def to_list(s):
+         # Split on commas and drop empty items
+         return [x.strip() for x in s.split(",") if x.strip() != ""]
+
+     def list_uniq(l):
+         return sorted(set(l), key=l.index)
+
+     if "Use Z3D-E621-Convnext" not in algo:
+         return input_tags
+     tag_list = list_uniq(to_list(input_tags) + to_list(predict_e621(image, threshold)))
+     return ", ".join(tag_list)
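Usage sketch (the file path is hypothetical; the tag vocabulary comes from tags-selected.csv):

```python
from PIL import Image

img = Image.open("sample.jpg").convert("RGB")  # hypothetical file
# Direct inference at a 0.5 confidence threshold
print(predict_e621(img, threshold=0.5))
# Or merged with existing tags, as app.py does
print(predict_tags_e621(img, "1girl", ["Use Z3D-E621-Convnext"], threshold=0.5))
```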