John6666 committed on
Commit b47fcc1
Parent: 8865e09

Upload 5 files

Files changed (5)
  1. app.py +60 -118
  2. output.py +0 -1
  3. tagger.py +47 -103
  4. utils.py +4 -35
  5. v2.py +65 -4
app.py CHANGED
@@ -323,33 +323,32 @@ logger.setLevel(logging.DEBUG)
 
 from v2 import (
     V2UI,
-    ALL_MODELS,
+    parse_upsampling_output,
+    V2_ALL_MODELS,
 )
 from utils import (
     gradio_copy_text,
     COPY_ACTION_JS,
-    ASPECT_RATIO_OPTIONS,
-    RATING_OPTIONS,
-    LENGTH_OPTIONS,
-    IDENTITY_OPTIONS
+    V2_ASPECT_RATIO_OPTIONS,
+    V2_RATING_OPTIONS,
+    V2_LENGTH_OPTIONS,
+    V2_IDENTITY_OPTIONS
 )
 from tagger import (
     predict_tags,
-    parse_upsampling_output,
     convert_danbooru_to_e621_prompt,
+    remove_specific_prompt,
     insert_recom_prompt,
+    compose_prompt_to_copy,
     translate_prompt,
 )
 def description_ui():
     gr.Markdown(
         """
         ## Danbooru Tags Transformer V2 Demo with WD Tagger
-        It’s a modification of [p1atdev's Danbooru Tags Transformer V2 Demo](https://huggingface.co/spaces/p1atdev/danbooru-tags-transformer-v2) and [p1atdev's WD Tagger with 🤗 transformers](https://huggingface.co/spaces/p1atdev/wd-tagger-transformers).
-
-        Models:
-        - [p1atdev/wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf)
-        - [dart-v2-moe-sft](https://huggingface.co/p1atdev/dart-v2-moe-sft) (Mixtral architecture)
-        - [dart-v2-sft](https://huggingface.co/p1atdev/dart-v2-sft) (Mistral architecture)
+        Image => Prompt => Upsampled longer prompt
+        - Mod of p1atdev's [Danbooru Tags Transformer V2 Demo](https://huggingface.co/spaces/p1atdev/danbooru-tags-transformer-v2) and [WD Tagger with 🤗 transformers](https://huggingface.co/spaces/p1atdev/wd-tagger-transformers).
+        - Models: p1atdev's [wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf), [dart-v2-moe-sft](https://huggingface.co/p1atdev/dart-v2-moe-sft), [dart-v2-sft](https://huggingface.co/p1atdev/dart-v2-sft)
         """
     )
 
@@ -704,11 +703,11 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
     character_dbt = gr.Textbox(lines=1, placeholder="kafuu chino, ...", label="Character names", scale=2)
     series_dbt = gr.Textbox(lines=1, placeholder="Is the order a rabbit?, ...", label="Series names", scale=2)
     generate_db_random_button = gr.Button(value="Generate random prompt from character", size="sm", variant="secondary")
-    model_name_dbt = gr.Dropdown(label="Model", choices=list(ALL_MODELS.keys()), value=list(ALL_MODELS.keys())[0], visible=False)
-    rating_dbt = gr.Radio(label="Rating", choices=list(RATING_OPTIONS), value="explicit", visible=False)
-    aspect_ratio_dbt = gr.Radio(label="Aspect ratio", choices=list(ASPECT_RATIO_OPTIONS), value="square", visible=False)
-    length_dbt = gr.Radio(label="Length", choices=list(LENGTH_OPTIONS), value="very_long", visible=False)
-    identity_dbt = gr.Radio(label="Keep identity", choices=list(IDENTITY_OPTIONS), value="lax", visible=False)
+    model_name_dbt = gr.Dropdown(label="Model", choices=list(V2_ALL_MODELS.keys()), value=list(V2_ALL_MODELS.keys())[0], visible=False)
+    rating_dbt = gr.Radio(label="Rating", choices=list(V2_RATING_OPTIONS), value="explicit", visible=False)
+    aspect_ratio_dbt = gr.Radio(label="Aspect ratio", choices=list(V2_ASPECT_RATIO_OPTIONS), value="square", visible=False)
+    length_dbt = gr.Radio(label="Length", choices=list(V2_LENGTH_OPTIONS), value="very_long", visible=False)
+    identity_dbt = gr.Radio(label="Keep identity", choices=list(V2_IDENTITY_OPTIONS), value="lax", visible=False)
     ban_tags_dbt = gr.Textbox(label="Ban tags", placeholder="alternate costumen, ...", value="futanari, censored, furry, furrification", visible=False)
     elapsed_time_dbt = gr.Markdown(label="Elapsed time", value="", visible=False)
     copy_button_dbt = gr.Button(value="Copy to clipboard", visible=False)
@@ -737,8 +736,8 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
     )
 
     with gr.Accordion("Generation settings", open=False, visible=True):
-        steps_gui = gr.Slider(minimum=1, maximum=100, step=1, value=30, label="Steps")
-        cfg_gui = gr.Slider(minimum=0, maximum=30, step=0.5, value=7.5, label="CFG")
+        steps_gui = gr.Slider(minimum=1, maximum=100, step=1, value=28, label="Steps")
+        cfg_gui = gr.Slider(minimum=0, maximum=30, step=0.5, value=7.0, label="CFG")
         sampler_gui = gr.Dropdown(label="Sampler", choices=scheduler_names, value="Euler a")
         img_width_gui = gr.Slider(minimum=64, maximum=4096, step=8, value=1024, label="Img Width")
         img_height_gui = gr.Slider(minimum=64, maximum=4096, step=8, value=1024, label="Img Height")
@@ -963,7 +962,6 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
         length_dbt,
         identity_dbt,
         ban_tags_dbt,
-        prompt_type_gui,
     ]
 
     insert_prompt_gui.change(
@@ -983,7 +981,7 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
         inputs=[
             *v2b.input_components,
         ],
-        outputs=[prompt_gui, elapsed_time_dbt, copy_button_dbt],
+        outputs=[prompt_gui, elapsed_time_dbt, copy_button_dbt, copy_button_dbt],
     )
 
     translate_prompt_button.click(translate_prompt, inputs=[prompt_gui], outputs=[prompt_gui])
@@ -1094,102 +1092,41 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
     with gr.Group():
         input_image = gr.Image(label="Input image", type="pil", sources=["upload", "clipboard"])
         with gr.Accordion(label="Advanced options", open=False):
-            general_threshold = gr.Slider(
-                label="Threshold",
-                minimum=0.0,
-                maximum=1.0,
-                value=0.3,
-                step=0.01,
-                interactive=True,
-            )
-            character_threshold = gr.Slider(
-                label="Character threshold",
-                minimum=0.0,
-                maximum=1.0,
-                value=0.8,
-                step=0.01,
-                interactive=True,
-            )
-            keep_tags = gr.Radio(
-                label="Remove tags leaving only the following",
-                choices=["body", "dress", "all"],
-                value="body",
-            )
-        generate_from_image_btn = gr.Button(value="Generate input tags from image", variant="primary")
+            general_threshold = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.01, interactive=True)
+            character_threshold = gr.Slider(label="Character threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.01, interactive=True)
+            keep_tags = gr.Radio(label="Remove tags leaving only the following", choices=["body", "dress", "all"], value="body")
+        generate_from_image_btn = gr.Button(value="GENERATE TAGS FROM IMAGE", size="lg", variant="primary")
 
     with gr.Group():
-        input_character = gr.Textbox(
-            label="Character tags",
-            placeholder="hatsune miku",
-        )
-        input_copyright = gr.Textbox(
-            label="Copyright tags",
-            placeholder="vocaloid",
-        )
-        input_general = gr.TextArea(
-            label="General tags",
-            lines=4,
-            placeholder="1girl, solo, ...",
-            value="",
-        )
+        input_character = gr.Textbox(label="Character tags", placeholder="hatsune miku")
+        input_copyright = gr.Textbox(label="Copyright tags", placeholder="vocaloid")
+        input_general = gr.TextArea(label="General tags", lines=4, placeholder="1girl, ...", value="")
         input_tags_to_copy = gr.Textbox(value="", visible=False)
-        copy_input_btn = gr.Button(
-            value="Copy to clipboard",
-            interactive=False,
-        )
-        translate_input_prompt_button = gr.Button(value="Translate prompt to English", variant="secondary")
-        tag_type = gr.Radio(
-            label="Output tag conversion",
-            info="danbooru for Animagine, e621 for Pony.",
-            choices=["danbooru", "e621"],
-            value="danbooru",
-        )
-        input_rating = gr.Radio(
-            label="Rating",
-            choices=list(RATING_OPTIONS),
-            value="explicit",
-        )
+        copy_input_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+        translate_input_prompt_button = gr.Button(value="Translate prompt to English", size="sm", variant="secondary")
+        tag_type = gr.Radio(label="Output tag conversion", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="e621", visible=False)
+        input_rating = gr.Radio(label="Rating", choices=list(V2_RATING_OPTIONS), value="explicit")
         with gr.Accordion(label="Advanced options", open=False):
-            input_aspect_ratio = gr.Radio(
-                label="Aspect ratio",
-                info="The aspect ratio of the image.",
-                choices=list(ASPECT_RATIO_OPTIONS),
-                value="square",
-            )
-            input_length = gr.Radio(
-                label="Length",
-                info="The total length of the tags.",
-                choices=list(LENGTH_OPTIONS),
-                value="very_long",
-            )
-            input_identity = gr.Radio(
-                label="Keep identity",
-                info="How strictly to keep the identity of the character or subject. If you specify the detail of subject in the prompt, you should choose `strict`. Otherwise, choose `none` or `lax`. `none` is very creative but sometimes ignores the input prompt.",
-                choices=list(IDENTITY_OPTIONS),
-                value="lax",
-            )
-            input_ban_tags = gr.Textbox(
-                label="Ban tags",
-                info="Tags to ban from the output.",
-                placeholder="alternate costumen, ...",
-                value="futanari, censored, furry, furrification"
-            )
-            model_name = gr.Dropdown(
-                label="Model",
-                choices=list(ALL_MODELS.keys()),
-                value=list(ALL_MODELS.keys())[0],
-            )
-
-        generate_btn = gr.Button(value="GENERATE TAGS", variant="primary")
+            input_aspect_ratio = gr.Radio(label="Aspect ratio", info="The aspect ratio of the image.", choices=list(V2_ASPECT_RATIO_OPTIONS), value="square")
+            input_length = gr.Radio(label="Length", info="The total length of the tags.", choices=list(V2_LENGTH_OPTIONS), value="very_long")
+            input_identity = gr.Radio(label="Keep identity", info="How strictly to keep the identity of the character or subject. If you specify the detail of subject in the prompt, you should choose `strict`. Otherwise, choose `none` or `lax`. `none` is very creative but sometimes ignores the input prompt.", choices=list(V2_IDENTITY_OPTIONS), value="lax")
+            input_ban_tags = gr.Textbox(label="Ban tags", info="Tags to ban from the output.", placeholder="alternate costumen, ...", value="censored")
+            model_name = gr.Dropdown(label="Model", choices=list(V2_ALL_MODELS.keys()), value=list(V2_ALL_MODELS.keys())[0])
+            dummy_np = gr.Textbox(label="Negative prompt", value="", visible=False)
+            recom_animagine = gr.Textbox(label="Animagine reccomended prompt", value="Animagine", visible=False)
+            recom_pony = gr.Textbox(label="Pony reccomended prompt", value="Pony", visible=False)
+
+        generate_btn = gr.Button(value="GENERATE TAGS", size="lg", variant="primary")
 
     with gr.Group():
-        output_text = gr.TextArea(label="Output tags", interactive=False)
-        copy_btn = gr.Button(
-            value="Copy to clipboard",
-            interactive=False,
-        )
-        elapsed_time_md = gr.Markdown(label="Elapsed time", value="")
-
+        output_text = gr.TextArea(label="Output tags", interactive=False, show_copy_button=True)
+        copy_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+        elapsed_time_md = gr.Markdown(label="Elapsed time", value="", visible=False)
+
+    with gr.Group():
+        output_text_pony = gr.TextArea(label="Output tags (Pony e621 style)", interactive=False, show_copy_button=True)
+        copy_btn_pony = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+
     description_ui()
 
     v2.input_components = [
@@ -1202,7 +1139,6 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
         input_length,
         input_identity,
         input_ban_tags,
-        tag_type,
     ]
 
     translate_input_prompt_button.click(translate_prompt, inputs=[input_general], outputs=[input_general])
@@ -1211,27 +1147,33 @@ with gr.Blocks(theme="NoCrypt/miku", css=CSS) as app:
 
     generate_from_image_btn.click(
        predict_tags,
-        inputs=[input_image, general_threshold, character_threshold, keep_tags],
+        inputs=[input_image, general_threshold, character_threshold],
        outputs=[
            input_copyright,
            input_character,
            input_general,
-            input_tags_to_copy,
            copy_input_btn,
        ],
-    )
-
-    copy_input_btn.click(gradio_copy_text, inputs=[input_tags_to_copy], js=COPY_ACTION_JS)
+    ).then(remove_specific_prompt, inputs=[input_general, keep_tags], outputs=[input_general])
+    copy_input_btn.click(compose_prompt_to_copy, inputs=[input_character, input_copyright, input_general], outputs=[input_tags_to_copy]).then(
+        gradio_copy_text, inputs=[input_tags_to_copy], js=COPY_ACTION_JS,
+    )
 
    generate_btn.click(
        parse_upsampling_output(v2.on_generate),
        inputs=[
            *v2.input_components,
        ],
-        outputs=[output_text, elapsed_time_md, copy_btn],
+        outputs=[output_text, elapsed_time_md, copy_btn, copy_btn_pony],
+    ).then(
+        convert_danbooru_to_e621_prompt, inputs=[output_text, tag_type], outputs=[output_text_pony],
+    ).then(
+        insert_recom_prompt, inputs=[output_text, dummy_np, recom_animagine], outputs=[output_text, dummy_np],
+    ).then(
+        insert_recom_prompt, inputs=[output_text_pony, dummy_np, recom_pony], outputs=[output_text_pony, dummy_np],
    )
-
    copy_btn.click(gradio_copy_text, inputs=[output_text], js=COPY_ACTION_JS)
+    copy_btn_pony.click(gradio_copy_text, inputs=[output_text_pony], js=COPY_ACTION_JS)
 
    with gr.Accordion("Examples", open=True, visible=True):
        gr.Examples(
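The functional core of the app.py change is in the last hunk: post-processing moves out of the callbacks and into chained Gradio events, with `generate_btn.click(...)` feeding `.then(...)` steps that derive the Pony/e621 variant and inject the recommended prompts. A minimal sketch of that chaining pattern (the component names and stand-in functions below are hypothetical, not the Space's actual code):

```python
import gradio as gr

def generate(prompt: str) -> str:
    # stand-in for parse_upsampling_output(v2.on_generate)
    return prompt + ", upsampled"

def to_e621(prompt: str) -> str:
    # stand-in for convert_danbooru_to_e621_prompt
    return prompt.replace(" ", "_")

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input tags")
    out = gr.TextArea(label="Output tags")
    out_pony = gr.TextArea(label="Output tags (e621 style)")
    btn = gr.Button("Generate")

    # Each .then() runs after the previous step completes and reads the
    # already-updated component values, so the e621 variant is derived from
    # the freshly generated output instead of being recomputed upstream.
    btn.click(generate, inputs=[inp], outputs=[out]).then(
        to_e621, inputs=[out], outputs=[out_pony]
    )

demo.launch()
```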
output.py CHANGED
@@ -12,6 +12,5 @@ class UpsamplingOutput:
     aspect_ratio_tag: str
     length_tag: str
     identity_tag: str
-    tag_type: str
 
     elapsed_time: float = 0.0
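With `tag_type` gone, `UpsamplingOutput` becomes a plain carrier for the upsampler's results; tag-style conversion now happens downstream in app.py. A sketch of the resulting dataclass, reconstructed from the fields referenced in this hunk and in `v2.gen_prompt_text()` (field order and completeness are assumptions):

```python
from dataclasses import dataclass

@dataclass
class UpsamplingOutput:
    # fields consumed by v2.gen_prompt_text() in this commit
    upsampled_tags: str
    copyright_tags: str
    character_tags: str
    general_tags: str
    rating_tag: str
    # fields shown in the output.py hunk above
    aspect_ratio_tag: str
    length_tag: str
    identity_tag: str
    elapsed_time: float = 0.0
```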
tagger.py CHANGED
@@ -1,6 +1,4 @@
 from PIL import Image
-from typing import Callable
-
 import torch
 
 from transformers import (
@@ -11,15 +9,10 @@ from transformers import (
 import gradio as gr
 import spaces # ZERO GPU
 
-from output import UpsamplingOutput
-
-
 MODEL_NAMES = ["p1atdev/wd-swinv2-tagger-v3-hf"]
 MODEL_NAME = MODEL_NAMES[0]
 
-model = AutoModelForImageClassification.from_pretrained(
-    MODEL_NAME,
-)
+model = AutoModelForImageClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
 model.to("cuda" if torch.cuda.is_available() else "cpu")
 processor = AutoImageProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)
 
@@ -43,22 +36,6 @@ RATING_MAP = {
     "questionable": "nsfw",
     "explicit": "explicit, nsfw",
 }
-NORMALIZE_RATING_TAG = {
-    "sfw": "",
-    "general": "",
-    "sensitive": "sensitive",
-    "nsfw": "nsfw",
-    "questionable": "nsfw",
-    "explicit": "nsfw, explicit",
-}
-NORMALIZE_RATING_TAG_E621 = {
-    "sfw": "rating_safe",
-    "general": "rating_safe",
-    "sensitive": "sensitive",
-    "nsfw": "nsfw, rating_explicit",
-    "questionable": "rating_questionable",
-    "explicit": "rating_explicit",
-}
 DANBOORU_TO_E621_RATING_MAP = {
     "safe": "rating_safe",
     "sensitive": "rating_safe",
@@ -160,6 +137,7 @@ def convert_danbooru_to_e621_prompt(input_prompt: str = "", prompt_type: str = "
 
     rating_tags = sorted(set(rating_tags), key=rating_tags.index)
     rating_tags = [rating_tags[0]] if rating_tags else []
+    rating_tags = ["explicit, nsfw"] if rating_tags[0] == "explicit" else rating_tags
 
     output_prompt = ", ".join(people_tags + other_tags + rating_tags)
 
@@ -246,7 +224,7 @@ def get_tag_group_dict():
     return tag_group_dict
 
 
-def is_necessary(tag, keep_tags, group_dict):
+def remove_specific_prompt(input_prompt: str = "", keep_tags: str = "all"):
     def is_dressed(tag):
         import re
         p = re.compile(r'dress|cloth|uniform|costume|vest|sweater|coat|shirt|jacket|blazer|apron|leotard|hood|sleeve|skirt|shorts|pant|loafer|ribbon|necktie|bow|collar|glove|sock|shoe|boots|wear|emblem')
@@ -257,27 +235,48 @@
         p = re.compile(r'background|outline|light|sky|build|day|screen|tree|city')
         return p.search(tag)
 
+    un_tags = ['solo']
     group_list = ['people', 'age', 'pattern', 'place', 'hair', 'modifier', 'screen', 'animal', 'effect', 'situation', 'status', 'lighting', 'accesory', 'body', 'nsfw', 'camera', 'option', 'taste', 'other', 'detail', 'action', 'dress', 'character', 'face', 'costume', 'attribute', 'weather', 'temporary', 'gender', 'favorite', 'food', 'object', 'quality', 'expression', 'life', 'background']
     keep_group_dict = {
         "body": ['people', 'age', 'hair', 'body', 'character', 'face', 'gender'],
         "dress": ['people', 'age', 'hair', 'accesory', 'body', 'dress', 'character', 'face', 'costume', 'gender'],
         "all": ['people', 'age', 'pattern', 'place', 'hair', 'modifier', 'screen', 'animal', 'effect', 'situation', 'status', 'lighting', 'accesory', 'body', 'nsfw', 'camera', 'option', 'taste', 'other', 'detail', 'action', 'dress', 'character', 'face', 'costume', 'attribute', 'weather', 'temporary', 'gender', 'favorite', 'food', 'object', 'quality', 'expression', 'life', 'background']
     }
+
+    def is_necessary(tag, keep_tags, group_dict):
+        if keep_tags == "all":
+            return True
+        elif tag in un_tags or group_dict.get(tag, "") in explicit_group:
+            return False
+        elif keep_tags == "body" and is_dressed(tag):
+            return False
+        elif is_background(tag):
+            return False
+        else:
+            return True
+
+    if keep_tags == "all": return input_prompt
     keep_group = keep_group_dict.get(keep_tags, ['people', 'age', 'hair', 'body', 'character', 'face', 'gender'])
     explicit_group = list(set(group_list) ^ set(keep_group))
-    if group_dict.get(tag.strip().replace("_", " "), "") in explicit_group:
-        return False
-    elif keep_tags == "body" and is_dressed(tag):
-        return False
-    elif is_background(tag):
-        return False
-    else:
-        return True
 
+    tags = input_prompt.split(",") if input_prompt else []
+    people_tags: list[str] = []
+    other_tags: list[str] = []
 
-def postprocess_results(
-    results: dict[str, float], general_threshold: float, character_threshold: float
-):
+    group_dict = get_tag_group_dict()
+    for tag in tags:
+        tag = tag.strip().replace("_", " ")
+        if tag in PEOPLE_TAGS:
+            people_tags.append(tag)
+        elif is_necessary(tag, keep_tags, group_dict):
+            other_tags.append(tag)
+
+    output_prompt = ", ".join(people_tags + other_tags)
+
+    return output_prompt
+
+
+def postprocess_results(results: dict[str, float], general_threshold: float, character_threshold: float):
     results = {
         k: v for k, v in sorted(results.items(), key=lambda item: item[1], reverse=True)
     }
@@ -302,16 +301,15 @@
     return rating, character, general
 
 
-def gen_prompt(rating: list[str], character: list[str], general: list[str], keep_tags):
+def gen_prompt(rating: list[str], character: list[str], general: list[str]):
     people_tags: list[str] = []
     other_tags: list[str] = []
     rating_tag = RATING_MAP[rating[0]]
 
-    group_dict = get_tag_group_dict()
     for tag in general:
         if tag in PEOPLE_TAGS:
             people_tags.append(tag)
-        elif is_necessary(tag, keep_tags, group_dict):
+        else:
             other_tags.append(tag)
 
     all_tags = people_tags + other_tags
@@ -320,9 +318,7 @@
 
 
 @spaces.GPU()
-def predict_tags(
-    image: Image.Image, general_threshold: float = 0.3, character_threshold: float = 0.8, keep_tags = "all",
-):
+def predict_tags(image: Image.Image, general_threshold: float = 0.3, character_threshold: float = 0.8):
     inputs = processor.preprocess(image, return_tensors="pt")
 
     outputs = model(**inputs.to(model.device, model.dtype))
@@ -339,7 +335,7 @@
     )
 
     prompt = gen_prompt(
-        list(rating.keys()), list(character.keys()), list(general.keys()), keep_tags
+        list(rating.keys()), list(character.keys()), list(general.keys())
     )
 
     output_series_tag = ""
@@ -349,65 +345,13 @@
     else:
         output_series_tag = ""
 
-    cprompt = ", ".join(character.keys())
-    cprompt = cprompt + ", " + output_series_tag if output_series_tag else cprompt
-    cprompt = cprompt + ", " + prompt if prompt else cprompt
-
-    return output_series_tag, ", ".join(character.keys()), prompt, cprompt, gr.update(interactive=True),
+    return output_series_tag, ", ".join(character.keys()), prompt, gr.update(interactive=True),
 
 
-def gen_prompt_text(output: UpsamplingOutput):
-    # separate people tags (e.g. 1girl)
-    people_tags = []
-    other_general_tags = []
-
-    e621_dict = get_e621_dict() if output.tag_type == "e621" else {}
-    for tag in output.general_tags.split(","):
-        tag = tag.strip()
-
-        if tag in PEOPLE_TAGS:
-            if output.tag_type == "e621":
-                tag = danbooru_to_e621(tag, e621_dict)
-            people_tags.append(tag)
-        else:
-            if output.tag_type == "e621":
-                tag = danbooru_to_e621(tag, e621_dict)
-            other_general_tags.append(tag)
-
-    return ", ".join(
-        [
-            part.strip()
-            for part in [
-                *people_tags,
-                output.character_tags,
-                output.copyright_tags,
-                *other_general_tags,
-                output.upsampled_tags,
-                NORMALIZE_RATING_TAG_E621[output.rating_tag] if output.tag_type == "e621" else NORMALIZE_RATING_TAG[output.rating_tag],
-            ]
-            if part.strip() != ""
-        ]
-    )
-
-
-def elapsed_time_format(elapsed_time: float) -> str:
-    return f"Elapsed: {elapsed_time:.2f} seconds"
-
-
-def parse_upsampling_output(
-    upsampler: Callable[..., UpsamplingOutput],
-):
-    def _parse_upsampling_output(*args) -> tuple[str, str, dict]:
-        output = upsampler(*args)
-
-        print(output)
-
-        return (
-            gen_prompt_text(output),
-            elapsed_time_format(output.elapsed_time),
-            gr.update(
-                interactive=True,
-            ),
-        )
-
-    return _parse_upsampling_output
+
+def compose_prompt_to_copy(character: str, series: str, general: str):
+    characters = character.split(",") if character else []
+    serieses = series.split(",") if series else []
+    generals = general.split(",") if general else []
+    tags = characters + serieses + generals
+    cprompt = ",".join(tags) if tags else ""
+    return cprompt
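After this change `predict_tags` only tags the image; group-based filtering lives in the standalone `remove_specific_prompt`, which app.py chains after it. A self-contained sketch of the inference path `predict_tags` wraps (the sigmoid multi-label readout and `id2label` usage are assumptions about the hf port's remote code; the input path and threshold are illustrative):

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

MODEL_NAME = "p1atdev/wd-swinv2-tagger-v3-hf"
model = AutoModelForImageClassification.from_pretrained(MODEL_NAME, trust_remote_code=True)
model.to("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)

image = Image.open("input.png").convert("RGB")  # hypothetical input file
inputs = processor.preprocess(image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs.to(model.device, model.dtype))

# WD taggers are multi-label: apply sigmoid per tag, then threshold.
probs = outputs.logits.sigmoid()[0]
results = {model.config.id2label[i]: float(p) for i, p in enumerate(probs)}
general_tags = [tag for tag, p in results.items() if p > 0.3]
print(", ".join(general_tags))
```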
utils.py CHANGED
@@ -1,38 +1,15 @@
 import gradio as gr
 from dartrs.v2 import AspectRatioTag, LengthTag, RatingTag, IdentityTag
 
-# from https://huggingface.co/spaces/cagliostrolab/animagine-xl-3.1/blob/main/config.py
-QUALITY_TAGS = {
-    "default": "(masterpiece), best quality, very aesthetic, perfect face",
-}
-NEGATIVE_PROMPT = {
-    "default": "nsfw, (low quality, worst quality:1.2), very displeasing, 3d, watermark, signature, ugly, poorly drawn",
-}
 
-
-IMAGE_SIZE_OPTIONS = {
-    "1536x640": "<|aspect_ratio:ultra_wide|>",
-    "1344x768": "<|aspect_ratio:wide|>",
-    "1024x1024": "<|aspect_ratio:square|>",
-    "768x1344": "<|aspect_ratio:tall|>",
-    "640x1536": "<|aspect_ratio:ultra_tall|>",
-}
-IMAGE_SIZES = {
-    "1536x640": (1536, 640),
-    "1344x768": (1344, 768),
-    "1024x1024": (1024, 1024),
-    "768x1344": (768, 1344),
-    "640x1536": (640, 1536),
-}
-
-ASPECT_RATIO_OPTIONS: list[AspectRatioTag] = [
+V2_ASPECT_RATIO_OPTIONS: list[AspectRatioTag] = [
     "ultra_wide",
     "wide",
     "square",
     "tall",
     "ultra_tall",
 ]
-RATING_OPTIONS: list[RatingTag] = [
+V2_RATING_OPTIONS: list[RatingTag] = [
     "sfw",
     "general",
     "sensitive",
@@ -40,28 +17,20 @@ RATING_OPTIONS: list[RatingTag] = [
     "questionable",
     "explicit",
 ]
-LENGTH_OPTIONS: list[LengthTag] = [
+V2_LENGTH_OPTIONS: list[LengthTag] = [
     "very_short",
     "short",
     "medium",
     "long",
     "very_long",
 ]
-IDENTITY_OPTIONS: list[IdentityTag] = [
+V2_IDENTITY_OPTIONS: list[IdentityTag] = [
     "none",
     "lax",
     "strict",
 ]
 
 
-PEOPLE_TAGS = [
-    *[f"1{x}" for x in ["girl", "boy", "other"]],
-    *[f"{i}girls" for i in range(2, 6)],
-    *[f"6+{x}s" for x in ["girl", "boy", "other"]],
-    "no humans",
-]
-
-
 # ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
 def gradio_copy_text(_text: None):
     gr.Info("Copied!")
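utils.py now keeps only the renamed `V2_*` option lists and the clipboard helpers; the quality-tag and image-size tables are dropped. For reference, the copy buttons in app.py pair `gradio_copy_text` (a server-side no-op that pops a toast) with `COPY_ACTION_JS`, a client-side snippet that performs the actual clipboard write. The JS body below is an assumption based on the Qiita article referenced in the file, since the diff does not show it:

```python
import gradio as gr

def gradio_copy_text(_text: None):
    gr.Info("Copied!")  # the server side only shows the toast

# Hypothetical body; the real COPY_ACTION_JS is defined elsewhere in utils.py.
COPY_ACTION_JS = "(text) => { navigator.clipboard.writeText(text); }"

with gr.Blocks() as demo:
    text = gr.TextArea(value="1girl, solo")
    btn = gr.Button("Copy to clipboard")
    # js= runs in the browser with the component value, so the clipboard
    # write happens client side, where the clipboard actually lives.
    btn.click(gradio_copy_text, inputs=[text], js=COPY_ACTION_JS)

demo.launch()
```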
v2.py CHANGED
@@ -1,6 +1,7 @@
 import time
 import os
 import torch
+from typing import Callable
 
 from dartrs.v2 import (
     V2Model,
@@ -33,7 +34,7 @@ from output import UpsamplingOutput
 
 HF_TOKEN = os.getenv("HF_TOKEN", None)
 
-ALL_MODELS = {
+V2_ALL_MODELS = {
     "dart-v2-moe-sft": {
         "repo": "p1atdev/dart-v2-moe-sft",
         "type": "sft",
@@ -85,6 +86,67 @@ def generate_tags(
     return output
 
 
+def _people_tag(noun: str, minimum: int = 1, maximum: int = 5):
+    return (
+        [f"1{noun}"]
+        + [f"{num}{noun}s" for num in range(minimum + 1, maximum + 1)]
+        + [f"{maximum+1}+{noun}s"]
+    )
+
+
+PEOPLE_TAGS = (
+    _people_tag("girl") + _people_tag("boy") + _people_tag("other") + ["no humans"]
+)
+
+
+def gen_prompt_text(output: UpsamplingOutput):
+    # separate people tags (e.g. 1girl)
+    people_tags = []
+    other_general_tags = []
+
+    for tag in output.general_tags.split(","):
+        tag = tag.strip()
+        if tag in PEOPLE_TAGS:
+            people_tags.append(tag)
+        else:
+            other_general_tags.append(tag)
+
+    return ", ".join(
+        [
+            part.strip()
+            for part in [
+                *people_tags,
+                output.character_tags,
+                output.copyright_tags,
+                *other_general_tags,
+                output.upsampled_tags,
+                output.rating_tag,
+            ]
+            if part.strip() != ""
+        ]
+    )
+
+
+def elapsed_time_format(elapsed_time: float) -> str:
+    return f"Elapsed: {elapsed_time:.2f} seconds"
+
+
+def parse_upsampling_output(
+    upsampler: Callable[..., UpsamplingOutput],
+):
+    def _parse_upsampling_output(*args) -> tuple[str, str, dict]:
+        output = upsampler(*args)
+
+        return (
+            gen_prompt_text(output),
+            elapsed_time_format(output.elapsed_time),
+            gr.update(interactive=True),
+            gr.update(interactive=True),
+        )
+
+    return _parse_upsampling_output
+
+
 class V2UI:
     model_name: str | None = None
     model: V2Model
@@ -104,11 +166,10 @@ class V2UI:
         length_tag: LengthTag,
         identity_tag: IdentityTag,
         ban_tags: str,
-        tag_type: str,
         *args,
     ) -> UpsamplingOutput:
         if self.model_name is None or self.model_name != model_name:
-            models = prepare_models(ALL_MODELS[model_name])
+            models = prepare_models(V2_ALL_MODELS[model_name])
             self.model = models["model"]
             self.tokenizer = models["tokenizer"]
             self.model_name = model_name
@@ -149,5 +210,5 @@ class V2UI:
             length_tag=length_tag,
             identity_tag=identity_tag,
             elapsed_time=elapsed_time,
-            tag_type=tag_type,
         )
+
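`parse_upsampling_output` now lives in v2.py and returns four values (prompt text, elapsed time, and two button updates), matching the four-element `outputs=` lists wired up in app.py. The new `_people_tag` helper also closes a gap in the old utils.py `PEOPLE_TAGS`, which enumerated `2girls` through `5girls` but no counted forms for `boy` or `other`. Its expansion is deterministic from the definition above:

```python
def _people_tag(noun: str, minimum: int = 1, maximum: int = 5):
    return (
        [f"1{noun}"]
        + [f"{num}{noun}s" for num in range(minimum + 1, maximum + 1)]
        + [f"{maximum+1}+{noun}s"]
    )

print(_people_tag("girl"))
# ['1girl', '2girls', '3girls', '4girls', '5girls', '6+girls']
```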