Upload 12 files
- README.md +1 -1
- app.py +39 -54
- fl2basepromptgen.py +6 -4
- fl2sd3longcap.py +4 -2
- promptenhancer.py +6 -3
- requirements.txt +1 -1
- tagger.py +62 -19
- utils.py +5 -0
README.md
CHANGED

@@ -4,7 +4,7 @@
 colorFrom: blue
 colorTo: yellow
 sdk: gradio
-sdk_version: 4.
+sdk_version: 4.39.0
 app_file: app.py
 pinned: false
 license: apache-2.0
app.py
CHANGED

@@ -1,17 +1,9 @@
-from PIL import Image
 import gradio as gr
+import spaces
 
-
-from v2 import (
-    V2_ALL_MODELS,
-)
 from utils import (
     gradio_copy_text,
     COPY_ACTION_JS,
-    V2_ASPECT_RATIO_OPTIONS,
-    V2_RATING_OPTIONS,
-    V2_LENGTH_OPTIONS,
-    V2_IDENTITY_OPTIONS
 )
 from tagger import (
     predict_tags_wd,
@@ -20,21 +12,22 @@ from tagger import (
     insert_recom_prompt,
     compose_prompt_to_copy,
     translate_prompt,
+    select_random_character,
 )
-from fl2sd3longcap import (
-    predict_tags_fl2_sd3,
-)
-from fl2basepromptgen import (
-    predict_tags_fl2_base_prompt_gen,
-)
+from fl2sd3longcap import predict_tags_fl2_sd3
+from fl2basepromptgen import predict_tags_fl2_base_prompt_gen
 from promptenhancer import prompt_enhancer
 
-
 def description_ui():
     gr.Markdown(
         """
        ## Prompt Enhancer with WD Tagger & SD3 Long Captioner
        (Image =>) Prompt => Upsampled longer prompt
+        """
+    )
+def description_ui2():
+    gr.Markdown(
+        """
        - It's a mod. Original Spaces: p1atdev's [WD Tagger with 🤗 transformers](https://huggingface.co/spaces/p1atdev/wd-tagger-transformers),\
        gokaygokay's [Prompt-Enhancer](https://huggingface.co/spaces/gokaygokay/Prompt-Enhancer) /\
        [Florence-2-SD3-Captioner](https://huggingface.co/spaces/gokaygokay/Florence-2-SD3-Captioner).
@@ -46,62 +39,54 @@
         """
     )
 
-
 def main():
-
     with gr.Blocks() as ui:
         description_ui()
-
-
-
-        with gr.
-
-
-
-
-
-
-
-
-
-
-        with gr.Group():
+        with gr.Column():
+            with gr.Group():
+                input_image = gr.Image(label="Input image", type="pil", sources=["upload", "clipboard"], height=256)
+                with gr.Accordion(label="Advanced options", open=False):
+                    general_threshold = gr.Slider(label="Threshold", minimum=0.0, maximum=1.0, value=0.3, step=0.01, interactive=True)
+                    character_threshold = gr.Slider(label="Character threshold", minimum=0.0, maximum=1.0, value=0.8, step=0.01, interactive=True)
+                    input_tag_type = gr.Radio(label="Convert tags to", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="danbooru")
+                    recom_prompt = gr.Radio(label="Insert reccomended prompt", choices=["None", "Animagine", "Pony"], value="None", interactive=True)
+                    keep_tags = gr.Radio(label="Remove tags leaving only the following", choices=["body", "dress", "all"], value="all")
+                image_algorithms = gr.CheckboxGroup(["Use WD Tagger", "Use Florence-2-SD3-Long-Captioner", "Use Florence-2-base-PromptGen"], label="Algorithms", value=["Use WD Tagger", "Use Florence-2-SD3-Long-Captioner"])
+                generate_from_image_btn = gr.Button(value="GENERATE TAGS FROM IMAGE", size="lg", variant="primary")
+            with gr.Group():
+                with gr.Row():
                     input_character = gr.Textbox(label="Character tags", placeholder="hatsune miku")
                     input_copyright = gr.Textbox(label="Copyright tags", placeholder="vocaloid")
-
-
-
-
-
-
-
-
-
-
-
-            input_identity = gr.Radio(label="Keep identity", info="How strictly to keep the identity of the character or subject. If you specify the detail of subject in the prompt, you should choose `strict`. Otherwise, choose `none` or `lax`. `none` is very creative but sometimes ignores the input prompt.", choices=list(V2_IDENTITY_OPTIONS), value="lax", visible=False)
-            input_ban_tags = gr.Textbox(label="Ban tags", info="Tags to ban from the output.", placeholder="alternate costumen, ...", value="censored", visible=False)
-            model_name = gr.Dropdown(label="Model", choices=list(V2_ALL_MODELS.keys()), value=list(V2_ALL_MODELS.keys())[0], visible=False)
-            dummy_np = gr.Textbox(label="Negative prompt", value="", visible=False)
-            recom_animagine = gr.Textbox(label="Animagine reccomended prompt", value="Animagine", visible=False)
-            recom_pony = gr.Textbox(label="Pony reccomended prompt", value="Pony", visible=False)
-
+                random_character = gr.Button(value="Random character 🎲", size="sm")
+                input_general = gr.TextArea(label="General tags", lines=4, placeholder="1girl, ...", value="")
+                input_tags_to_copy = gr.Textbox(value="", visible=False)
+                copy_input_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+                translate_input_prompt_button = gr.Button(value="Translate prompt to English", size="sm", variant="secondary")
+                prompt_enhancer_model = gr.Radio(["Medium", "Long"], label="Model Choice", value="Long", info="Enhance your prompts with Medium or Long answers")
+                with gr.Accordion(label="Advanced options", open=False, visible=False):
+                    tag_type = gr.Radio(label="Output tag conversion", info="danbooru for Animagine, e621 for Pony.", choices=["danbooru", "e621"], value="e621", visible=False)
+                    dummy_np = gr.Textbox(label="Negative prompt", value="", visible=False)
+                    recom_animagine = gr.Textbox(label="Animagine reccomended prompt", value="Animagine", visible=False)
+                    recom_pony = gr.Textbox(label="Pony reccomended prompt", value="Pony", visible=False)
             generate_btn = gr.Button(value="GENERATE TAGS", size="lg", variant="primary")
-
+            with gr.Row():
                 with gr.Group():
                     output_text = gr.TextArea(label="Output tags", interactive=False, show_copy_button=True)
                     copy_btn = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
-            elapsed_time_md = gr.Markdown(label="Elapsed time", value="", visible=False)
-
                 with gr.Group():
                     output_text_pony = gr.TextArea(label="Output tags (Pony e621 style)", interactive=False, show_copy_button=True)
                     copy_btn_pony = gr.Button(value="Copy to clipboard", size="sm", interactive=False)
+        description_ui2()
+
+        random_character.click(select_random_character, [input_copyright, input_character], [input_copyright, input_character], queue=False)
 
         translate_input_prompt_button.click(translate_prompt, [input_general], [input_general], queue=False)
         translate_input_prompt_button.click(translate_prompt, [input_character], [input_character], queue=False)
         translate_input_prompt_button.click(translate_prompt, [input_copyright], [input_copyright], queue=False)
 
         generate_from_image_btn.click(
+            lambda: ("", "", ""), None, [input_copyright, input_character, input_general], queue=False,
+        ).success(
            predict_tags_wd,
            [input_image, input_general, image_algorithms, general_threshold, character_threshold],
            [input_copyright, input_character, input_general, copy_input_btn],
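Note on the new event wiring: `generate_from_image_btn.click(...)` now chains a clearing step and the tagger via Gradio's `.success()`, which runs the second function only if the first completed without error. A minimal standalone sketch of the same pattern (the component names here are illustrative, not taken from the Space):

```python
import gradio as gr

def predict(image):
    # Stand-in for the real tagger; returns the three tag fields.
    return "copyright tags", "character tags", "general tags"

with gr.Blocks() as demo:
    image = gr.Image(type="pil")
    copyright_box = gr.Textbox(label="Copyright tags")
    character_box = gr.Textbox(label="Character tags")
    general_box = gr.Textbox(label="General tags")
    btn = gr.Button("Generate")
    # Step 1 clears the outputs; .success() runs the tagger only if step 1 succeeded.
    btn.click(
        lambda: ("", "", ""), None,
        [copyright_box, character_box, general_box], queue=False,
    ).success(
        predict, [image],
        [copyright_box, character_box, general_box],
    )

demo.launch()
```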
fl2basepromptgen.py
CHANGED

@@ -1,11 +1,13 @@
 from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
 from PIL import Image
+import torch
 
-
-
+import subprocess
+subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-
+device = "cuda" if torch.cuda.is_available() else "cpu"
+fl_model = AutoModelForCausalLM.from_pretrained('MiaoshouAI/Florence-2-base-PromptGen', trust_remote_code=True).to(device).eval()
 fl_processor = AutoProcessor.from_pretrained('MiaoshouAI/Florence-2-base-PromptGen', trust_remote_code=True)
 
 
@@ -18,7 +20,7 @@ def fl_run(image):
     if image.mode != "RGB":
         image = image.convert("RGB")
 
-    inputs = fl_processor(text=prompt, images=image, return_tensors="pt")
+    inputs = fl_processor(text=prompt, images=image, return_tensors="pt").to(device)
     generated_ids = fl_model.generate(
         input_ids=inputs["input_ids"],
         pixel_values=inputs["pixel_values"],
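The device-placement pattern added here loads the model once with `.to(device).eval()` and moves each processor batch to the same device before `generate()`; calling `.to(device)` on the returned `BatchEncoding` moves every tensor in the batch at once. A condensed sketch of the pattern as used in this file (the task prompt string is an assumption for illustration):

```python
import torch
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load once, on the chosen device, in inference mode.
model = AutoModelForCausalLM.from_pretrained(
    "MiaoshouAI/Florence-2-base-PromptGen", trust_remote_code=True
).to(device).eval()
processor = AutoProcessor.from_pretrained(
    "MiaoshouAI/Florence-2-base-PromptGen", trust_remote_code=True
)

def caption(image: Image.Image, prompt: str = "<MORE_DETAILED_CAPTION>") -> str:
    if image.mode != "RGB":
        image = image.convert("RGB")
    # .to(device) on the BatchEncoding moves input_ids and pixel_values together.
    inputs = processor(text=prompt, images=image, return_tensors="pt").to(device)
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=256,
    )
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
```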
fl2sd3longcap.py
CHANGED

@@ -2,11 +2,13 @@ from transformers import AutoProcessor, AutoModelForCausalLM
 import spaces
 import re
 from PIL import Image
+import torch
 
 import subprocess
 subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
-
+device = "cuda" if torch.cuda.is_available() else "cpu"
+fl_model = AutoModelForCausalLM.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True).to(device).eval()
 fl_processor = AutoProcessor.from_pretrained('gokaygokay/Florence-2-SD3-Captioner', trust_remote_code=True)
 
 
@@ -48,7 +50,7 @@ def fl_run_example(image):
     if image.mode != "RGB":
         image = image.convert("RGB")
 
-    inputs = fl_processor(text=prompt, images=image, return_tensors="pt")
+    inputs = fl_processor(text=prompt, images=image, return_tensors="pt").to(device)
    generated_ids = fl_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
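Both captioner modules install `flash-attn` at import time; `FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE` tells the package's setup to skip compiling CUDA kernels during install, the usual trick on Spaces where build time is limited. One caveat with the committed one-liner: passing `env={...}` replaces the entire environment of the child process. A variant that merges the flag into the inherited environment instead (an assumption, not what the commit does):

```python
import os
import subprocess

# Keep PATH and CUDA variables visible to pip while adding the skip-build flag.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
    check=False,  # a failed install should not crash the Space at import time
)
```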
promptenhancer.py
CHANGED

@@ -2,10 +2,13 @@ import spaces
 import gradio as gr
 from transformers import pipeline
 import re
+import torch
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 def load_models():
-    enhancer_medium = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance", device=
-    enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance-Long", device=
+    enhancer_medium = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance", device=device)
+    enhancer_long = pipeline("summarization", model="gokaygokay/Lamini-Prompt-Enchance-Long", device=device)
     return enhancer_medium, enhancer_long
 
 enhancer_medium, enhancer_long = load_models()

@@ -39,4 +42,4 @@ def prompt_enhancer(character: str, series: str, general: str, model_choice: str
     output = enhance_prompt(cprompt, model_choice)
     prompt = cprompt + ", " + output
 
-    return prompt, gr.update(interactive=True), gr.update(interactive=True)
+    return prompt, gr.update(interactive=True), gr.update(interactive=True)
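`pipeline(..., device=device)` accepts a device string such as `"cuda"` or `"cpu"` (as well as a GPU index or a `torch.device`), so computing the string once covers both enhancers. A short usage sketch:

```python
import torch
from transformers import pipeline

device = "cuda" if torch.cuda.is_available() else "cpu"

# The enhancers are summarization pipelines that rewrite a short prompt into a longer one.
enhancer_medium = pipeline(
    "summarization", model="gokaygokay/Lamini-Prompt-Enchance", device=device
)

result = enhancer_medium("1girl, hatsune miku, vocaloid", max_length=100)
print(result[0]["summary_text"])
```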
requirements.txt
CHANGED

@@ -1,4 +1,4 @@
-torch
+torch==2.2.0
 torchvision
 accelerate
 transformers
tagger.py
CHANGED

@@ -1,12 +1,13 @@
 from PIL import Image
 import torch
 import gradio as gr
-import spaces
-
+import spaces
 from transformers import (
     AutoImageProcessor,
     AutoModelForImageClassification,
 )
+from pathlib import Path
+
 
 WD_MODEL_NAMES = ["p1atdev/wd-swinv2-tagger-v3-hf"]
 WD_MODEL_NAME = WD_MODEL_NAMES[0]
@@ -30,12 +31,15 @@ PEOPLE_TAGS = (
 
 
 RATING_MAP = {
+    "sfw": "safe",
     "general": "safe",
     "sensitive": "sensitive",
     "questionable": "nsfw",
     "explicit": "explicit, nsfw",
 }
 DANBOORU_TO_E621_RATING_MAP = {
+    "sfw": "rating_safe",
+    "general": "rating_safe",
     "safe": "rating_safe",
     "sensitive": "rating_safe",
     "nsfw": "rating_explicit",
@@ -49,6 +53,34 @@
 }
 
 
+# https://github.com/toriato/stable-diffusion-webui-wd14-tagger/blob/a9eacb1eff904552d3012babfa28b57e1d3e295c/tagger/ui.py#L368
+kaomojis = [
+    "0_0",
+    "(o)_(o)",
+    "+_+",
+    "+_-",
+    "._.",
+    "<o>_<o>",
+    "<|>_<|>",
+    "=_=",
+    ">_<",
+    "3_3",
+    "6_9",
+    ">_o",
+    "@_@",
+    "^_^",
+    "o_o",
+    "u_u",
+    "x_x",
+    "|_|",
+    "||_||",
+]
+
+
+def replace_underline(x: str):
+    return x.strip().replace("_", " ") if x not in kaomojis else x.strip()
+
+
 def to_list(s):
     return [x.strip() for x in s.split(",") if not s == ""]
 
@@ -62,9 +94,16 @@ def list_uniq(l):
 
 
 def load_dict_from_csv(filename):
-    with open(filename, 'r', encoding="utf-8") as f:
-        lines = f.readlines()
     dict = {}
+    if not Path(filename).exists():
+        if Path('./tagger/', filename).exists(): filename = str(Path('./tagger/', filename))
+        else: return dict
+    try:
+        with open(filename, 'r', encoding="utf-8") as f:
+            lines = f.readlines()
+    except Exception:
+        print(f"Failed to open dictionary file: {filename}")
+        return dict
     for line in lines:
         parts = line.strip().split(',')
         dict[parts[0]] = parts[1]
@@ -94,7 +133,8 @@ def character_list_to_series_list(character_list):
 
 
 def select_random_character(series: str, character: str):
-    from random import randrange
+    from random import seed, randrange
+    seed()
     character_list = list(anime_series_dict.keys())
     character = character_list[randrange(len(character_list) - 1)]
     series = anime_series_dict.get(character.split(",")[0].strip(), "")
@@ -104,7 +144,7 @@
 def danbooru_to_e621(dtag, e621_dict):
     def d_to_e(match, e621_dict):
         dtag = match.group(0)
-        etag = e621_dict.get(dtag
+        etag = e621_dict.get(replace_underline(dtag), "")
         if etag:
             return etag
         else:
@@ -112,7 +152,6 @@ def danbooru_to_e621(dtag, e621_dict):
 
     import re
     tag = re.sub(r'[\w ]+', lambda wrapper: d_to_e(wrapper, e621_dict), dtag, 2)
-
     return tag
 
 
@@ -128,7 +167,7 @@ def convert_danbooru_to_e621_prompt(input_prompt: str = "", prompt_type: str = "
 
     e621_dict = danbooru_to_e621_dict
     for tag in tags:
-        tag = tag
+        tag = replace_underline(tag)
         tag = danbooru_to_e621(tag, e621_dict)
         if tag in PEOPLE_TAGS:
             people_tags.append(tag)
@@ -156,6 +195,7 @@ def translate_prompt(prompt: str = ""):
         translated_prompt = translator.translate(prompt, src='auto', dest='en').text
         return translated_prompt
     except Exception as e:
+        print(e)
         return prompt
 
     def is_japanese(s):
@@ -188,6 +228,7 @@ def translate_prompt_to_ja(prompt: str = ""):
         translated_prompt = translator.translate(prompt, src='en', dest='ja').text
         return translated_prompt
     except Exception as e:
+        print(e)
         return prompt
 
     def is_japanese(s):
@@ -213,7 +254,7 @@ def translate_prompt_to_ja(prompt: str = ""):
 def tags_to_ja(itag, dict):
     def t_to_j(match, dict):
         tag = match.group(0)
-        ja = dict.get(tag
+        ja = dict.get(replace_underline(tag), "")
         if ja:
             return ja
         else:
@@ -232,7 +273,7 @@ def convert_tags_to_ja(input_prompt: str = ""):
     tags_to_ja_dict = load_dict_from_csv('all_tags_ja_ext.csv')
     dict = tags_to_ja_dict
     for tag in tags:
-        tag = tag
+        tag = replace_underline(tag)
         tag = tags_to_ja(tag, dict)
         out_tags.append(tag)
 
@@ -242,13 +283,13 @@ def convert_tags_to_ja(input_prompt: str = ""):
 enable_auto_recom_prompt = True
 
 
-animagine_ps = to_list("
+animagine_ps = to_list("masterpiece, best quality, very aesthetic, absurdres")
 animagine_nps = to_list("lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract]")
-pony_ps = to_list("
-pony_nps = to_list("source_pony,
+pony_ps = to_list("score_9, score_8_up, score_7_up, masterpiece, best quality, very aesthetic, absurdres")
+pony_nps = to_list("source_pony, score_6, score_5, score_4, busty, ugly face, mutated hands, low res, blurry face, black and white, the simpsons, overwatch, apex legends")
 other_ps = to_list("anime artwork, anime style, studio anime, highly detailed, cinematic photo, 35mm photograph, film, bokeh, professional, 4k, highly detailed")
 other_nps = to_list("photo, deformed, black and white, realism, disfigured, low contrast, drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly")
-default_ps = to_list("
+default_ps = to_list("highly detailed, masterpiece, best quality, very aesthetic, absurdres")
 default_nps = to_list("score_6, score_5, score_4, lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract]")
 def insert_recom_prompt(prompt: str = "", neg_prompt: str = "", type: str = "None"):
     global enable_auto_recom_prompt
@@ -281,6 +322,7 @@ def insert_recom_prompt(prompt: str = "", neg_prompt: str = "", type: str = "Non
 def load_model_prompt_dict():
     import json
     dict = {}
+    path = 'model_dict.json' if Path('model_dict.json').exists() else './tagger/model_dict.json'
     try:
         with open('model_dict.json', encoding='utf-8') as f:
             dict = json.load(f)
@@ -359,7 +401,7 @@ def remove_specific_prompt(input_prompt: str = "", keep_tags: str = "all"):
 
     group_dict = tag_group_dict
     for tag in tags:
-        tag = tag
+        tag = replace_underline(tag)
         if tag in PEOPLE_TAGS:
             people_tags.append(tag)
         elif is_necessary(tag, keep_tags, group_dict):
@@ -387,7 +429,7 @@ def sort_taglist(tags: list[str]):
     rating_set = set(DANBOORU_TO_E621_RATING_MAP.keys()) | set(DANBOORU_TO_E621_RATING_MAP.values())
 
     for tag in tags:
-        tag = tag
+        tag = replace_underline(tag)
         if tag in PEOPLE_TAGS:
             people_tags.append(tag)
         elif tag in rating_set:
@@ -488,12 +530,13 @@ def predict_tags(image: Image.Image, general_threshold: float = 0.3, character_t
         output_series_tag = output_series_list[0]
     else:
         output_series_tag = ""
-    return output_series_tag, ", ".join(character.keys()), prompt, gr.update(interactive=True)
+    return output_series_tag, ", ".join(character.keys()), prompt, gr.update(interactive=True)
 
 
-def predict_tags_wd(image: Image.Image, input_tags: str, algo: list[str], general_threshold: float = 0.3,
+def predict_tags_wd(image: Image.Image, input_tags: str, algo: list[str], general_threshold: float = 0.3,
+                    character_threshold: float = 0.8, input_series: str = "", input_character: str = ""):
     if not "Use WD Tagger" in algo and len(algo) != 0:
-        return
+        return input_series, input_character, input_tags, gr.update(interactive=True)
     return predict_tags(image, general_threshold, character_threshold)
 
 
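The core of this change is `replace_underline`: WD-style tags use underscores (`long_hair`), but kaomoji tags are made of underscores (`^_^`, `>_<`), so a blanket `replace("_", " ")` would destroy them. The helper is now applied wherever a tag enters a conversion loop. A quick illustration using the same definitions:

```python
kaomojis = ["0_0", "^_^", ">_<", "x_x"]  # abbreviated; the file lists 19 of them

def replace_underline(x: str):
    # Swap underscores for spaces, except in kaomoji tags.
    return x.strip().replace("_", " ") if x not in kaomojis else x.strip()

print(replace_underline("long_hair"))  # -> "long hair"
print(replace_underline("^_^"))        # -> "^_^" (kaomoji kept intact)
```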
utils.py
CHANGED

@@ -43,3 +43,8 @@ COPY_ACTION_JS = """\
     navigator.clipboard.writeText(inputs);
   }
 }"""
+
+
+def gradio_copy_prompt(prompt: str):
+    gr.Info("Copied!")
+    return prompt