hahafofo committed
Commit 39fbaa4
1 Parent(s): 48f4d16
utils/__init__.py ADDED
Empty file (no content)
utils/dbimutils.py ADDED
@@ -0,0 +1,54 @@
+ # DanBooru image utility functions
+
+ import cv2
+ import numpy as np
+ from PIL import Image
+
+
+ def smart_imread(img, flag=cv2.IMREAD_UNCHANGED):
+     # cv2.imread cannot decode GIFs, so route those through PIL
+     if img.endswith(".gif"):
+         img = Image.open(img)
+         img = img.convert("RGB")
+         img = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+     else:
+         img = cv2.imread(img, flag)
+     return img
+
+
+ def smart_24bit(img):
+     # Scale 16-bit images down to 8-bit (65535 / 257 = 255)
+     if img.dtype == np.uint16:
+         img = (img / 257).astype(np.uint8)
+
+     if len(img.shape) == 2:
+         img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+     elif img.shape[2] == 4:
+         # Turn fully transparent pixels white before dropping the alpha channel
+         trans_mask = img[:, :, 3] == 0
+         img[trans_mask] = [255, 255, 255, 255]
+         img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
+     return img
+
+
+ def make_square(img, target_size):
+     # Pad with white borders to a square of at least target_size per side
+     old_size = img.shape[:2]
+     desired_size = max(old_size)
+     desired_size = max(desired_size, target_size)
+
+     delta_w = desired_size - old_size[1]
+     delta_h = desired_size - old_size[0]
+     top, bottom = delta_h // 2, delta_h - (delta_h // 2)
+     left, right = delta_w // 2, delta_w - (delta_w // 2)
+
+     color = [255, 255, 255]
+     new_im = cv2.copyMakeBorder(
+         img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color
+     )
+     return new_im
+
+
+ def smart_resize(img, size):
+     # Assumes the image has already gone through make_square
+     if img.shape[0] > size:
+         img = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
+     elif img.shape[0] < size:
+         img = cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
+     return img
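
Together these helpers form a small preprocessing pipeline for the WD14 taggers (used by image2text.py below). A minimal usage sketch, assuming a hypothetical input file photo.png and a 448-pixel model input size:

    from utils import dbimutils

    img = dbimutils.smart_imread("photo.png")  # hypothetical path
    img = dbimutils.smart_24bit(img)           # normalize to 8-bit, 3-channel BGR
    img = dbimutils.make_square(img, 448)      # pad to a white-bordered square
    img = dbimutils.smart_resize(img, 448)     # scale to the model input size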
utils/exif.py ADDED
@@ -0,0 +1,54 @@
+ import piexif
+ import piexif.helper
+
+ from .html import plaintext_to_html
+
+
+ def get_image_info(rawimage):
+     items = rawimage.info
+     geninfo = ""
+
+     if "exif" in rawimage.info:
+         exif = piexif.load(rawimage.info["exif"])
+         exif_comment = (exif or {}).get("Exif", {}).get(piexif.ExifIFD.UserComment, b"")
+         try:
+             exif_comment = piexif.helper.UserComment.load(exif_comment)
+         except ValueError:
+             exif_comment = exif_comment.decode("utf8", errors="ignore")
+
+         items["exif comment"] = exif_comment
+         geninfo = exif_comment
+
+     # Drop format-level metadata that is not worth displaying
+     for field in [
+         "jfif",
+         "jfif_version",
+         "jfif_unit",
+         "jfif_density",
+         "dpi",
+         "exif",
+         "loop",
+         "background",
+         "timestamp",
+         "duration",
+     ]:
+         items.pop(field, None)
+
+     geninfo = items.get("parameters", geninfo)
+
+     if len(items) == 0:
+         message = "Nothing found in the image."
+         return f"<div><p>{message}</p></div>"
+
+     info = "<p><h4>PNG Info</h4></p>\n"
+     for key, text in items.items():
+         info += (
+             f"""
+ <div>
+ <p><b>{plaintext_to_html(str(key))}</b></p>
+ <p>{plaintext_to_html(str(text))}</p>
+ </div>
+ """.strip()
+             + "\n"
+         )
+     return info
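
A minimal sketch of calling get_image_info; the file name is hypothetical, and for images saved by Stable Diffusion WebUI the generation parameters surface via the "parameters" text chunk:

    from PIL import Image
    from utils.exif import get_image_info

    image = Image.open("generated.png")  # hypothetical file
    print(get_image_info(image))         # HTML fragment listing the metadata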
utils/html.py ADDED
@@ -0,0 +1,8 @@
+ import html
+
+
+ def plaintext_to_html(text):
+     # Escape HTML entities and turn newlines into <br> tags
+     text = (
+         "<p>" + "<br>\n".join(html.escape(x) for x in text.split("\n")) + "</p>"
+     )
+     return text
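
For reference, a quick sketch of what plaintext_to_html produces for input containing markup characters and a newline:

    from utils.html import plaintext_to_html

    print(plaintext_to_html("a<b\nc&d"))
    # -> '<p>a&lt;b<br>\nc&amp;d</p>'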
utils/image2text.py ADDED
@@ -0,0 +1,195 @@
+ from __future__ import annotations
+
+ import PIL.Image
+ import huggingface_hub
+ import numpy as np
+ import onnxruntime as rt
+ import pandas as pd
+ import torch
+ from clip_interrogator import Config, Interrogator
+ from transformers import AutoModelForCausalLM
+ from transformers import AutoProcessor
+
+ from . import dbimutils
+ from .singleton import Singleton
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ @Singleton
+ class Models(object):
+     # WD14 models
+     SWIN_MODEL_REPO = "SmilingWolf/wd-v1-4-swinv2-tagger-v2"
+     CONV_MODEL_REPO = "SmilingWolf/wd-v1-4-convnext-tagger-v2"
+     CONV2_MODEL_REPO = "SmilingWolf/wd-v1-4-convnextv2-tagger-v2"
+     VIT_MODEL_REPO = "SmilingWolf/wd-v1-4-vit-tagger-v2"
+
+     MODEL_FILENAME = "model.onnx"
+     LABEL_FILENAME = "selected_tags.csv"
+
+     # CLIP models
+     VIT_H_14_MODEL_REPO = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"  # Stable Diffusion 2.X
+     VIT_L_14_MODEL_REPO = "openai/clip-vit-large-patch14"  # Stable Diffusion 1.X
+
+     def __init__(self):
+         pass
+
+     @classmethod
+     def load_clip_model(cls, model_repo):
+         config = Config()
+         config.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+         config.blip_offload = not torch.cuda.is_available()
+         config.chunk_size = 2048
+         config.flavor_intermediate_count = 512
+         config.blip_num_beams = 64
+         config.clip_model_name = model_repo
+
+         ci = Interrogator(config)
+         return ci
+
+     def __getattr__(self, item):
+         # Lazily load models on first access and cache them on the instance
+         if item in self.__dict__:
+             return getattr(self, item)
+         print(f"Loading {item}...")
+         if item in ('clip_vit_h_14_model',):
+             self.clip_vit_h_14_model = self.load_clip_model(self.VIT_H_14_MODEL_REPO)
+         if item in ('clip_vit_l_14_model',):
+             self.clip_vit_l_14_model = self.load_clip_model(self.VIT_L_14_MODEL_REPO)
+         if item in ('swinv2_model',):
+             self.swinv2_model = self.load_model(self.SWIN_MODEL_REPO, self.MODEL_FILENAME)
+         if item in ('convnext_model',):
+             self.convnext_model = self.load_model(self.CONV_MODEL_REPO, self.MODEL_FILENAME)
+         if item in ('vit_model',):
+             self.vit_model = self.load_model(self.VIT_MODEL_REPO, self.MODEL_FILENAME)
+         if item in ('convnextv2_model',):
+             self.convnextv2_model = self.load_model(self.CONV2_MODEL_REPO, self.MODEL_FILENAME)
+         if item in ('git_model', 'git_processor'):
+             self.git_model, self.git_processor = self.load_git_model()
+         if item in ('tag_names', 'rating_indexes', 'general_indexes', 'character_indexes'):
+             self.tag_names, self.rating_indexes, self.general_indexes, self.character_indexes = self.load_w14_labels()
+
+         return getattr(self, item)
+
+     @classmethod
+     def load_git_model(cls):
+         model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-coco")
+         processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
+         return model, processor
+
+     @staticmethod
+     def load_model(model_repo: str, model_filename: str) -> rt.InferenceSession:
+         path = huggingface_hub.hf_hub_download(model_repo, model_filename)
+         model = rt.InferenceSession(path)
+         return model
+
+     @classmethod
+     def load_w14_labels(cls) -> list:
+         path = huggingface_hub.hf_hub_download(cls.CONV2_MODEL_REPO, cls.LABEL_FILENAME)
+         df = pd.read_csv(path)
+
+         tag_names = df["name"].tolist()
+         rating_indexes = list(np.where(df["category"] == 9)[0])
+         general_indexes = list(np.where(df["category"] == 0)[0])
+         character_indexes = list(np.where(df["category"] == 4)[0])
+         return [tag_names, rating_indexes, general_indexes, character_indexes]
+
+
+ models = Models.instance()
+
+
+ def clip_image2text(image, mode_type='best', model_name='vit_h_14'):
+     image = image.convert('RGB')
+     model = getattr(models, f'clip_{model_name}_model')
+     if mode_type == 'classic':
+         prompt = model.interrogate_classic(image)
+     elif mode_type == 'fast':
+         prompt = model.interrogate_fast(image)
+     elif mode_type == 'negative':
+         prompt = model.interrogate_negative(image)
+     else:
+         prompt = model.interrogate(image)  # default to 'best'
+     return prompt
+
+
+ def git_image2text(input_image, max_length=50):
+     image = input_image.convert('RGB')
+     pixel_values = models.git_processor(images=image, return_tensors="pt").to(device).pixel_values
+
+     generated_ids = models.git_model.to(device).generate(pixel_values=pixel_values, max_length=max_length)
+     generated_caption = models.git_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+     return generated_caption
+
+
+ def w14_image2text(
+     image: PIL.Image.Image,
+     model_name: str,
+     general_threshold: float,
+     character_threshold: float,
+ ):
+     tag_names: list[str] = models.tag_names
+     rating_indexes: list[np.int64] = models.rating_indexes
+     general_indexes: list[np.int64] = models.general_indexes
+     character_indexes: list[np.int64] = models.character_indexes
+     model_name = "{}_model".format(model_name.lower())
+     model = getattr(models, model_name)
+
+     _, height, width, _ = model.get_inputs()[0].shape
+
+     # Alpha to white
+     image = image.convert("RGBA")
+     new_image = PIL.Image.new("RGBA", image.size, "WHITE")
+     new_image.paste(image, mask=image)
+     image = new_image.convert("RGB")
+     image = np.asarray(image)
+
+     # PIL RGB to OpenCV BGR
+     image = image[:, :, ::-1]
+
+     image = dbimutils.make_square(image, height)
+     image = dbimutils.smart_resize(image, height)
+     image = image.astype(np.float32)
+     image = np.expand_dims(image, 0)
+
+     input_name = model.get_inputs()[0].name
+     label_name = model.get_outputs()[0].name
+     probs = model.run([label_name], {input_name: image})[0]
+
+     labels = list(zip(tag_names, probs[0].astype(float)))
+
+     # Rating labels form their own category: report all of them
+     ratings_names = [labels[i] for i in rating_indexes]
+     rating = dict(ratings_names)
+
+     # General tags: keep any with prediction confidence above the threshold
+     general_names = [labels[i] for i in general_indexes]
+     general_res = [x for x in general_names if x[1] > general_threshold]
+     general_res = dict(general_res)
+
+     # Character tags: keep any with prediction confidence above the threshold
+     character_names = [labels[i] for i in character_indexes]
+     character_res = [x for x in character_names if x[1] > character_threshold]
+     character_res = dict(character_res)
+
+     # Sort general tags by confidence and render them in several formats
+     b = dict(sorted(general_res.items(), key=lambda item: item[1], reverse=True))
+     a = (
+         ", ".join(list(b.keys()))
+         .replace("_", " ")
+         .replace("(", "\\(")
+         .replace(")", "\\)")
+     )
+     c = ", ".join(list(b.keys()))
+     d = " ".join(list(b.keys()))
+
+     return a, c, d, rating, character_res, general_res
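
A minimal sketch of how the three entry points might be invoked together, e.g. from a UI handler; the file name and threshold values here are hypothetical:

    from PIL import Image
    from utils import image2text

    img = Image.open("example.png")  # hypothetical input

    caption = image2text.git_image2text(img, max_length=50)
    prompt = image2text.clip_image2text(img, mode_type='best', model_name='vit_l_14')
    escaped, plain, spaced, rating, characters, general = image2text.w14_image2text(
        img, "convnextv2", general_threshold=0.35, character_threshold=0.85
    )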
utils/singleton.py ADDED
@@ -0,0 +1,37 @@
+ class Singleton:
+     """
+     A non-thread-safe helper class to ease implementing singletons.
+     This should be used as a decorator -- not a metaclass -- on the
+     class that should be a singleton.
+
+     The decorated class can define one `__init__` function that
+     takes only the `self` argument. Also, the decorated class cannot be
+     inherited from. Other than that, there are no restrictions that apply
+     to the decorated class.
+
+     To get the singleton instance, use the `instance` method. Trying
+     to use `__call__` will result in a `TypeError` being raised.
+     """
+
+     def __init__(self, decorated):
+         self._decorated = decorated
+
+     def instance(self):
+         """
+         Returns the singleton instance. Upon its first call, it creates a
+         new instance of the decorated class and calls its `__init__` method.
+         On all subsequent calls, the already created instance is returned.
+         """
+         try:
+             return self._instance
+         except AttributeError:
+             self._instance = self._decorated()
+             return self._instance
+
+     def __call__(self):
+         raise TypeError('Singletons must be accessed through `instance()`.')
+
+     def __instancecheck__(self, inst):
+         return isinstance(inst, self._decorated)
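
The docstring above spells out the contract; a brief usage sketch with a hypothetical Config class:

    from utils.singleton import Singleton

    @Singleton
    class Config:  # hypothetical example class
        def __init__(self):
            self.value = 42

    config = Config.instance()           # created lazily on first call
    assert config is Config.instance()   # same object every time
    assert isinstance(config, Config)    # __instancecheck__ delegates
    # Config() would raise TypeError: use instance() instead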
utils/translate.py ADDED
@@ -0,0 +1,59 @@
+ import torch
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
+
+ from .singleton import Singleton
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+
+ @Singleton
+ class Models(object):
+
+     def __getattr__(self, item):
+         # Lazily load the translation models on first access
+         if item in self.__dict__:
+             return getattr(self, item)
+
+         if item in ('zh2en_model', 'zh2en_tokenizer',):
+             self.zh2en_model, self.zh2en_tokenizer = self.load_zh2en_model()
+
+         if item in ('en2zh_model', 'en2zh_tokenizer',):
+             self.en2zh_model, self.en2zh_tokenizer = self.load_en2zh_model()
+
+         return getattr(self, item)
+
+     @classmethod
+     def load_en2zh_model(cls):
+         en2zh_model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-zh").eval()
+         en2zh_tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-zh")
+         return en2zh_model, en2zh_tokenizer
+
+     @classmethod
+     def load_zh2en_model(cls):
+         zh2en_model = AutoModelForSeq2SeqLM.from_pretrained('Helsinki-NLP/opus-mt-zh-en').eval()
+         zh2en_tokenizer = AutoTokenizer.from_pretrained('Helsinki-NLP/opus-mt-zh-en')
+         return zh2en_model, zh2en_tokenizer
+
+
+ models = Models.instance()
+
+
+ def zh2en(text):
+     with torch.no_grad():
+         encoded = models.zh2en_tokenizer([text], return_tensors="pt")
+         sequences = models.zh2en_model.generate(**encoded)
+         return models.zh2en_tokenizer.batch_decode(sequences, skip_special_tokens=True)[0]
+
+
+ def en2zh(text):
+     with torch.no_grad():
+         encoded = models.en2zh_tokenizer([text], return_tensors="pt")
+         sequences = models.en2zh_model.generate(**encoded)
+         return models.en2zh_tokenizer.batch_decode(sequences, skip_special_tokens=True)[0]
+
+
+ if __name__ == "__main__":
+     # Round-trip demo: Chinese -> English -> Chinese
+     text = "青春不能回头,所以青春没有终点。 ——《火影忍者》"  # "Youth cannot turn back, so youth has no end." (Naruto)
+     en = zh2en(text)
+     print(text, en)
+     zh = en2zh(en)
+     print(en, zh)