import json
import os
from unittest.mock import patch

import torch
from clip_interrogator import Config, Interrogator
from PIL import Image
from transformers import AutoModelForCausalLM, AutoProcessor
from transformers.dynamic_module_utils import get_imports

from modules.util import TimeIt
from shared import path_manager, settings
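
# Image "interrogator" backends. Each *_look(image, prompt, gr) takes a PIL
# image, the user's prompt text, and Gradio's gr module (used for Info
# toasts), and returns a caption string; look() at the bottom picks a backend.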
def brainblip_look(image, prompt, gr):
    # Lazy import: the BLIP class is only needed when this backend runs.
    from transformers import BlipForConditionalGeneration

    gr.Info("BrainBlip is creating your prompt")
    print("Loading BrainBlip.")
    processor = AutoProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
    model = BlipForConditionalGeneration.from_pretrained("braintacles/brainblip").to("cpu")
    print("Processing...")
    inputs = processor(image, return_tensors="pt").to("cpu")
    out = model.generate(
        **inputs,
        min_length=40,
        max_new_tokens=150,
        num_beams=5,
        repetition_penalty=1.40,
    )
    caption = processor.decode(out[0], skip_special_tokens=True)
    return caption
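
# Note: clip_interrogator pairs a caption model with CLIP-ranked modifier
# phrases, so clip_look()'s output reads like a Stable Diffusion style prompt.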
def clip_look(image, prompt, gr):
    # Unload other pipelines first if VRAM is tight?
    # state["pipeline"] = None
    gr.Info("CLIP is reading your prompt")
    conf = Config(
        device=torch.device("cuda"),
        clip_model_name="ViT-L-14/openai",
        cache_path=path_manager.model_paths["clip_path"],
    )
    conf.apply_low_vram_defaults()
    interrogator = Interrogator(conf)
    text = interrogator.interrogate(image)
    return text
def florence_look(image, prompt, gr):
    def fixed_get_imports(filename: str | os.PathLike) -> list[str]:
        """Workaround for https://huggingface.co/microsoft/Florence-2-large-ft/discussions/4 .

        Florence-2's remote code declares flash_attn as a required import even
        when it is not installed; drop it so CPU inference still works.
        """
        if os.path.basename(filename) != "modeling_florence2.py":
            return get_imports(filename)
        imports = get_imports(filename)
        try:
            imports.remove("flash_attn")
        except ValueError:
            pass
        return imports

    print("Looking...")
    image = image.convert("RGB")
    # state["pipeline"] = None
    gr.Info("Florence is creating your prompt")
    with TimeIt(""):
        device = "cpu"
        torch_dtype = torch.float32
        # Florence-2 selects its task with a special token rather than free
        # text, so the user's prompt is replaced by the captioning task token.
        prompt = "<MORE_DETAILED_CAPTION>"
        with patch("transformers.dynamic_module_utils.get_imports", fixed_get_imports):
            model = AutoModelForCausalLM.from_pretrained(
                "microsoft/Florence-2-large",
                torch_dtype=torch_dtype,
                trust_remote_code=True,
            ).to(device)
            processor = AutoProcessor.from_pretrained(
                "microsoft/Florence-2-large", trust_remote_code=True
            )
        inputs = processor(text=prompt, images=image, return_tensors="pt").to(device, torch_dtype)
        print("Judging...")
        generated_ids = model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=2048,
            num_beams=6,
            do_sample=False,
        )
        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
        result = processor.post_process_generation(
            generated_text, task=prompt, image_size=(image.width, image.height)
        )
        text = result[prompt]
    return text
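
# Dispatch table: look() strips spaces and a trailing ":" from the prompt and
# matches the result against these keys to select a backend explicitly.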
looks = {
    "brainblip": brainblip_look,
    "clip": clip_look,
    "florence": florence_look,
}
def look(image, prompt, gr):
    # An explicit backend request like "florence" or "clip:" picks that backend.
    key = prompt.strip(" :")
    if key in looks:
        text = looks[key](image, prompt, gr)
    else:
        if prompt != "":
            return prompt
        try:
            # No prompt given: try generation parameters embedded in the image.
            info = image.info
            params = info.get("parameters", "")
            text = json.dumps(json.loads(params))
        except (TypeError, ValueError):
            # No usable metadata: fall back to the configured default interrogator.
            interrogator = settings.default_settings.get("interrogator", "florence")
            text = looks[interrogator](image, prompt, gr)
    return text
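
# A minimal smoke-test sketch, runnable as `python <thisfile>.py image.png`.
# Assumptions not in the original: the DummyGr stand-in (the app normally
# passes the real gradio module, whose gr.Info shows a toast) and the CLI
# argument handling.
if __name__ == "__main__":
    import sys

    class DummyGr:
        @staticmethod
        def Info(msg):
            print(f"[info] {msg}")

    img = Image.open(sys.argv[1]) if len(sys.argv) > 1 else Image.new("RGB", (64, 64))
    # Empty prompt: look() reads embedded parameters if present, otherwise it
    # falls back to the default interrogator from settings.
    print(look(img, "", gr=DummyGr))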