#!/usr/bin/env python from __future__ import annotations import os import random import gc import toml import gradio as gr import numpy as np import utils import torch import json import PIL.Image import base64 import safetensors from io import BytesIO from typing import Tuple import gradio_user_history as gr_user_history from huggingface_hub import hf_hub_download from safetensors.torch import load_file from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer from lora_diffusers import LoRANetwork, create_network_from_weights from diffusers.models import AutoencoderKL from diffusers import ( LCMScheduler, StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, DPMSolverMultistepScheduler, DPMSolverSinglestepScheduler, KDPM2DiscreteScheduler, EulerDiscreteScheduler, EulerAncestralDiscreteScheduler, HeunDiscreteScheduler, LMSDiscreteScheduler, DDIMScheduler, DEISMultistepScheduler, UniPCMultistepScheduler, ) DESCRIPTION = "Animagine XL 2.0" if not torch.cuda.is_available(): DESCRIPTION += "\n

Running on CPU 🥶 This demo does not work on CPU.

" IS_COLAB = utils.is_google_colab() or os.getenv("IS_COLAB") == "1" ENABLE_REFINER_PROMPT = os.getenv("ENABLE_REFINER_PROMPT") == "1" MAX_SEED = np.iinfo(np.int32).max HF_TOKEN = os.getenv("HF_TOKEN") CACHE_EXAMPLES = torch.cuda.is_available() and os.getenv("CACHE_EXAMPLES") == "1" MIN_IMAGE_SIZE = int(os.getenv("MIN_IMAGE_SIZE", "512")) MAX_IMAGE_SIZE = int(os.getenv("MAX_IMAGE_SIZE", "2048")) USE_TORCH_COMPILE = os.getenv("USE_TORCH_COMPILE") == "1" ENABLE_CPU_OFFLOAD = os.getenv("ENABLE_CPU_OFFLOAD") == "1" MODEL = os.getenv("MODEL", "Linaqruf/animagine-xl-2.0") torch.backends.cudnn.deterministic = True torch.backends.cudnn.benchmark = False device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") if torch.cuda.is_available(): if ENABLE_REFINER_PROMPT: tokenizer = AutoTokenizer.from_pretrained("isek-ai/SDPrompt-RetNet-300M") tuner = AutoModelForCausalLM.from_pretrained( "isek-ai/SDPrompt-RetNet-300M", trust_remote_code=True, ).to(device) vae = AutoencoderKL.from_pretrained( "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16, ) pipe = StableDiffusionXLPipeline.from_pretrained( MODEL, vae=vae, torch_dtype=torch.float16, custom_pipeline="lpw_stable_diffusion_xl", use_safetensors=True, use_auth_token=HF_TOKEN, variant="fp16", ) if ENABLE_CPU_OFFLOAD: pipe.enable_model_cpu_offload() else: pipe.to(device) if USE_TORCH_COMPILE: pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True) else: pipe = None def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: if randomize_seed: seed = random.randint(0, MAX_SEED) return seed def seed_everything(seed): torch.manual_seed(seed) torch.cuda.manual_seed_all(seed) np.random.seed(seed) random.seed(seed) generator = torch.Generator() generator.manual_seed(seed) return generator def get_image_path(base_path: str): extensions = [".jpg", ".jpeg", ".png", ".bmp", ".gif"] for ext in extensions: image_path = base_path + ext if os.path.exists(image_path): return image_path return None def update_lcm_parameter(enable_lcm: bool = False): if enable_lcm: return (2, 8, gr.update(value="LCM"), gr.update(choices=["LCM"])) else: return (12, 50, gr.update(value="Euler a"), gr.update(choices=sampler_list)) def update_selection(selected_state: gr.SelectData): lora_repo = sdxl_loras[selected_state.index]["repo"] lora_weight = sdxl_loras[selected_state.index]["multiplier"] updated_selected_info = f"{lora_repo}" return ( updated_selected_info, selected_state, lora_weight, ) def parse_aspect_ratio(aspect_ratio): if aspect_ratio == "Custom": return None, None width, height = aspect_ratio.split(" x ") return int(width), int(height) def aspect_ratio_handler(aspect_ratio, custom_width, custom_height): if aspect_ratio == "Custom": return custom_width, custom_height else: width, height = parse_aspect_ratio(aspect_ratio) return width, height def create_network(text_encoders, unet, state_dict, multiplier, device): network = create_network_from_weights( text_encoders, unet, state_dict, multiplier, ) network.load_state_dict(state_dict) network.to(device, dtype=unet.dtype) network.apply_to(multiplier=multiplier) return network def get_scheduler(scheduler_config, name): scheduler_map = { "DPM++ 2M Karras": lambda: DPMSolverMultistepScheduler.from_config( scheduler_config, use_karras_sigmas=True ), "DPM++ SDE Karras": lambda: DPMSolverSinglestepScheduler.from_config( scheduler_config, use_karras_sigmas=True ), "DPM++ 2M SDE Karras": lambda: DPMSolverMultistepScheduler.from_config( scheduler_config, use_karras_sigmas=True, algorithm_type="sde-dpmsolver++" ), "Euler": lambda: EulerDiscreteScheduler.from_config(scheduler_config), "Euler a": lambda: EulerAncestralDiscreteScheduler.from_config( scheduler_config ), "DDIM": lambda: DDIMScheduler.from_config(scheduler_config), "LCM": lambda: LCMScheduler.from_config(scheduler_config), } return scheduler_map.get(name, lambda: None)() def free_memory(): torch.cuda.empty_cache() gc.collect() def preprocess_prompt( style_dict, style_name: str, positive: str, negative: str = "", ) -> Tuple[str, str]: p, n = style_dict.get(style_name, styles["(None)"]) return p.format(prompt=positive), n + negative def common_upscale(samples, width, height, upscale_method): return torch.nn.functional.interpolate( samples, size=(height, width), mode=upscale_method ) def upscale(samples, upscale_method, scale_by): width = round(samples.shape[3] * scale_by) height = round(samples.shape[2] * scale_by) s = common_upscale(samples, width, height, upscale_method) return s def prompt_completion( input_text, max_new_tokens=128, do_sample=True, temperature=1.0, top_p=0.95, top_k=20, repetition_penalty=1.2, num_beams=1, ): try: if input_text.strip() == "": return "" inputs = tokenizer( f"{input_text}", return_tensors="pt", add_special_tokens=False )["input_ids"].to(device) result = tuner.generate( inputs, max_new_tokens=max_new_tokens, do_sample=do_sample, temperature=temperature, top_p=top_p, top_k=top_k, repetition_penalty=repetition_penalty, num_beams=num_beams, ) return tokenizer.batch_decode(result, skip_special_tokens=True)[0] except Exception as e: print(f"An error occured: {e}") raise finally: free_memory() def load_and_convert_thumbnail(model_path: str): with safetensors.safe_open(model_path, framework="pt") as f: metadata = f.metadata() if "modelspec.thumbnail" in metadata: base64_data = metadata["modelspec.thumbnail"] prefix, encoded = base64_data.split(",", 1) image_data = base64.b64decode(encoded) image = PIL.Image.open(BytesIO(image_data)) return image return None def generate( prompt: str, negative_prompt: str = "", seed: int = 0, custom_width: int = 1024, custom_height: int = 1024, guidance_scale: float = 12.0, num_inference_steps: int = 50, use_lora: bool = False, lora_weight: float = 1.0, selected_state: str = "", enable_lcm: bool = False, sampler: str = "Euler a", aspect_ratio_selector: str = "1024 x 1024", style_selector: str = "(None)", quality_selector: str = "Standard", use_upscaler: bool = False, upscaler_strength: float = 0.5, upscale_by: float = 1.5, refine_prompt: bool = False, profile: gr.OAuthProfile | None = None, progress=gr.Progress(track_tqdm=True), ) -> PIL.Image.Image: generator = seed_everything(seed) network = None network_state = {"current_lora": None, "multiplier": None} adapter_id = "Linaqruf/lcm-lora-sdxl-rank1" width, height = aspect_ratio_handler( aspect_ratio_selector, custom_width, custom_height, ) if ENABLE_REFINER_PROMPT: if refine_prompt: if not prompt: prompt = random.choice(["1girl, solo", "1boy, solo"]) prompt = prompt_completion(prompt) prompt, negative_prompt = preprocess_prompt( quality_prompt, quality_selector, prompt, negative_prompt ) prompt, negative_prompt = preprocess_prompt( styles, style_selector, prompt, negative_prompt ) if width % 8 != 0: width = width - (width % 8) if height % 8 != 0: height = height - (height % 8) if use_lora: if not selected_state: raise Exception("You must Select a LoRA") repo_name = sdxl_loras[selected_state.index]["repo"] full_path_lora = saved_names[selected_state.index] weight_name = sdxl_loras[selected_state.index]["weights"] lora_sd = load_file(full_path_lora) text_encoders = [pipe.text_encoder, pipe.text_encoder_2] if network_state["current_lora"] != repo_name: network = create_network( text_encoders, pipe.unet, lora_sd, lora_weight, device, ) network_state["current_lora"] = repo_name network_state["multiplier"] = lora_weight elif network_state["multiplier"] != lora_weight: network = create_network( text_encoders, pipe.unet, lora_sd, lora_weight, device, ) network_state["multiplier"] = lora_weight else: if network: network.unapply_to() network = None network_state = { "current_lora": None, "multiplier": None, } if enable_lcm: pipe.load_lora_weights(adapter_id) backup_scheduler = pipe.scheduler pipe.scheduler = get_scheduler(pipe.scheduler.config, sampler) if use_upscaler: upscaler_pipe = StableDiffusionXLImg2ImgPipeline(**pipe.components) metadata = { "prompt": prompt, "negative_prompt": negative_prompt, "resolution": f"{width} x {height}", "guidance_scale": guidance_scale, "num_inference_steps": num_inference_steps, "seed": seed, "sampler": sampler, "enable_lcm": enable_lcm, "sdxl_style": style_selector, "quality_tags": quality_selector, "refine_prompt": refine_prompt, } if use_lora: metadata["use_lora"] = {"selected_lora": repo_name, "multiplier": lora_weight} else: metadata["use_lora"] = None if use_upscaler: new_width = int(width * upscale_by) new_height = int(height * upscale_by) metadata["use_upscaler"] = { "upscale_method": "nearest-exact", "upscaler_strength": upscaler_strength, "upscale_by": upscale_by, "new_resolution": f"{new_width} x {new_height}", } else: metadata["use_upscaler"] = None print(json.dumps(metadata, indent=4)) try: if use_upscaler: latents = pipe( prompt=prompt, negative_prompt=negative_prompt, width=width, height=height, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, generator=generator, output_type="latent", ).images upscaled_latents = upscale(latents, "nearest-exact", upscale_by) image = upscaler_pipe( prompt=prompt, negative_prompt=negative_prompt, image=upscaled_latents, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, strength=upscaler_strength, generator=generator, output_type="pil", ).images[0] else: image = pipe( prompt=prompt, negative_prompt=negative_prompt, width=width, height=height, guidance_scale=guidance_scale, num_inference_steps=num_inference_steps, generator=generator, output_type="pil", ).images[0] if network: network.unapply_to() network = None if profile is not None: gr_user_history.save_image( label=prompt, image=image, profile=profile, metadata=metadata, ) return image, metadata except Exception as e: print(f"An error occured: {e}") raise finally: if network: network.unapply_to() network = None if use_lora: del lora_sd, text_encoders if enable_lcm: pipe.unload_lora_weights() if use_upscaler: del upscaler_pipe pipe.scheduler = backup_scheduler free_memory() examples = [ "face focus, cute, 1girl, green hair, sweater, looking at viewer, upper body, beanie, outdoors, night, turtleneck", "face focus, bishounen, 1boy, green hair, sweater, looking at viewer, upper body, beanie, outdoors, night, turtleneck", "face focus, fu xuan, 1girl, solo, yellow eyes, dress, looking at viewer, hair rings, bare shoulders, long hair, hair ornament, purple hair, bangs, forehead jewel, frills, tassel, jewelry, pink hair", "face focus, bishounen, 1boy, zhongli, looking at viewer, upper body, outdoors, night", "a girl with mesmerizing blue eyes peers at the viewer. Her long, white hair flows gracefully, adorned with stunning blue butterfly hair ornaments", ] quality_prompt_list = [ { "name": "(None)", "prompt": "{prompt}", "negative_prompt": "", }, { "name": "Standard", "prompt": "masterpiece, best quality, {prompt}", "negative_prompt": "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry", }, { "name": "Light", "prompt": "(masterpiece), best quality, expressive eyes, perfect face, {prompt}", "negative_prompt": "(low quality, worst quality:1.2), 3d, watermark, signature, ugly, poorly drawn", }, { "name": "Heavy", "prompt": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings", "negative_prompt": "longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair, extra digit, fewer digits, cropped, worst quality, low quality", }, ] sampler_list = [ "DPM++ 2M Karras", "DPM++ SDE Karras", "DPM++ 2M SDE Karras", "Euler", "Euler a", "DDIM", ] aspect_ratios = [ "1024 x 1024", "1152 x 896", "896 x 1152", "1216 x 832", "832 x 1216", "1344 x 768", "768 x 1344", "1536 x 640", "640 x 1536", "Custom", ] style_list = [ { "name": "(None)", "prompt": "{prompt}", "negative_prompt": "", }, { "name": "Cinematic", "prompt": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy", "negative_prompt": "cartoon, graphic, text, painting, crayon, graphite, abstract, glitch, deformed, mutated, ugly, disfigured", }, { "name": "Photographic", "prompt": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed", "negative_prompt": "drawing, painting, crayon, sketch, graphite, impressionist, noisy, blurry, soft, deformed, ugly", }, { "name": "Anime", "prompt": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed", "negative_prompt": "photo, deformed, black and white, realism, disfigured, low contrast", }, { "name": "Manga", "prompt": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style", "negative_prompt": "ugly, deformed, noisy, blurry, low contrast, realism, photorealistic, Western comic style", }, { "name": "Digital Art", "prompt": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed", "negative_prompt": "photo, photorealistic, realism, ugly", }, { "name": "Pixel art", "prompt": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics", "negative_prompt": "sloppy, messy, blurry, noisy, highly detailed, ultra textured, photo, realistic", }, { "name": "Fantasy art", "prompt": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy", "negative_prompt": "photographic, realistic, realism, 35mm film, dslr, cropped, frame, text, deformed, glitch, noise, noisy, off-center, deformed, cross-eyed, closed eyes, bad anatomy, ugly, disfigured, sloppy, duplicate, mutated, black and white", }, { "name": "Neonpunk", "prompt": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional", "negative_prompt": "painting, drawing, illustration, glitch, deformed, mutated, cross-eyed, ugly, disfigured", }, { "name": "3D Model", "prompt": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting", "negative_prompt": "ugly, deformed, noisy, low poly, blurry, painting", }, ] thumbnail_cache = {} with open("lora.toml", "r") as file: data = toml.load(file) sdxl_loras = [] saved_names = [] for item in data["data"]: model_path = hf_hub_download(item["repo"], item["weights"], token=HF_TOKEN) saved_names.append(model_path) # Store the path in saved_names if model_path not in thumbnail_cache: thumbnail_image = load_and_convert_thumbnail(model_path) thumbnail_cache[model_path] = thumbnail_image else: thumbnail_image = thumbnail_cache[model_path] sdxl_loras.append( { "image": thumbnail_image, # Storing the PIL image object "title": item["title"], "repo": item["repo"], "weights": item["weights"], "multiplier": item.get("multiplier", "1.0"), } ) styles = {k["name"]: (k["prompt"], k["negative_prompt"]) for k in style_list} quality_prompt = { k["name"]: (k["prompt"], k["negative_prompt"]) for k in quality_prompt_list } # saved_names = [ # hf_hub_download(item["repo"], item["weights"], token=HF_TOKEN) # for item in sdxl_loras # ] with gr.Blocks(css="style.css", theme="NoCrypt/miku@1.2.1") as demo: title = gr.HTML( f"""

{DESCRIPTION}

""", elem_id="title", ) gr.Markdown( f"""Gradio demo for [Linaqruf/animagine-xl-2.0](https://huggingface.co/Linaqruf/animagine-xl-2.0)""", elem_id="subtitle", ) gr.DuplicateButton( value="Duplicate Space for private use", elem_id="duplicate-button", visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1", ) selected_state = gr.State() with gr.Row(): with gr.Column(scale=2): with gr.Tab("Txt2img"): with gr.Group(): prompt = gr.Text( label="Prompt", max_lines=5, placeholder="Enter your prompt", ) negative_prompt = gr.Text( label="Negative Prompt", max_lines=5, placeholder="Enter a negative prompt", ) with gr.Accordion(label="Quality Prompt Presets", open=False): quality_selector = gr.Dropdown( label="Quality Prompt Presets", show_label=False, interactive=True, choices=list(quality_prompt.keys()), value="Standard", ) with gr.Row(): enable_lcm = gr.Checkbox(label="Enable LCM", value=False) use_lora = gr.Checkbox(label="Use LoRA", value=False) refine_prompt = gr.Checkbox( label="Refine prompt", value=False, visible=ENABLE_REFINER_PROMPT, ) with gr.Group(visible=False) as lora_group: selector_info = gr.Text( label="Selected LoRA", max_lines=1, value="No LoRA selected.", ) lora_selection = gr.Gallery( value=[(item["image"], item["title"]) for item in sdxl_loras], label="Animagine XL 2.0 LoRA", show_label=False, columns=2, show_share_button=False, ) lora_weight = gr.Slider( label="Multiplier", minimum=-2, maximum=2, step=0.05, value=1, ) with gr.Tab("Advanced Settings"): with gr.Group(): style_selector = gr.Radio( label="Style Preset", container=True, interactive=True, choices=list(styles.keys()), value="(None)", ) with gr.Group(): aspect_ratio_selector = gr.Radio( label="Aspect Ratio", choices=aspect_ratios, value="1024 x 1024", container=True, ) with gr.Group(): use_upscaler = gr.Checkbox(label="Use Upscaler", value=False) with gr.Row() as upscaler_row: upscaler_strength = gr.Slider( label="Strength", minimum=0, maximum=1, step=0.05, value=0.55, visible=False, ) upscale_by = gr.Slider( label="Upscale by", minimum=1, maximum=1.5, step=0.1, value=1.5, visible=False, ) with gr.Group(visible=False) as custom_resolution: with gr.Row(): custom_width = gr.Slider( label="Width", minimum=MIN_IMAGE_SIZE, maximum=MAX_IMAGE_SIZE, step=8, value=1024, ) custom_height = gr.Slider( label="Height", minimum=MIN_IMAGE_SIZE, maximum=MAX_IMAGE_SIZE, step=8, value=1024, ) with gr.Group(): sampler = gr.Dropdown( label="Sampler", choices=sampler_list, interactive=True, value="Euler a", ) with gr.Group(): seed = gr.Slider( label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0 ) randomize_seed = gr.Checkbox(label="Randomize seed", value=True) with gr.Group(): with gr.Row(): guidance_scale = gr.Slider( label="Guidance scale", minimum=1, maximum=20, step=0.1, value=12.0, ) num_inference_steps = gr.Slider( label="Number of inference steps", minimum=1, maximum=100, step=1, value=50, ) with gr.Tab("Past Generation"): gr_user_history.render() with gr.Column(scale=3): with gr.Blocks(): run_button = gr.Button("Generate", variant="primary") result = gr.Image(label="Result", show_label=False) with gr.Accordion(label="Generation Parameters", open=False): gr_metadata = gr.JSON(label="Metadata", show_label=False) gr.Examples( examples=examples, inputs=prompt, outputs=[result, gr_metadata], fn=generate, cache_examples=CACHE_EXAMPLES, ) lora_selection.select( update_selection, outputs=[ selector_info, selected_state, lora_weight, ], queue=False, show_progress=False, ) enable_lcm.change( update_lcm_parameter, inputs=enable_lcm, outputs=[ guidance_scale, num_inference_steps, sampler, sampler, ], queue=False, api_name=False, ) use_lora.change( fn=lambda x: gr.update(visible=x), inputs=use_lora, outputs=lora_group, queue=False, api_name=False, ) use_upscaler.change( fn=lambda x: [gr.update(visible=x), gr.update(visible=x)], inputs=use_upscaler, outputs=[upscaler_strength, upscale_by], queue=False, api_name=False, ) aspect_ratio_selector.change( fn=lambda x: gr.update(visible=x == "Custom"), inputs=aspect_ratio_selector, outputs=custom_resolution, queue=False, api_name=False, ) inputs = [ prompt, negative_prompt, seed, custom_width, custom_height, guidance_scale, num_inference_steps, use_lora, lora_weight, selected_state, enable_lcm, sampler, aspect_ratio_selector, style_selector, quality_selector, use_upscaler, upscaler_strength, upscale_by, refine_prompt, ] prompt.submit( fn=randomize_seed_fn, inputs=[seed, randomize_seed], outputs=seed, queue=False, api_name=False, ).then( fn=generate, inputs=inputs, outputs=result, api_name="run", ) negative_prompt.submit( fn=randomize_seed_fn, inputs=[seed, randomize_seed], outputs=seed, queue=False, api_name=False, ).then( fn=generate, inputs=inputs, outputs=result, api_name=False, ) run_button.click( fn=randomize_seed_fn, inputs=[seed, randomize_seed], outputs=seed, queue=False, api_name=False, ).then( fn=generate, inputs=inputs, outputs=[result, gr_metadata], api_name=False, ) demo.queue(max_size=20).launch(debug=IS_COLAB, share=IS_COLAB)