import spaces
import os
import random
import math
import torch
# Global PyTorch backend switches, set once at import time:
# ask cuDNN for deterministic algorithms and turn off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Allow TF32 for CUDA matrix multiplications.
torch.backends.cuda.matmul.allow_tf32 = True
import numpy as np
from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL
from diffusers.pipelines.stable_diffusion_xl.pipeline_stable_diffusion_xl import (
StableDiffusionXLPipeline,
)
from diffusers.schedulers.scheduling_euler_ancestral_discrete import (
EulerAncestralDiscreteScheduler,
)
from diffusers.models.attention_processor import AttnProcessor2_0
from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr
# python-dotenv is optional: when it is installed, load variables from a local
# .env file; when it is missing, just report it and carry on.
try:
    from dotenv import load_dotenv
except ImportError:
    # Only a missing package is tolerated here; a bare `except` would also hide
    # KeyboardInterrupt/SystemExit and unrelated failures.
    print("failed to import dotenv (this is not a problem on the production)")
else:
    # Kept outside the `try` so an error while loading the .env file is not
    # misreported as an import failure.
    load_dotenv()
# --- Configuration read from the environment -------------------------------

# Hugging Face access token. Required: startup aborts when it is not set.
HF_TOKEN = os.environ.get("HF_TOKEN")
if HF_TOKEN is None:
    # Explicit raise rather than `assert`: asserts are removed under
    # `python -O`, which would let a missing value slip through.
    raise RuntimeError("HF_TOKEN environment variable must be set")

# Repository of the SDXL image model; has a default.
IMAGE_MODEL_REPO_ID = os.environ.get(
    "IMAGE_MODEL_REPO_ID", "OnomaAIResearch/Illustrious-xl-early-release-v0"
)

# Repository of the Dart v3 prompt model. Required, no default.
DART_V3_REPO_ID = os.environ.get("DART_V3_REPO_ID", None)
if DART_V3_REPO_ID is None:
    raise RuntimeError("DART_V3_REPO_ID environment variable must be set")

# "true" (case-insensitive) enables sequential CPU offload in load_pipeline().
CPU_OFFLOAD = os.environ.get("CPU_OFFLOAD", "False").lower() == "true"
# Upper bounds used by the seed and size sliders.
MAX_IMAGE_SIZE = 2048
MAX_SEED = np.iinfo(np.int32).max

# Prompt skeleton fed to the Dart model; `aspect_ratio` and `subject` are
# filled in with str.format().
TEMPLATE = "".join(
    [
        "<|bos|>",
        "<|rating:general|>",
        "{aspect_ratio}",
        "<|length:medium|>",
        "{subject}",
    ]
)

# Default prompt suffix and negative prompt shown in the advanced settings.
QUALITY_TAGS = "masterpiece, best quality, very aesthetic, newest"
NEGATIVE_PROMPT = (
    "(worst quality, bad quality:1.1), very displeasing, lowres, jaggy lines, "
    "3d, blurry, watermark, signature, copyright notice, logo, scan, "
    "jpeg artifacts, chromatic aberration, white outline, film grain, "
    "artistic error, bad anatomy, bad hands, wrong hand"
)

# Tags the prompt model must never emit (converted to token ids elsewhere).
BAN_TAGS = [
    "photoshop (medium)",
    "clip studio paint (medium)",
    "absurdres",
    "highres",
    "copyright request",
    "character request",
    "creature request",
]

# Run on the GPU when one is visible, otherwise on the CPU.
device = "cpu" if not torch.cuda.is_available() else "cuda"
# Dart v3 prompt model: loaded once at import time onto the CPU in bfloat16,
# switched to eval mode with gradients disabled, then wrapped by torch.compile.
dart = torch.compile(
    AutoModelForCausalLM.from_pretrained(
        DART_V3_REPO_ID,
        torch_dtype=torch.bfloat16,
        token=HF_TOKEN,
        use_cache=True,
        device_map="cpu",
    )
    .eval()
    .requires_grad_(False)
)

tokenizer = AutoTokenizer.from_pretrained(DART_V3_REPO_ID)

# One single-element id list per banned tag, passed as `bad_words_ids` to
# generate(). Presumably every tag is one token in the Dart vocabulary —
# verify against the tokenizer.
BAN_TOKENS = [
    tokenizer.convert_tokens_to_ids([banned_tag]) for banned_tag in BAN_TAGS
]
def load_pipeline():
    """Build the SDXL text-to-image pipeline and place it for inference.

    Loads the fp16-fix VAE and the image model in float16, swaps the scheduler
    for Euler Ancestral (reusing the existing scheduler config), then either
    enables sequential CPU offload (when ``CPU_OFFLOAD`` is set) or moves the
    whole pipeline to ``device``.

    Returns:
        The configured pipeline object.
    """
    fp16_vae = AutoencoderKL.from_pretrained(
        "madebyollin/sdxl-vae-fp16-fix",
        torch_dtype=torch.float16,
    )
    sdxl = StableDiffusionXLPipeline.from_pretrained(
        IMAGE_MODEL_REPO_ID,
        vae=fp16_vae,
        torch_dtype=torch.float16,
        use_safetensors=True,
        add_watermarker=False,
        custom_pipeline="lpw_stable_diffusion_xl",
    )
    sdxl.scheduler = EulerAncestralDiscreteScheduler.from_config(
        sdxl.scheduler.config
    )

    if not CPU_OFFLOAD:
        # for spaces
        sdxl.to(device)
        return sdxl

    # local
    sdxl.enable_sequential_cpu_offload(gpu_id=0, device=device)
    return sdxl
# The image pipeline is only built when a CUDA device is present; without one
# `pipe` stays None.
pipe = None
if torch.cuda.is_available():
    pipe = load_pipeline()
    print("Loaded pipeline")
def get_aspect_ratio(width: int, height: int) -> str:
    """Return the aspect-ratio token for a ``width`` x ``height`` image.

    The ratio ``width / height`` is bucketed as follows:

    * ``<= 1/sqrt(3)``        -> ultra_tall
    * ``<= 8/9``              -> tall
    * ``<  9/8``              -> square
    * ``<  sqrt(3)``          -> wide
    * anything larger         -> ultra_wide

    Args:
        width: Image width.
        height: Image height; must be non-zero.

    Returns:
        One of the ``<|aspect_ratio:...|>`` tokens.
    """
    ratio = width / height
    sqrt3 = math.sqrt(3)

    # Guard clauses, checked from the tallest bucket to the widest.
    if ratio <= 1 / sqrt3:
        return "<|aspect_ratio:ultra_tall|>"
    if ratio <= 8 / 9:
        return "<|aspect_ratio:tall|>"
    if ratio < 9 / 8:
        return "<|aspect_ratio:square|>"
    if ratio < sqrt3:
        return "<|aspect_ratio:wide|>"
    return "<|aspect_ratio:ultra_wide|>"
# Call form of the decorator: works on every torch release that ships
# inference_mode, not only those that accept the bare-decorator spelling.
@torch.inference_mode()
def generate_prompt(subject: str, aspect_ratio: str) -> str:
    """Sample a comma-separated tag prompt from the Dart model.

    Args:
        subject: Subject tag inserted into ``TEMPLATE`` (e.g. ``"1girl"``).
        aspect_ratio: Aspect-ratio token inserted into ``TEMPLATE``.

    Returns:
        The decoded, non-blank tokens joined with ``", "``.
    """
    # `tokenizer(...)` replaces the deprecated `encode_plus`; same arguments,
    # same result.
    input_ids = tokenizer(
        TEMPLATE.format(aspect_ratio=aspect_ratio, subject=subject),
        return_tensors="pt",
    ).input_ids
    print("input_ids:", input_ids)

    output_ids = dart.generate(
        input_ids,
        max_new_tokens=256,
        do_sample=True,
        temperature=1.0,
        top_p=1.0,
        top_k=100,
        num_beams=1,
        bad_words_ids=BAN_TOKENS,
    )[0]

    # NOTE(review): `input_ids` is presumably shaped (1, seq_len), so
    # len(input_ids) is 1 and this drops only the first token, not the whole
    # template. The special tokens are removed by `skip_special_tokens` below,
    # which leaves the subject tag in the returned prompt. Kept as-is because
    # the image prompt relies on that — confirm before "fixing" the slice.
    generated = output_ids[len(input_ids):]

    # batch_decode on a 1-D id sequence decodes token by token.
    tags = [
        piece
        for piece in tokenizer.batch_decode(generated, skip_special_tokens=True)
        if piece.strip() != ""
    ]
    decoded = ", ".join(tags)
    print("decoded:", decoded)
    return decoded
def format_prompt(prompt: str, prompt_suffix: str) -> str:
    """Join the generated prompt and the user suffix with ``", "``.

    Blank parts (empty, whitespace-only or ``None``) are skipped, so clearing
    the suffix field no longer leaves a dangling ``", "`` on the prompt.

    Args:
        prompt: Generated tag prompt.
        prompt_suffix: Extra tags appended after the prompt.

    Returns:
        The combined prompt; an empty string when both parts are blank.
    """
    parts = [part for part in (prompt, prompt_suffix) if part and part.strip()]
    return ", ".join(parts)
@spaces.GPU(duration=30)
@torch.inference_mode()
def generate_image(
    prompt: str,
    negative_prompt: str,
    generator,
    width: int,
    height: int,
    guidance_scale: float,
    num_inference_steps: int,
):
    """Render one image with the module-level SDXL pipeline.

    Args:
        prompt: Positive prompt.
        negative_prompt: Negative prompt.
        generator: ``torch.Generator`` that fixes the sampling noise.
        width: Output width.
        height: Output height.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of denoising steps.

    Returns:
        The first image of the pipeline output.

    Raises:
        gr.Error: If the pipeline was not loaded (no CUDA device at startup).
    """
    if pipe is None:
        # `pipe` is only created when CUDA is available; fail with a readable
        # message instead of "'NoneType' object is not callable".
        raise gr.Error(
            "The image pipeline is not loaded because no CUDA device is available."
        )

    output = pipe(
        prompt=prompt,
        negative_prompt=negative_prompt,
        guidance_scale=guidance_scale,
        num_inference_steps=num_inference_steps,
        width=width,
        height=height,
        generator=generator,
    )
    return output.images[0]
def on_generate(
    subject: str,
    suffix: str,
    negative_prompt: str,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Handle the "Pull gacha" button: generate a prompt, then an image.

    Args:
        subject: Subject tag chosen in the UI.
        suffix: Tags appended to the generated prompt.
        negative_prompt: Negative prompt for the image model.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        width: Image width.
        height: Image height.
        guidance_scale: Guidance scale for the image model.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (tqdm-tracking).

    Returns:
        ``(image, prompt, seed)`` — the prompt and seed actually used.
    """
    seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    rng = torch.Generator().manual_seed(seed)

    aspect_token = get_aspect_ratio(width, height)
    print("ar:", aspect_token)

    # Dart-generated tags first, user suffix appended afterwards.
    full_prompt = format_prompt(generate_prompt(subject, aspect_token), suffix)
    print(full_prompt)

    sampling_args = (width, height, guidance_scale, num_inference_steps)
    picture = generate_image(full_prompt, negative_prompt, rng, *sampling_args)
    return picture, full_prompt, seed
def on_retry(
    prompt: str,
    negative_prompt: str,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Handle the "Retry" button: re-render an image from an existing prompt.

    Unlike ``on_generate`` no new prompt is sampled; ``prompt`` is used as
    given.

    Args:
        prompt: Prompt to render.
        negative_prompt: Negative prompt for the image model.
        seed: Seed to use when ``randomize_seed`` is false.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` when true.
        width: Image width.
        height: Image height.
        guidance_scale: Guidance scale for the image model.
        num_inference_steps: Number of denoising steps.
        progress: Gradio progress tracker (tqdm-tracking).

    Returns:
        ``(image, prompt, seed)`` — the prompt unchanged and the seed used.
    """
    seed = random.randint(0, MAX_SEED) if randomize_seed else seed
    rng = torch.Generator().manual_seed(seed)
    print(prompt)

    sampling_args = (width, height, guidance_scale, num_inference_steps)
    picture = generate_image(prompt, negative_prompt, rng, *sampling_args)
    return picture, prompt, seed
# Page stylesheet: centers the main column and caps its width at 640px.
css = """
#col-container {
margin: 0 auto;
max-width: 640px;
}
"""

# Gradio UI. Components are created in display order inside the context
# managers; the two gr.on() calls at the end wire the buttons to the handlers.
with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
# IllustriousXL Random Gacha
Image model: [IllustriousXL v0.1](https://huggingface.co/OnomaAIResearch/Illustrious-xl-early-release-v0)
""")
        # Subject picker next to the main action button.
        with gr.Row():
            subject_radio = gr.Dropdown(
                label="Subject",
                choices=["1girl", "2girls", "1boy", "no humans"],
                value="1girl",
            )
            run_button = gr.Button("Pull gacha", variant="primary", scale=0)
        result = gr.Image(label="Gacha result", show_label=False)
        # Read-only view of the prompt that produced the image, plus a button
        # to render it again.
        with gr.Accordion("Generation details", open=False):
            with gr.Row():
                prompt_txt = gr.Textbox(label="Generated prompt", interactive=False)
                retry_button = gr.Button("🔄 Retry", scale=0)
        with gr.Accordion("Advanced Settings", open=False):
            prompt_suffix = gr.Text(
                label="Prompt suffix",
                visible=True,
                value=QUALITY_TAGS,
            )
            negative_prompt = gr.Text(
                label="Negative prompt",
                placeholder="Enter a negative prompt",
                visible=True,
                value=NEGATIVE_PROMPT,
            )
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            # When checked, the handlers ignore the slider and draw a new seed.
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=832,  # Replace with defaults that work for your model
                )
                height = gr.Slider(
                    label="Height",
                    minimum=512,
                    maximum=MAX_IMAGE_SIZE,
                    step=64,
                    value=1152,  # Replace with defaults that work for your model
                )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=1.0,
                    maximum=10.0,
                    step=0.5,
                    value=6.5,
                )
                num_inference_steps = gr.Slider(
                    label="Number of inference steps",
                    minimum=20,
                    maximum=40,
                    step=1,
                    value=28,
                )
    # "Pull gacha": sample a new prompt for the chosen subject and render it.
    # The input order must match the positional parameters of on_generate.
    gr.on(
        triggers=[run_button.click],
        fn=on_generate,
        inputs=[
            subject_radio,
            prompt_suffix,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, prompt_txt, seed],
    )
    # "Retry": render the prompt currently shown in prompt_txt again.
    # The input order must match the positional parameters of on_retry.
    gr.on(
        triggers=[retry_button.click],
        fn=on_retry,
        inputs=[
            prompt_txt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, prompt_txt, seed],
    )

# Enable request queuing and start the app.
demo.queue().launch()