Spaces:
Sleeping
Sleeping
import math | |
from omegaconf import OmegaConf | |
from scripts.rendertext_tool import Render_Text, load_model_from_config, load_model_ckpt | |
import gradio as gr | |
import os | |
import torch | |
import time | |
from PIL import Image | |
from cldm.hack import disable_verbosity, enable_sliced_attention | |
# from pytorch_lightning import seed_everything | |
from example_list import examples | |
def process_multi_wrapper(rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3, | |
shared_prompt, | |
width_0, width_1, width_2, width_3, | |
ratio_0, ratio_1, ratio_2, ratio_3, | |
top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3, | |
top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3, | |
yaw_0, yaw_1, yaw_2, yaw_3, | |
num_rows_0, num_rows_1, num_rows_2, num_rows_3, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt, allow_run_generation = True): | |
if not allow_run_generation: | |
return "Please get the glyph image first by clicking the 'Render Glyph Image' button", None, allow_run_generation | |
rendered_txt_values = [rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3] | |
width_values = [width_0, width_1, width_2, width_3] | |
ratio_values = [ratio_0, ratio_1, ratio_2, ratio_3] | |
top_left_x_values = [top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3] | |
top_left_y_values = [top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3] | |
yaw_values = [yaw_0, yaw_1, yaw_2, yaw_3] | |
num_rows_values = [num_rows_0, num_rows_1, num_rows_2, num_rows_3] | |
allow_run_generation = False | |
return "The image generation process finished!", render_tool.process_multi(rendered_txt_values, shared_prompt, | |
width_values, ratio_values, | |
top_left_x_values, top_left_y_values, | |
yaw_values, num_rows_values, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt | |
), allow_run_generation | |
def process_multi_wrapper_only_show_rendered(rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3, | |
shared_prompt, | |
width_0, width_1, width_2, width_3, | |
ratio_0, ratio_1, ratio_2, ratio_3, | |
top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3, | |
top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3, | |
yaw_0, yaw_1, yaw_2, yaw_3, | |
num_rows_0, num_rows_1, num_rows_2, num_rows_3, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt): | |
rendered_txt_values = [rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3] | |
width_values = [width_0, width_1, width_2, width_3] | |
ratio_values = [ratio_0, ratio_1, ratio_2, ratio_3] | |
top_left_x_values = [top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3] | |
top_left_y_values = [top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3] | |
yaw_values = [yaw_0, yaw_1, yaw_2, yaw_3] | |
num_rows_values = [num_rows_0, num_rows_1, num_rows_2, num_rows_3] | |
allow_run_generation = True | |
glyph_image = render_tool.process_multi(rendered_txt_values, shared_prompt, | |
width_values, ratio_values, | |
top_left_x_values, top_left_y_values, | |
yaw_values, num_rows_values, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt, | |
only_show_rendered_image=True) | |
if glyph_image[0] is None: | |
return "Warning: no glyph image would be rendered because the glyph insructions are not provided!", None, allow_run_generation | |
else: | |
return "The glyph image is successfully rendered!", glyph_image, allow_run_generation | |
def load_ckpt(model_ckpt = "LAION-Glyph-10M-Epoch-5"): | |
global render_tool, model | |
if torch.cuda.is_available(): | |
for i in range(5): | |
torch.cuda.empty_cache() | |
time.sleep(2) | |
print("empty the cuda cache") | |
# if model_ckpt == "LAION-Glyph-1M": | |
# model = load_model_ckpt(model, "laion1M_model_wo_ema.ckpt") | |
# if model_ckpt == "LAION-Glyph-10M-Epoch-5": | |
# model = load_model_ckpt(model, "laion10M_epoch_5_model_wo_ema.ckpt") | |
if model_ckpt == "LAION-Glyph-10M-Epoch-6": | |
model = load_model_ckpt(model, "checkpoints/laion10M_epoch_6_model_wo_ema.ckpt") | |
elif model_ckpt == "TextCaps-5K-Epoch-10": | |
model = load_model_ckpt(model, "checkpoints/textcaps5K_epoch_10_model_wo_ema.ckpt") | |
elif model_ckpt == "TextCaps-5K-Epoch-20": | |
model = load_model_ckpt(model, "checkpoints/textcaps5K_epoch_20_model_wo_ema.ckpt") | |
elif model_ckpt == "TextCaps-5K-Epoch-40": | |
model = load_model_ckpt(model, "checkpoints/textcaps5K_epoch_40_model_wo_ema.ckpt") | |
render_tool = Render_Text(model, save_memory = SAVE_MEMORY) | |
output_str = f"already change the model checkpoint to {model_ckpt}" | |
print(output_str) | |
if torch.cuda.is_available(): | |
for i in range(5): | |
torch.cuda.empty_cache() | |
time.sleep(2) | |
print("empty the cuda cache") | |
allow_run_generation = False | |
return output_str, None, allow_run_generation | |
SAVE_MEMORY = True #False | |
disable_verbosity() | |
if SAVE_MEMORY: | |
enable_sliced_attention() | |
cfg = OmegaConf.load("config.yaml") | |
model = load_model_from_config(cfg, "checkpoints/laion10M_epoch_6_model_wo_ema.ckpt", verbose=True) | |
render_tool = Render_Text(model, save_memory = SAVE_MEMORY) | |
description = """ | |
## Control Stable Diffusion with Glyph Images | |
Github link: [Link](https://github.com/AIGText/GlyphControl-release)\n | |
Report: [link](https://arxiv.org/pdf/2305.18259.pdf)\n | |
You could try the listed examples at the bottom by clicking on them. We will update the examples progressively. | |
""" | |
SPACE_ID = os.getenv('SPACE_ID') | |
if SPACE_ID is not None: | |
# description += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. < a href=" ">< img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></ a></p >' | |
description += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>' | |
block = gr.Blocks().queue() | |
with block: | |
with gr.Row(): | |
gr.Markdown(description) | |
only_show_rendered_image = gr.Number(value=1, visible=False) | |
default_width = [0.3, 0.3, 0.3, 0.3] | |
default_top_left_x = [0.35, 0.15, 0.15, 0.5] | |
default_top_left_y = [0.4, 0.15, 0.65, 0.65] | |
with gr.Column(): | |
with gr.Row(): | |
for i in range(4): | |
with gr.Column(): | |
exec(f"""rendered_txt_{i} = gr.Textbox(label=f"Render Text {i+1}")""") | |
with gr.Accordion(f"Advanced options {i+1}", open=False): | |
exec(f"""width_{i} = gr.Slider(label="Bbox Width", minimum=0., maximum=1, value={default_width[i]}, step=0.01) """) | |
exec(f"""ratio_{i} = gr.Slider(label="Bbox_width_height_ratio", minimum=0., maximum=5, value=0., step=0.02, visible=False) """) | |
# exec(f"""top_left_x_{i} = gr.Slider(label="Bbox Top Left x", minimum=0., maximum=1, value={0.35 - 0.25 * math.cos(math.pi * i)}, step=0.01) """) | |
# exec(f"""top_left_y_{i} = gr.Slider(label="Bbox Top Left y", minimum=0., maximum=1, value={0.1 if i < 2 else 0.6}, step=0.01) """) | |
exec(f"""top_left_x_{i} = gr.Slider(label="Bbox Top Left x", minimum=0., maximum=1, value={default_top_left_x[i]}, step=0.01) """) | |
exec(f"""top_left_y_{i} = gr.Slider(label="Bbox Top Left y", minimum=0., maximum=1, value={default_top_left_y[i]}, step=0.01) """) | |
exec(f"""yaw_{i} = gr.Slider(label="Bbox Yaw", minimum=-20, maximum=20, value=0, step=5) """) | |
# exec(f"""num_rows_{i} = gr.Slider(label="num_rows", minimum=1, maximum=4, value=1, step=1, visible=False) """) | |
exec(f"""num_rows_{i} = gr.Slider(label="num_rows", minimum=1, maximum=4, value=1, step=1) """) | |
with gr.Row(): | |
with gr.Column(): | |
shared_prompt = gr.Textbox(label="Shared Prompt") | |
with gr.Row(): | |
show_render_button = gr.Button(value="Render Glyph Image") | |
run_button = gr.Button(value="Run Generation") | |
allow_run_generation = gr.Checkbox(label='allow_run_generation', | |
value=False, visible=False) | |
with gr.Accordion("Model Options", open=False): | |
with gr.Row(): | |
# model_ckpt = gr.inputs.Dropdown(["LAION-Glyph-10M", "Textcaps5K-10"], label="Checkpoint", default = "LAION-Glyph-10M") | |
# model_ckpt = gr.inputs.Dropdown(["LAION-Glyph-10M-Epoch-6", "LAION-Glyph-10M-Epoch-5", "LAION-Glyph-1M"], label="Checkpoint", default = "LAION-Glyph-10M-Epoch-6") | |
model_ckpt = gr.inputs.Dropdown(["LAION-Glyph-10M-Epoch-6", "TextCaps-5K-Epoch-10", "TextCaps-5K-Epoch-20", "TextCaps-5K-Epoch-40"], label="Checkpoint", default = "LAION-Glyph-10M-Epoch-6") | |
# load_button = gr.Button(value = "Load Checkpoint") | |
with gr.Accordion("Shared Advanced Options", open=False): | |
with gr.Row(): | |
shared_num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=5, step=1) | |
shared_image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64, visible=False) | |
shared_strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01, visible=False) | |
shared_guess_mode = gr.Checkbox(label='Guess Mode', value=False, visible=False) | |
shared_seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True) | |
with gr.Row(): | |
shared_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1) | |
shared_ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1) | |
shared_eta = gr.Number(label="eta (DDIM)", value=0.0, visible=False) | |
with gr.Row(): | |
shared_a_prompt = gr.Textbox(label="Added Prompt", value='4K, dslr, best quality, extremely detailed') | |
shared_n_prompt = gr.Textbox(label="Negative Prompt", | |
value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality') | |
with gr.Accordion("Output", open=True): | |
with gr.Row(): | |
message = gr.Text(interactive=False, label = "Message") | |
with gr.Row(): | |
result_gallery = gr.Gallery(label='Images', show_label=False, elem_id="gallery").style(grid=2, height='auto') | |
export_button = gr.Button(value="Export Parameters") | |
gr.Examples( | |
examples= examples, #"./examples", | |
# [[, "LAION-Glyph-10M-Epoch-6"]], | |
# ["./assets/img2.jpg", "r50-hdetr_sam-vit-b"], | |
# ["./assets/img3.jpg", "r50-hdetr_sam-vit-b"], | |
# ["./assets/img4.jpg", "r50-hdetr_sam-vit-b"]], | |
inputs=[ model_ckpt, shared_prompt, | |
rendered_txt_0, width_0, ratio_0, top_left_x_0, top_left_y_0, yaw_0, num_rows_0, | |
rendered_txt_1, width_1, ratio_1, top_left_x_1, top_left_y_1, yaw_1, num_rows_1, | |
rendered_txt_2, width_2, ratio_2, top_left_x_2, top_left_y_2, yaw_2, num_rows_2, | |
rendered_txt_3, width_3, ratio_3, top_left_x_3, top_left_y_3, yaw_3, num_rows_3, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt], | |
# outputs=output_img, | |
# fn=inference | |
) | |
run_button.click(fn=process_multi_wrapper, | |
inputs=[rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3, | |
shared_prompt, | |
width_0, width_1, width_2, width_3, | |
ratio_0, ratio_1, ratio_2, ratio_3, | |
top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3, | |
top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3, | |
yaw_0, yaw_1, yaw_2, yaw_3, | |
num_rows_0, num_rows_1, num_rows_2, num_rows_3, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt, allow_run_generation], | |
outputs=[message, result_gallery, allow_run_generation]) | |
show_render_button.click(fn=process_multi_wrapper_only_show_rendered, | |
inputs=[rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3, | |
shared_prompt, | |
width_0, width_1, width_2, width_3, | |
ratio_0, ratio_1, ratio_2, ratio_3, | |
top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3, | |
top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3, | |
yaw_0, yaw_1, yaw_2, yaw_3, | |
num_rows_0, num_rows_1, num_rows_2, num_rows_3, | |
shared_num_samples, shared_image_resolution, | |
shared_ddim_steps, shared_guess_mode, | |
shared_strength, shared_scale, shared_seed, | |
shared_eta, shared_a_prompt, shared_n_prompt], | |
outputs=[message, result_gallery, allow_run_generation]) | |
model_ckpt.change(load_ckpt, | |
inputs = [model_ckpt], | |
outputs = [message, result_gallery, allow_run_generation] | |
) | |
block.launch() |