yyk19 committed on
Commit
200818a
1 Parent(s): 1e76fae

Support multiple checkpoint options.

Browse files
Files changed (4) hide show
  1. app.py +37 -14
  2. app_old.py +176 -0
  3. laion1M_model_wo_ema.ckpt +3 -0
  4. scripts/rendertext_tool.py +25 -0
app.py CHANGED
@@ -1,9 +1,12 @@
1
  from cldm.ddim_hacked import DDIMSampler
2
  import math
3
  from omegaconf import OmegaConf
4
- from scripts.rendertext_tool import Render_Text, load_model_from_config
5
  import gradio as gr
6
  import os
 
 
 
7
  def process_multi_wrapper(rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3,
8
  shared_prompt,
9
  width_0, width_1, width_2, width_3,
@@ -15,7 +18,7 @@ def process_multi_wrapper(rendered_txt_0, rendered_txt_1, rendered_txt_2, render
15
  shared_num_samples, shared_image_resolution,
16
  shared_ddim_steps, shared_guess_mode,
17
  shared_strength, shared_scale, shared_seed,
18
- shared_eta, shared_a_prompt, shared_n_prompt):
19
 
20
  rendered_txt_values = [rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3]
21
  width_values = [width_0, width_1, width_2, width_3]
@@ -66,23 +69,35 @@ def process_multi_wrapper_only_show_rendered(rendered_txt_0, rendered_txt_1, ren
66
  shared_eta, shared_a_prompt, shared_n_prompt,
67
  only_show_rendered_image=True)
68
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
  cfg = OmegaConf.load("config.yaml")
71
  model = load_model_from_config(cfg, "model_wo_ema.ckpt", verbose=True)
72
  # model = load_model_from_config(cfg, "model_states.pt", verbose=True)
73
  # model = load_model_from_config(cfg, "model.ckpt", verbose=True)
74
-
75
- ddim_sampler = DDIMSampler(model)
76
  render_tool = Render_Text(model)
77
 
78
 
79
- # description = """
80
- # # <center>Expedit-SAM (Expedite Segment Anything Model without any training)</center>
81
- # Github link: [Link](https://github.com/Expedit-LargeScale-Vision-Transformer/Expedit-SAM)
82
- # You can select the speed mode you want to use from the "Speed Mode" dropdown menu and click "Run" to segment the image you uploaded to the "Input Image" box.
83
- # Points per side is a hyper-parameter that controls the number of points used to generate the segmentation masks. The higher the number, the more accurate the segmentation masks will be, but the slower the inference speed will be. The default value is 12.
84
- # """
85
-
86
  description = """
87
  ## Control Stable Diffusion with Glyph Images
88
  """
@@ -121,8 +136,13 @@ with block:
121
  with gr.Row():
122
  run_button = gr.Button(value="Run")
123
  show_render_button = gr.Button(value="Only Rendered")
 
 
 
 
 
124
 
125
- with gr.Accordion("Shared Advanced options", open=False):
126
  with gr.Row():
127
  shared_num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
128
  shared_image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64, visible=False)
@@ -170,7 +190,10 @@ with block:
170
  shared_strength, shared_scale, shared_seed,
171
  shared_eta, shared_a_prompt, shared_n_prompt],
172
  outputs=[result_gallery])
173
-
174
-
 
 
 
175
 
176
  block.launch()
 
1
  from cldm.ddim_hacked import DDIMSampler
2
  import math
3
  from omegaconf import OmegaConf
4
+ from scripts.rendertext_tool import Render_Text, load_model_from_config, load_model_ckpt
5
  import gradio as gr
6
  import os
7
+ import torch
8
+ import time
9
+
10
  def process_multi_wrapper(rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3,
11
  shared_prompt,
12
  width_0, width_1, width_2, width_3,
 
18
  shared_num_samples, shared_image_resolution,
19
  shared_ddim_steps, shared_guess_mode,
20
  shared_strength, shared_scale, shared_seed,
21
+ shared_eta, shared_a_prompt, shared_n_prompt, ):
22
 
23
  rendered_txt_values = [rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3]
24
  width_values = [width_0, width_1, width_2, width_3]
 
69
  shared_eta, shared_a_prompt, shared_n_prompt,
70
  only_show_rendered_image=True)
71
 
72
def _release_cuda_cache():
    """Ask PyTorch to release its cached CUDA memory; does nothing without CUDA."""
    if not torch.cuda.is_available():
        return
    for _ in range(5):
        torch.cuda.empty_cache()
    # Short pause carried over from the original implementation.
    # NOTE(review): the diff view lost indentation, so it is unclear whether
    # the original slept once or once per iteration -- confirm.
    time.sleep(2)
    print("empty the cuda cache")


def load_ckpt(model_ckpt="LAION-Glyph-10M"):
    """Load the selected checkpoint into the global model and rebuild the render tool.

    Args:
        model_ckpt: Display name of the checkpoint. Supported values are
            "LAION-Glyph-1M" and "LAION-Glyph-10M".

    Raises:
        ValueError: If ``model_ckpt`` is not a supported name. The previous
            code fell through silently in that case and still printed a
            success message although no weights had been loaded.

    Side effects:
        Rebinds the module-level ``model`` and ``render_tool`` globals and
        prints progress messages. Returns ``None``.
    """
    global render_tool, model

    checkpoint_files = {
        "LAION-Glyph-1M": "laion1M_model_wo_ema.ckpt",
        "LAION-Glyph-10M": "model_wo_ema.ckpt",
    }
    # Validate before touching any global state or the GPU cache, so a bad
    # name can never be reported as a successful switch.
    if model_ckpt not in checkpoint_files:
        raise ValueError(
            f"unknown model checkpoint {model_ckpt!r}; "
            f"expected one of {sorted(checkpoint_files)}"
        )

    _release_cuda_cache()

    model = load_model_ckpt(model, checkpoint_files[model_ckpt])
    render_tool = Render_Text(model)
    output_str = f"already change the model checkpoint to {model_ckpt}"
    print(output_str)

    # Flush again after the new weights have been loaded.
    _release_cuda_cache()
92
 
93
  cfg = OmegaConf.load("config.yaml")
94
  model = load_model_from_config(cfg, "model_wo_ema.ckpt", verbose=True)
95
  # model = load_model_from_config(cfg, "model_states.pt", verbose=True)
96
  # model = load_model_from_config(cfg, "model.ckpt", verbose=True)
97
+ # ddim_sampler = DDIMSampler(model)
 
98
  render_tool = Render_Text(model)
99
 
100
 
 
 
 
 
 
 
 
101
  description = """
102
  ## Control Stable Diffusion with Glyph Images
103
  """
 
136
  with gr.Row():
137
  run_button = gr.Button(value="Run")
138
  show_render_button = gr.Button(value="Only Rendered")
139
+ with gr.Accordion("Model Options", open=False):
140
+ with gr.Row():
141
+ # model_ckpt = gr.inputs.Dropdown(["LAION-Glyph-10M", "Textcaps5K-10"], label="Checkpoint", default = "LAION-Glyph-10M")
142
+ model_ckpt = gr.inputs.Dropdown(["LAION-Glyph-10M", "LAION-Glyph-1M"], label="Checkpoint", default = "LAION-Glyph-10M")
143
+ load_button = gr.Button(value = "Load Checkpoint")
144
 
145
+ with gr.Accordion("Shared Advanced Options", open=False):
146
  with gr.Row():
147
  shared_num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
148
  shared_image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64, visible=False)
 
190
  shared_strength, shared_scale, shared_seed,
191
  shared_eta, shared_a_prompt, shared_n_prompt],
192
  outputs=[result_gallery])
193
+
194
+ load_button.click(fn = load_ckpt,
195
+ inputs = [model_ckpt],
196
+ outputs = [result_gallery]
197
+ )
198
 
199
  block.launch()
app_old.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from cldm.ddim_hacked import DDIMSampler
2
+ import math
3
+ from omegaconf import OmegaConf
4
+ from scripts.rendertext_tool import Render_Text, load_model_from_config
5
+ import gradio as gr
6
+ import os
7
def process_multi_wrapper(rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3,
                          shared_prompt,
                          width_0, width_1, width_2, width_3,
                          ratio_0, ratio_1, ratio_2, ratio_3,
                          top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3,
                          top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3,
                          yaw_0, yaw_1, yaw_2, yaw_3,
                          num_rows_0, num_rows_1, num_rows_2, num_rows_3,
                          shared_num_samples, shared_image_resolution,
                          shared_ddim_steps, shared_guess_mode,
                          shared_strength, shared_scale, shared_seed,
                          shared_eta, shared_a_prompt, shared_n_prompt):
    """Group the four per-box inputs into lists and forward to the render tool.

    Each ``*_0`` .. ``*_3`` family is packed into one four-element list; the
    shared settings are passed through unchanged. Returns whatever the
    module-level ``render_tool.process_multi`` returns.
    """
    texts = [rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3]
    widths = [width_0, width_1, width_2, width_3]
    ratios = [ratio_0, ratio_1, ratio_2, ratio_3]
    left_xs = [top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3]
    top_ys = [top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3]
    yaws = [yaw_0, yaw_1, yaw_2, yaw_3]
    row_counts = [num_rows_0, num_rows_1, num_rows_2, num_rows_3]

    # Shared settings, in the positional order process_multi is called with.
    shared_settings = (
        shared_num_samples, shared_image_resolution,
        shared_ddim_steps, shared_guess_mode,
        shared_strength, shared_scale, shared_seed,
        shared_eta, shared_a_prompt, shared_n_prompt,
    )

    return render_tool.process_multi(
        texts, shared_prompt,
        widths, ratios,
        left_xs, top_ys,
        yaws, row_counts,
        *shared_settings,
    )
37
+
38
def process_multi_wrapper_only_show_rendered(rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3,
                                             shared_prompt,
                                             width_0, width_1, width_2, width_3,
                                             ratio_0, ratio_1, ratio_2, ratio_3,
                                             top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3,
                                             top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3,
                                             yaw_0, yaw_1, yaw_2, yaw_3,
                                             num_rows_0, num_rows_1, num_rows_2, num_rows_3,
                                             shared_num_samples, shared_image_resolution,
                                             shared_ddim_steps, shared_guess_mode,
                                             shared_strength, shared_scale, shared_seed,
                                             shared_eta, shared_a_prompt, shared_n_prompt):
    """Forward the grouped inputs to the render tool with ``only_show_rendered_image=True``.

    Takes the same arguments as ``process_multi_wrapper``: each ``*_0`` ..
    ``*_3`` family becomes one four-element list and the shared settings are
    passed through unchanged. Returns whatever the module-level
    ``render_tool.process_multi`` returns.
    """
    texts = [rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3]
    widths = [width_0, width_1, width_2, width_3]
    ratios = [ratio_0, ratio_1, ratio_2, ratio_3]
    left_xs = [top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3]
    top_ys = [top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3]
    yaws = [yaw_0, yaw_1, yaw_2, yaw_3]
    row_counts = [num_rows_0, num_rows_1, num_rows_2, num_rows_3]

    # Shared settings, in the positional order process_multi is called with.
    shared_settings = (
        shared_num_samples, shared_image_resolution,
        shared_ddim_steps, shared_guess_mode,
        shared_strength, shared_scale, shared_seed,
        shared_eta, shared_a_prompt, shared_n_prompt,
    )

    return render_tool.process_multi(
        texts, shared_prompt,
        widths, ratios,
        left_xs, top_ys,
        yaws, row_counts,
        *shared_settings,
        only_show_rendered_image=True,
    )
68
+
69
+
70
+ cfg = OmegaConf.load("config.yaml")
71
+ model = load_model_from_config(cfg, "model_wo_ema.ckpt", verbose=True)
72
+ # model = load_model_from_config(cfg, "model_states.pt", verbose=True)
73
+ # model = load_model_from_config(cfg, "model.ckpt", verbose=True)
74
+
75
+ ddim_sampler = DDIMSampler(model)
76
+ render_tool = Render_Text(model)
77
+
78
+
79
+ # description = """
80
+ # # <center>Expedit-SAM (Expedite Segment Anything Model without any training)</center>
81
+ # Github link: [Link](https://github.com/Expedit-LargeScale-Vision-Transformer/Expedit-SAM)
82
+ # You can select the speed mode you want to use from the "Speed Mode" dropdown menu and click "Run" to segment the image you uploaded to the "Input Image" box.
83
+ # Points per side is a hyper-parameter that controls the number of points used to generate the segmentation masks. The higher the number, the more accurate the segmentation masks will be, but the slower the inference speed will be. The default value is 12.
84
+ # """
85
+
86
+ description = """
87
+ ## Control Stable Diffusion with Glyph Images
88
+ """
89
+
90
+ SPACE_ID = os.getenv('SPACE_ID')
91
+ if SPACE_ID is not None:
92
+ # description += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. < a href=" ">< img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></ a></p >'
93
+ description += f'\n<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings. <a href="https://huggingface.co/spaces/{SPACE_ID}?duplicate=true"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space" /></a></p>'
94
+
95
+ block = gr.Blocks().queue()
96
+
97
+ with block:
98
+ with gr.Row():
99
+ gr.Markdown(description)
100
+ only_show_rendered_image = gr.Number(value=1, visible=False)
101
+
102
+ with gr.Column():
103
+
104
+ with gr.Row():
105
+ for i in range(4):
106
+ with gr.Column():
107
+ exec(f"""rendered_txt_{i} = gr.Textbox(label=f"Render Text {i+1}")""")
108
+
109
+ with gr.Accordion(f"Advanced options {i+1}", open=False):
110
+ exec(f"""width_{i} = gr.Slider(label="Bbox Width", minimum=0., maximum=1, value=0.3, step=0.01) """)
111
+ exec(f"""ratio_{i} = gr.Slider(label="Bbox_width_height_ratio", minimum=0., maximum=5, value=0., step=0.02, visible=False) """)
112
+ exec(f"""top_left_x_{i} = gr.Slider(label="Bbox Top Left x", minimum=0., maximum=1, value={0.35 - 0.25 * math.cos(math.pi * i)}, step=0.01) """)
113
+ exec(f"""top_left_y_{i} = gr.Slider(label="Bbox Top Left y", minimum=0., maximum=1, value={0.1 if i < 2 else 0.6}, step=0.01) """)
114
+ exec(f"""yaw_{i} = gr.Slider(label="Bbox Yaw", minimum=-180, maximum=180, value=0, step=5) """)
115
+ # exec(f"""num_rows_{i} = gr.Slider(label="num_rows", minimum=1, maximum=4, value=1, step=1, visible=False) """)
116
+ exec(f"""num_rows_{i} = gr.Slider(label="num_rows", minimum=1, maximum=4, value=1, step=1) """)
117
+
118
+ with gr.Row():
119
+ with gr.Column():
120
+ shared_prompt = gr.Textbox(label="Shared Prompt")
121
+ with gr.Row():
122
+ run_button = gr.Button(value="Run")
123
+ show_render_button = gr.Button(value="Only Rendered")
124
+
125
+ with gr.Accordion("Shared Advanced options", open=False):
126
+ with gr.Row():
127
+ shared_num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
128
+ shared_image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64, visible=False)
129
+ shared_strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01, visible=False)
130
+ shared_guess_mode = gr.Checkbox(label='Guess Mode', value=False, visible=False)
131
+ shared_seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
132
+ with gr.Row():
133
+ shared_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
134
+ shared_ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
135
+ shared_eta = gr.Number(label="eta (DDIM)", value=0.0, visible=False)
136
+ with gr.Row():
137
+ shared_a_prompt = gr.Textbox(label="Added Prompt", value='best quality, extremely detailed')
138
+ shared_n_prompt = gr.Textbox(label="Negative Prompt",
139
+ value='longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality')
140
+
141
+ with gr.Row():
142
+ result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
143
+
144
+ run_button.click(fn=process_multi_wrapper,
145
+ inputs=[rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3,
146
+ shared_prompt,
147
+ width_0, width_1, width_2, width_3,
148
+ ratio_0, ratio_1, ratio_2, ratio_3,
149
+ top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3,
150
+ top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3,
151
+ yaw_0, yaw_1, yaw_2, yaw_3,
152
+ num_rows_0, num_rows_1, num_rows_2, num_rows_3,
153
+ shared_num_samples, shared_image_resolution,
154
+ shared_ddim_steps, shared_guess_mode,
155
+ shared_strength, shared_scale, shared_seed,
156
+ shared_eta, shared_a_prompt, shared_n_prompt],
157
+ outputs=[result_gallery])
158
+
159
+ show_render_button.click(fn=process_multi_wrapper_only_show_rendered,
160
+ inputs=[rendered_txt_0, rendered_txt_1, rendered_txt_2, rendered_txt_3,
161
+ shared_prompt,
162
+ width_0, width_1, width_2, width_3,
163
+ ratio_0, ratio_1, ratio_2, ratio_3,
164
+ top_left_x_0, top_left_x_1, top_left_x_2, top_left_x_3,
165
+ top_left_y_0, top_left_y_1, top_left_y_2, top_left_y_3,
166
+ yaw_0, yaw_1, yaw_2, yaw_3,
167
+ num_rows_0, num_rows_1, num_rows_2, num_rows_3,
168
+ shared_num_samples, shared_image_resolution,
169
+ shared_ddim_steps, shared_guess_mode,
170
+ shared_strength, shared_scale, shared_seed,
171
+ shared_eta, shared_a_prompt, shared_n_prompt],
172
+ outputs=[result_gallery])
173
+
174
+
175
+
176
+ block.launch()
laion1M_model_wo_ema.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b86b22188bf580e80773a5ae101bf9787eb258349f3f1acf0ae50fd10cb3fec
3
+ size 6671922039
scripts/rendertext_tool.py CHANGED
@@ -46,6 +46,31 @@ def load_model_from_config(cfg, ckpt, verbose=False, not_use_ckpt=False):
46
  model.eval()
47
  return model
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  class Render_Text:
50
  def __init__(self,
51
  model,
 
46
  model.eval()
47
  return model
48
 
49
def load_model_ckpt(model, ckpt, verbose=True):
    """Load the weights stored at ``ckpt`` into an existing ``model``.

    Args:
        model: Module whose ``load_state_dict`` is called (non-strict).
        ckpt: Checkpoint path. A path ending in ``model_states.pt`` is read
            with ``torch.load`` and its ``"module"`` entry is used; any other
            path goes through ``load_state_dict(ckpt, location=...)``.
        verbose: When true, print the missing and unexpected keys, if any.

    Returns:
        The same ``model`` object, switched to eval mode.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print("checkpoint map location:", device)

    if ckpt.endswith("model_states.pt"):
        state = torch.load(ckpt, map_location=device)["module"]
    else:
        state = load_state_dict(ckpt, location=device)

    # Strip a leading "module." from key names; iterate over a snapshot of
    # the keys because the dict is modified inside the loop.
    prefix = "module."
    for name in list(state.keys()):
        if name.startswith(prefix):
            state[name[len(prefix):]] = state.pop(name)

    missing, unexpected = model.load_state_dict(state, strict=False)
    if verbose and len(missing) > 0:
        print("missing keys: {}".format(len(missing)))
        print(missing)
    if verbose and len(unexpected) > 0:
        print("unexpected keys: {}".format(len(unexpected)))
        print(unexpected)

    model.eval()
    return model
73
+
74
  class Render_Text:
75
  def __init__(self,
76
  model,