Tony Lian committed on
Commit
f66a953
•
1 Parent(s): c870232
Files changed (2) hide show
  1. README.md +4 -3
  2. app.py +157 -0
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Llm Grounded Diffusion
3
- emoji: 👀
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
7
  sdk_version: 3.32.0
8
  app_file: app.py
9
- pinned: false
 
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: LLM Grounded Diffusion
3
+ emoji: 😊
4
  colorFrom: red
5
  colorTo: pink
6
  sdk: gradio
7
  sdk_version: 3.32.0
8
  app_file: app.py
9
+ pinned: true
10
+ tags: [llm, diffusion, grounding, grounded, llm-grounded, text-to-image, language, large language models, layout, generation, generative, customization, personalization, prompting, chatgpt, gpt-3.5, gpt-4]
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ import ast
4
+ from matplotlib.patches import Polygon
5
+ from matplotlib.collections import PatchCollection
6
+ import matplotlib.pyplot as plt
7
+
8
# Canvas dimensions that every layout box is expressed in; the prompt template
# below tells the LLM the images are 512x512.
box_scale = (512, 512)
size = box_scale

# Marker that separates the object list from the background prompt in an LLM
# response.
bg_prompt_text = "Background prompt: "

# Few-shot instruction template sent to the LLM. ``{prompt}`` is the only
# format field; the template deliberately ends with "Objects: " so the model
# continues with the box list. Every example's caption must agree with its
# object list, because the model imitates the examples literally.
simplified_prompt = """You are an intelligent bounding box generator. I will provide you with a caption for a photo, image, or painting. Your task is to generate the bounding boxes for the objects mentioned in the caption, along with a background prompt describing the scene. The images are of size 512x512, and the bounding boxes should not overlap or go beyond the image boundaries. Each bounding box should be in the format of (object name, [top-left x coordinate, top-left y coordinate, box width, box height]) and include exactly one object. Do not put objects that are already provided in the bounding boxes into the background prompt. If needed, you can make reasonable guesses. Please refer to the example below for the desired format.

Caption: A realistic image of landscape scene depicting a green car parking on the left of a blue truck, with a red air balloon and a bird in the sky
Objects: [('a green car', [21, 181, 211, 159]), ('a blue truck', [269, 181, 209, 160]), ('a red air balloon', [66, 8, 145, 135]), ('a bird', [296, 42, 143, 100])]
Background prompt: A realistic image of a landscape scene

Caption: A watercolor painting of a wooden table in the living room with an apple on it
Objects: [('a wooden table', [65, 243, 344, 206]), ('an apple', [206, 306, 81, 69])]
Background prompt: A watercolor painting of a living room

Caption: A watercolor painting of two pandas eating bamboo in a forest
Objects: [('a panda eating bamboo', [30, 171, 212, 226]), ('a panda eating bamboo', [264, 173, 222, 221])]
Background prompt: A watercolor painting of a forest

Caption: A realistic image of three skiers standing in a line on the snow near a palm tree
Objects: [('a skier', [5, 152, 139, 168]), ('a skier', [278, 192, 121, 158]), ('a skier', [148, 173, 124, 155]), ('a palm tree', [404, 180, 103, 180])]
Background prompt: A realistic image of an outdoor scene with snow

Caption: An oil painting of a pink dolphin jumping on the left of a steam boat on the sea
Objects: [('a steam boat', [232, 225, 257, 149]), ('a jumping pink dolphin', [21, 249, 189, 123])]
Background prompt: An oil painting of the sea

Caption: A realistic image of a cat playing with a dog in a park with flowers
Objects: [('a playful cat', [51, 67, 271, 324]), ('a playful dog', [302, 119, 211, 228])]
Background prompt: A realistic image of a park with flowers

Caption: 一个客厅场景的油画，墙上挂着电视，电视下面是一个柜子，柜子上有一个花瓶。
Objects: [('a tv', [88, 85, 335, 203]), ('a cabinet', [57, 308, 404, 201]), ('a flower vase', [166, 222, 92, 108])]
Background prompt: An oil painting of a living room scene

Caption: {prompt}
Objects: """
45
+
46
def get_lmd_prompt(prompt):
    """Fill the few-shot layout template with the user's caption.

    An empty caption is replaced by a built-in example caption so the
    generated text is always a complete request.

    Args:
        prompt: Caption text entered by the user.

    Returns:
        ``simplified_prompt`` with its ``{prompt}`` field substituted.
    """
    caption = (
        "A realistic photo of a gray cat and an orange dog on the grass."
        if prompt == ""
        else prompt
    )
    return simplified_prompt.format(prompt=caption)
50
+
51
def get_layout_image(response):
    """Render the layout described by an LLM response as an RGB image array.

    Args:
        response: Raw text pasted from the LLM; it is handed to
            ``parse_input`` to obtain the boxes and the background prompt.

    Returns:
        A ``uint8`` NumPy array of shape ``(height, width, 3)`` holding the
        rendered figure.

    Raises:
        gr.Error: Propagated from ``parse_input`` when the response cannot
            be parsed.
    """
    gen_boxes, bg_prompt = parse_input(response)
    fig = plt.figure(figsize=(8, 8))
    try:
        show_boxes(gen_boxes, bg_prompt)
        # The canvas has to be drawn before its pixel buffer can be read.
        fig.canvas.draw()
        # buffer_rgba() replaces the deprecated tostring_rgb(); drop the alpha
        # channel and copy so the result outlives the figure's buffer.
        rgba = np.asarray(fig.canvas.buffer_rgba())
        return rgba[..., :3].copy()
    finally:
        # plt.clf() only clears the figure; pyplot would keep one more figure
        # registered per request. Closing releases it even when drawing fails.
        plt.close(fig)
65
+
66
def parse_input(text=None):
    """Parse an LLM response into a box list and a background prompt.

    The expected shape is an optional ``"Objects: "`` prefix, a Python
    literal list of ``(name, [x, y, w, h])`` entries, the
    ``bg_prompt_text`` marker, and then the background prompt.

    Args:
        text: Raw response text.

    Returns:
        A ``(gen_boxes, bg_prompt)`` tuple: the evaluated literal and the
        stripped background prompt string.

    Raises:
        gr.Error: If the text is missing, does not contain exactly one
            background-prompt marker after the object list, or the object
            list is not a valid Python literal.
    """
    try:
        if text is None:
            raise ValueError("no response text was provided")

        if "Objects: " in text:
            text = text.split("Objects: ")[1]

        text_split = text.split(bg_prompt_text)
        # Previously a response without exactly one marker fell through with
        # the result names unbound and crashed at the return statement,
        # outside the user-facing error path.
        if len(text_split) != 2:
            raise ValueError(
                f"expected exactly one '{bg_prompt_text.strip()}' section "
                f"after the object list, found {len(text_split) - 1}"
            )

        gen_boxes, bg_prompt = text_split
        # literal_eval only accepts Python literals, so pasted text cannot
        # execute code.
        gen_boxes = ast.literal_eval(gen_boxes)
        bg_prompt = bg_prompt.strip()
    except Exception as e:
        raise gr.Error(f"response format invalid: {e} (text: {text})") from e

    return gen_boxes, bg_prompt
80
+
81
def draw_boxes(anns):
    """Outline and label each annotation on the current Matplotlib axes.

    Args:
        anns: Iterable of dicts with a ``'bbox'`` entry ``[x, y, w, h]`` and
            either a ``'name'`` or a ``'category_id'`` entry used as the
            label.
    """
    axes = plt.gca()
    axes.set_autoscale_on(False)

    outlines = []
    edge_colors = []
    for ann in anns:
        # Random colour with every channel in [0.4, 1.0).
        edge_color = np.random.random((1, 3)) * 0.6 + 0.4
        left, top, box_w, box_h = ann['bbox']
        right = left + box_w
        bottom = top + box_h
        corners = np.array(
            [[left, top], [left, bottom], [right, bottom], [right, top]]
        ).reshape((4, 2))
        outlines.append(Polygon(corners))
        edge_colors.append(edge_color)

        # Fall back to the category id only when no name is present.
        if 'name' in ann:
            label = ann['name']
        else:
            label = str(ann['category_id'])
        axes.text(left, top, label, style='italic',
                  bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 5})

    # One collection for all outlines, hollow so the labels stay readable.
    axes.add_collection(
        PatchCollection(outlines, facecolor='none',
                        edgecolors=edge_colors, linewidths=2)
    )
103
+
104
+
105
def show_boxes(gen_boxes, bg_prompt=None):
    """Draw a layout on a white canvas using the current pyplot figure.

    Args:
        gen_boxes: Sequence of entries whose first item is the object name
            and whose second item is the ``[x, y, w, h]`` box.
        bg_prompt: Optional background prompt. When given, it is written at
            the top-left corner and a black frame marking the
            ``size``-sized image area is drawn.
    """
    anns = [{'name': entry[0], 'bbox': entry[1]} for entry in gen_boxes]

    # White canvas, 4 px larger than the image so the frame on the edge shows.
    canvas = np.full((size[0] + 4, size[1] + 4, 3), 255, dtype=np.uint8)
    plt.imshow(canvas)
    plt.axis('off')

    if bg_prompt is not None:
        axes = plt.gca()
        axes.text(0, 0, bg_prompt, style='italic',
                  bbox={'facecolor': 'white', 'alpha': 0.7, 'pad': 5})

        # Black frame around the full image area.
        frame_w, frame_h = size[1], size[0]
        frame = np.array(
            [[0, 0], [0, frame_h], [frame_w, frame_h], [frame_w, 0]]
        ).reshape((4, 2))
        axes.add_collection(
            PatchCollection([Polygon(frame)], facecolor='none',
                            edgecolors=[np.zeros((1, 3))], linewidths=2)
        )

    draw_boxes(anns)
132
+
133
# Gradio UI: one tab builds the text to paste into ChatGPT, the other renders
# the layout that ChatGPT returns.
with gr.Blocks() as g:
    gr.HTML("""<h1>LLM-grounded Diffusion: Enhancing Prompt Understanding of Text-to-Image Diffusion Models with Large Language Models</h1>
<p>This is a space that allows you to explore the layouts generated by ChatGPT on your own with a simplified set of examples. The layout-to-image generation part will be added.</p>
<p>Read <a href='https://llm-grounded-diffusion.github.io/'>a brief introduction on our project page</a> or <a href='https://arxiv.org/pdf/2305.13655.pdf'>our work on arxiv</a>. <a href='https://llm-grounded-diffusion.github.io/#citation'>Cite our work</a> if our ideas inspire you.</p>
<p>Tips: you can perform multi-round specification by giving ChatGPT follow-up requests (e.g., make the object boxes bigger).</p>
<p>Tips: you can also try prompts in Simplified Chinese. If you want to try prompts in another language, translate the first line of last example to your language.</p>""")
    with gr.Tab("Image Prompt to ChatGPT"):
        with gr.Row():
            with gr.Column(scale=1):
                prompt = gr.Textbox(lines=2, label="Prompt for Layout Generation", placeholder="A realistic photo of a gray cat and an orange dog on the grass.")
                greet_btn = gr.Button("Generate Prompt")
            with gr.Column(scale=1):
                output = gr.Textbox(label="Paste this into ChatGPT (GPT-4 usually gives better results)")
        greet_btn.click(fn=get_lmd_prompt, inputs=prompt, outputs=output, api_name="get_lmd_prompt")

    with gr.Tab("Visualize ChatGPT-generated Layout"):
        with gr.Row():
            with gr.Column(scale=1):
                # The names prompt/greet_btn/output are rebound here; the
                # first tab's listener was already wired to its own widgets.
                prompt = gr.Textbox(lines=2, label="Paste ChatGPT response here", placeholder="Paste ChatGPT response here")
                greet_btn = gr.Button("Visualize Layout")
            with gr.Column(scale=1):
                # NOTE(review): `css` does not appear to be a documented
                # gr.Image argument; confirm whether it has any effect.
                output = gr.Image(shape=(512, 512), elem_classes="img", elem_id="img", css="img {width: 300px}")
        greet_btn.click(fn=get_layout_image, inputs=prompt, outputs=output, api_name="chatgpt-to-layout")

g.launch()