lemonteaa commited on
Commit
df511cb
1 Parent(s): b7b30a7

First attempt

Browse files
Files changed (5) hide show
  1. app.py +253 -0
  2. integrations.py +16 -0
  3. prompts.py +56 -0
  4. requirements.txt +2 -0
  5. utilities.py +25 -0
app.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utilities import convert_history_to_openai_format, move_file_to_temp_dir, remake_temp_dir
2
+ from integrations import public_sdxl_gen
3
+ from prompts import sys_p, sys_plan_init, sys_plan_revise, sys_sd_prompt, sys_extract_json_prompt, revise_template
4
+
5
+ import openai
6
+ import gradio as gr
7
+
8
+ import json
9
+ import logging
10
+ import time
11
+ from pathlib import Path
12
+
13
+ # Utilities for user message and extracting AI's last response
14
def add_text(msg, chatbot):
    """Queue the user's message as a new, not-yet-answered chat turn.

    Appends ``[msg, None]`` to ``chatbot`` in place, then returns an empty
    string together with that same history list.
    """
    pending_turn = [msg, None]
    chatbot.append(pending_turn)
    return "", chatbot
17
+
18
def extract_plan(history : list[tuple]) -> str:
    """Return the second element (the reply slot) of the last chat turn."""
    latest_turn = history[-1]
    return latest_turn[1]
20
+
21
def flip_edit_display(isEditing, edit, display):
    """Toggle a panel between its editable text area and its rendered view.

    Returns a three-item list: the new editing flag, an update for the edit
    component, and an update for the display component. When leaving edit
    mode, the current ``edit`` text is pushed into the display component.
    ``display`` is accepted but never read.
    """
    if not isEditing:
        # Currently displaying: reveal the editor and hide the rendered view.
        return [True, gr.update(visible=True), gr.update(visible=False)]
    # Currently editing: hide the editor and show the rendered view with the edited text.
    return [False, gr.update(visible=False), gr.update(visible=True, value=edit)]
26
+
27
+ # Streaming mode chat
28
+ # TODO: Convert to use gradio's new low-code/high level Chat component
29
def bot(history):
    """Stream the assistant's answer into the last chat turn.

    The history is converted to OpenAI chat messages (with ``sys_p`` as the
    system prompt) and sent as a streaming chat completion. The reply slot of
    the last turn is reset to an empty string, then each received text
    fragment is appended to it and the updated history is yielded.
    """
    completion_stream = openai.ChatCompletion.create(
        model="accounts/fireworks/models/llama-v2-13b-chat-w8a16",
        messages=convert_history_to_openai_format(sys_p, history),
        temperature=0.7,
        max_tokens=512,
        stream=True,
    )
    history[-1][1] = ""
    for piece in completion_stream:
        fragment = piece.choices[0].delta.get("content", "")
        if fragment is None:
            # Chunks whose content is explicitly None carry no text.
            continue
        history[-1][1] += fragment
        yield history
44
+
45
+
46
def instruct(system_prompt : str, message : str) -> str:
    """
    One-shot LLM call: wrap ``system_prompt`` and ``message`` into a single
    turn conversation for the llama v2 13b chat model and return the text of
    the first choice of the (non-streaming) completion.
    """
    single_turn = [
        {"role":"system", "content": system_prompt},
        {"role":"user", "content": message},
    ]
    completion = openai.ChatCompletion.create(
        model="accounts/fireworks/models/llama-v2-13b-chat-w8a16",
        messages=single_turn,
        temperature=0.7,
        max_tokens=2048,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
58
+
59
+
60
def attempt_extraction(plan : str) -> list:
    """
    Attempt to turn natural language text into structured format. We use this flow/pattern:

    1. Use a coding focused LLM which work better on this kind of task. (guess only)
    2. Use guidance like technique (called "nudge") - prime the AI into a JSON only output
       by writing the beginning part of the answer for it.
    3. Due to limitation of specific API provider, use streaming mode and detect stop token
       ourself. Here the JSON is enclosed in markdown codeblock so we stop at ```.
    4. Then backfill the nudge tokens and attempt a parse.

    Returns the list stored under the "prompts" key of the parsed JSON object.

    Raises json.JSONDecodeError (a ValueError) when the generated text is not
    valid JSON, and KeyError when the parsed object has no "prompts" key.
    """
    fence = "```"
    response = openai.Completion.create(
        model="accounts/fireworks/models/starcoder-16b-w8a16",
        prompt=sys_extract_json_prompt.format(plan=plan),
        temperature=0.1,
        max_tokens=1280,
        stream=True
    )
    ans = ""
    for chunk in response:
        txt = chunk.choices[0].get("text", "")
        if txt is not None:
            ans += txt
        if fence in ans:
            # Closing code fence seen: everything we need has arrived.
            break
    # The stream may end (token limit / model stop) without ever emitting the
    # closing fence. partition() keeps the whole text in that case instead of
    # failing on a missing marker, so complete-but-unfenced JSON still parses.
    body, _, _ = ans.partition(fence)
    # Backfill the opening tokens that the prompt already wrote on the model's behalf.
    return json.loads("{ \"prompts\":[{" + body)["prompts"]
89
+
90
+ #buffer_imgs = []
91
+ #mydata = []
92
+
93
def mock_get_img(index, delay=2):
    """Stand-in for the image generator, for testing without the remote service.

    Sleeps for ``delay`` seconds (default 2, the previous fixed value) and
    returns a ``(seed, path)`` pair with the constant seed 42 and a path built
    from ``index``.
    """
    time.sleep(delay)
    return (42, "/content/first_test_alpha{i}.png".format(i=index))
96
+
97
def extract_and_gen_photo(data, photo_dir : str):
    """Generate one photo per prompt item and stream the growing gallery.

    ``data`` is a sequence of dicts read via the keys "prompt", "filename"
    and "caption". For each item an image is requested from
    ``public_sdxl_gen``, moved into ``photo_dir`` and added to the gallery.
    After every item the generator yields a tuple of the
    ``(path, caption)`` pairs collected so far and the seeds so far joined
    by commas.
    """
    def progress(n, desc="Running..."):
        # Surface polling progress to the user as a Gradio warning message.
        gr.Warning("Overall progress: {n}%, queue status: {desc}".format(n=n*100, desc=desc))

    logging.info(data)
    total = len(data)
    gallery_items = []
    seeds_so_far = []
    for position, item in enumerate(data):
        # position / total is the fraction of items completed before this one.
        seed, img = public_sdxl_gen(item["prompt"], "", progress, position / total, 30)
        new_path = move_file_to_temp_dir(photo_dir, img, item["filename"])
        logging.debug(type(img))
        logging.debug(img)
        gallery_items.append((new_path, item["caption"]))
        seeds_so_far.append(seed)
        yield (gallery_items, ",".join(map(str, seeds_so_far)))
121
+
122
def do_backup(user_dir : str) -> list[str]:
    """List every file under ``user_dir`` (recursively) for download.

    Returns the file paths as strings in sorted order. Directories are
    skipped. When ``user_dir`` is ``None`` or empty (no photo directory has
    been created yet) an empty list is returned instead of raising.
    """
    if not user_dir:
        return []
    return sorted(str(p) for p in Path(user_dir).rglob('*') if p.is_file())
125
+
126
+ # Main UI Area
127
+
128
# Markdown help text rendered at the top of the "Initial Discovery" tab.
doc_initial = """## Initial/Discovery Conversation
Chat to the AI designer! Press enter to submit your message (begin by saying hi). When it gave a high level plan,
press "Extract" and wait a bit, you will be taken to the next step.

*Note*: Currently this is a crude method and we just directly copy the AI's last message in full.
"""

# Markdown help text rendered in the first row of the "Design" tab.
doc_planning = """## Draft/Revise plan
The AI will now devise a more concrete/low level plan. You can also work interactively with the AI to further
revise it. Let's look at the buttons:

- In each panel, you can flip between editing manually or displaying it in Markdown format using the flip button.
- For backup, it is recommended to copy the markdown source code in edit mode.
- Describe what changes you would like to make in the bottom most textbox, then press "Revise this plan"...
- The side by side is for you to compare two proposals. Each one has a set of buttons below it. Revising the proposal you've chosen will discard the other one, then AI will show the one you chose to revise against the revised version it made.
- When you're happy, pick one of the proposal and click "Accept this plan". You will be taken to the next step.
"""

# Markdown notice rendered next to the photo gallery in the "Illustration" tab.
doc_save_warning = """# Illustration stage
Now AI will generate a set of prompts for stable diffusion. Then you can trigger actually generating the photos...

# Important Notice
- Highly experimental (due to flakiness of JSON extraction, a known weakness of LLM technology) and may fail. No gaurantee offered.
- Currently using public HF space `victor/SDXL-0.9`, each photo takes about 90 seconds, and there can be a long queue.
- You will be informed of the progress every 30 seconds. Don't worry about the NaNs/None - I'm just too lazy to convert the data properly... it usually means the job is just starting or your reqest is being processed. On the other hand, it will show some numbers if you are queued up.
- **Remember to save your work!** To do so, head over to the Backup tab to download your photos.
- TODO: Also download texts generated. For now one workaround is to copy paste text manually.
"""

# Markdown help text rendered at the top of the "Backup" tab.
doc_backup = """# Backup
To backup, press the button and wait a bit. Then download the files. The seed (that can regenerate the same photo in principles) are also provided.
"""
160
+
161
# Build the four-tab UI (Initial Discovery -> Design -> Illustration -> Backup),
# wire the event handlers, then start the app.
with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        # Tab 0: free-form chat with the AI designer.
        with gr.TabItem("Initial Discovery", id=0):
            gr.Markdown(doc_initial)
            chatbot = gr.Chatbot()
            msg = gr.Textbox()
            next1_btn = gr.Button(value="Extract")
        # Tab 1: high level plan plus two side-by-side proposals (old / new).
        with gr.TabItem("Design", id=1):
            with gr.Row():
                gr.Markdown(doc_planning)
            with gr.Row():
                # Each column pairs an editable TextArea with a Markdown view;
                # the *_isEditing State records which of the two is showing.
                with gr.Column():
                    high_level_plan_edit = gr.TextArea(label="High level plan", show_label=True, show_copy_button=True, interactive=True, visible=False)
                    high_level_plan_display = gr.Markdown(visible=True)
                    high_level_plan_isEditing = gr.State(False)
                    high_level_plan_flip_btn = gr.Button(value="Flip Edit/Display")
                    gr.Markdown("TODO: ability to regen from modified high level plan")
                with gr.Column():
                    proposal_old_edit = gr.TextArea(label="Current Proposal", show_label=True, show_copy_button=True, interactive=True, visible=True)
                    proposal_old_display = gr.Markdown(visible=False)
                    proposal_old_isEditing = gr.State(True)
                    proposal_old_flip_btn = gr.Button(value="Flip Edit/Display")
                    revise_old = gr.Button(value="Revise this plan")
                    accept_old = gr.Button(value="Accept this plan")
                with gr.Column():
                    proposal_new_edit = gr.TextArea(label="New Proposal", show_label=True, show_copy_button=True, interactive=True, visible=True)
                    proposal_new_display = gr.Markdown(visible=False)
                    proposal_new_isEditing = gr.State(True)
                    proposal_new_flip_btn = gr.Button(value="Flip Edit/Display")
                    revise_new = gr.Button(value="Revise this plan")
                    accept_new = gr.Button(value="Accept this plan")
            with gr.Row():
                revision_txt = gr.Textbox(label="Instruction for revising draft")
        # Tab 2: accepted plan, suggested image prompts, and the generated gallery.
        with gr.TabItem("Illustration", id=2):
            with gr.Row():
                with gr.Column():
                    low_level_plan = gr.Markdown()
                    sd_prompts = gr.TextArea(label="Prompt Suggestions", show_label=True, show_copy_button=True, interactive=True)
                    diagnostic_json = gr.JSON()
                    gen_photo = gr.Button(value="Generate Photos! (Highly Experimental)")
                with gr.Column():
                    gr.Markdown(doc_save_warning)
                    photos = gr.Gallery()
                    # Holds the directory the generated photos are moved into; None until first generation.
                    photo_dir = gr.State(None)
        # Tab 3: seeds used and downloadable photo files.
        with gr.TabItem("Backup", id=3):
            gr.Markdown(doc_backup)
            seeds = gr.Textbox(label="Seeds used for photo generation")
            backup_btn = gr.Button(value="Backup Photos!")
            backup_download_area = gr.Files()


    # Chat: record the user's message (clearing the textbox), then stream the reply.
    msg.submit(add_text, [msg, chatbot], [msg, chatbot]) \
        .then(bot, chatbot, chatbot)
    # Extract: copy the AI's last message into the high level plan (edit + display),
    # switch to the Design tab, then ask the LLM for an initial concrete proposal.
    next1_btn.click(extract_plan, chatbot, high_level_plan_edit) \
        .then(lambda x: x, high_level_plan_edit, high_level_plan_display) \
        .then(lambda: gr.Tabs.update(selected=1), None, tabs) \
        .then(lambda x: instruct(sys_plan_init, x), high_level_plan_edit, proposal_old_edit)

    # Flip buttons: toggle each panel between its edit and display component.
    high_level_plan_flip_btn.click(flip_edit_display, [high_level_plan_isEditing, high_level_plan_edit, high_level_plan_display]
        , [high_level_plan_isEditing, high_level_plan_edit, high_level_plan_display])
    proposal_old_flip_btn.click(flip_edit_display, [proposal_old_isEditing, proposal_old_edit, proposal_old_display]
        , [proposal_old_isEditing, proposal_old_edit, proposal_old_display])
    proposal_new_flip_btn.click(flip_edit_display, [proposal_new_isEditing, proposal_new_edit, proposal_new_display]
        , [proposal_new_isEditing, proposal_new_edit, proposal_new_display])

    # Revise the old proposal: the revised text is written to the new-proposal box,
    # then both edit boxes are mirrored into their display components.
    revise_old.click(lambda plan, rev: instruct(sys_plan_revise, revise_template.format(plan=plan, rev=rev)),
        [proposal_old_edit, revision_txt], proposal_new_edit) \
        .then(lambda x, y: [x, y], [proposal_old_edit, proposal_new_edit], [proposal_old_display, proposal_new_display])
    # Revise the new proposal: it is copied into the old slot and its revision
    # replaces the new slot, then both displays are refreshed the same way.
    revise_new.click(lambda plan, rev: [plan, instruct(sys_plan_revise, revise_template.format(plan=plan, rev=rev))],
        [proposal_new_edit, revision_txt], [proposal_old_edit, proposal_new_edit]) \
        .then(lambda x, y: [x, y], [proposal_old_edit, proposal_new_edit], [proposal_old_display, proposal_new_display])

    # Accept either proposal: copy it to the Illustration tab, switch to that tab,
    # then ask the LLM for image prompt suggestions.
    accept_old.click(lambda x: x, proposal_old_edit, low_level_plan) \
        .then(lambda: gr.Tabs.update(selected=2), None, tabs) \
        .then(lambda x: instruct(sys_sd_prompt, x), low_level_plan, sd_prompts)
    accept_new.click(lambda x: x, proposal_new_edit, low_level_plan) \
        .then(lambda: gr.Tabs.update(selected=2), None, tabs) \
        .then(lambda x: instruct(sys_sd_prompt, x), low_level_plan, sd_prompts)

    # Photo generation: parse the prompt suggestions into JSON, obtain a new photo
    # directory, then generate photos from the parsed data.
    # NOTE(review): the .success steps presumably run only when the previous step did not error — confirm against the Gradio version in use.
    #gen_photo.click(extract_and_gen_photo, sd_prompts, photos)
    gen_photo.click(attempt_extraction, sd_prompts, diagnostic_json) \
        .success(remake_temp_dir, photo_dir, photo_dir) \
        .success(extract_and_gen_photo, [diagnostic_json, photo_dir], [photos, seeds])

    # Backup: list the contents of the photo directory into the download area.
    backup_btn.click(do_backup, photo_dir, backup_download_area)

# Configure the request queue and launch the app.
demo.queue(concurrency_count=2, max_size=20).launch(debug=False)
248
+
249
+
250
+ # Dumping Area
251
+
252
+ #from PIL import Image
253
+ #import PIL
integrations.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio_client import Client
2
+
3
+ import random
4
+ import time
5
+
6
def public_sdxl_gen(prompt : str, neg_prompt : str, progress : callable, n, poll_time):
    """Submit one image job to the public SDXL space and wait for its result.

    A random seed is drawn, the job is submitted to the "/predict" endpoint,
    and while the job is not done ``progress(n, desc=...)`` is called with the
    job's rank, queue size and eta, followed by a sleep of ``poll_time``
    seconds. Returns ``(seed, job.result())``.
    """
    seed = random.randrange(1000000000, 1999999999)
    space = Client("https://victor-sdxl-0-9.hf.space/")
    job = space.submit(prompt, neg_prompt, 10, 50, seed, api_name="/predict")
    while True:
        if job.done():
            break
        status = job.status()
        queue_report = "{r}/{q}, eta = {eta}".format(
            r=status.rank,
            q=status.queue_size,
            eta=status.eta,
        )
        progress(n, desc=queue_report)
        time.sleep(poll_time)
    return (seed, job.result())
16
+
prompts.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# System prompt for the discovery chat (used by app.bot).
sys_p = """You are an interior designer who help people design their dream house.
You will first engage in a discovery interview with the client to understand their situation, need, preferences, etc.
You then proposes designs taking into account both aesthetics and praticality."""

# System prompt for turning the high level plan into a first concrete proposal.
sys_plan_init = """You are an interior designer who help people design their dream house.
You have just finished a discovery conversation with the client. The user will now send you the high level design plan you wrote last time.
Your task: Based on the high level design plan, create a more concrete proposal outlining various specific elements. Make your choice on things like color scheme etc.
"""

# User-message template for a revision request; filled via .format(plan=..., rev=...).
revise_template = """Your current proposal is as follows:
{plan}
----
I would like to propose this change:
{rev}
"""

# System prompt paired with revise_template when revising a proposal.
sys_plan_revise = """You are an interior designer who help people design their dream house.
You have drafted a concrete proposal and are now in editing phase. The user would like to propose a revision. The relevant contexts and instruction will be provided in user's message.
"""

# System prompt asking for image-generation text prompts (with captions) for an accepted proposal.
sys_sd_prompt = """You are an interior designer who help people design their dream house.
You have created a design proposal, which the user will tell you in his/her message.
There is an advanced image generating AI, that is able to take as input a text prompt, and output a high quality photo according to the description.
Generate multiple photos (up to a limit of 6 photos) using this AI to illustrate your design above, showing different areas of the house and highlighting various elements of your ideas.
Your answer should be a list of text prompt to give to the image generating AI, taking into acoount the fact that you can exert fine control over the image by being more specific in the text prompt, specifying colors, textures, style, arrangement of items, etc.
Important: The preferred style of the text prompt is comma seprated phrases describing the essential content, skipping unnecessary filler words. It is also preferred to be concrete and specific, and only describe things relevant to the task of drawing in the text prompt as the AI may be confused by higher level concepts.
**You should also provide caption accompanying each generated photo.**
No need to repeat the prompt as a stand in for the image as your output will be further processed before being shown to user.
"""

# Completion-style prompt for JSON extraction; filled via .format(plan=...), so
# literal braces are doubled. It deliberately ends inside an opened code fence with
# the start of the JSON object already written; app.attempt_extraction prepends
# those opening tokens back onto the model output before parsing.
sys_extract_json_prompt = """### Instruction
JSON is a versatile format that can encode and represent many structured data. In this exercise, you will convert a natural language text into JSON.

The input text contain a series of suggested text prompts for an image generating AI, along with optionally suggested caption for each image.

Rules:
0. As it is natural text, the prompts may appear in different free form formats. There also *may or may not be* distracting text in the input.
1. You should identify and locate the part in the input containing the actual text prompts and captions.

2. Next, You should output a JSON according to a schema.

3. The JSON you output should be a structured representation of the prompts mentioned in the input text that you located in rule 1.
4. The required schema is that it should be a JSON object with single attribute "prompts" containing a list of prompt object. Each prompt object contains three fields: "prompt", "caption", and "filename".
5. For each prompt suggested or mentioned in the input, infer a suitable, **unqiue, and short** filename from the input using common sense, and give the caption field the same value as the prompt **if** one is not present in input. Notice filename should have no file extension.

Hint: The prompts are intended to show various rooms or areas in a house, so one possible way to name the file is to name it after the room or area it is portraying. You may add a numeric suffix in case more than one prompts show the same room or area.

### Input
{plan}
### Response
Let's think step by step.
I have identified the part in the input containing the prompts and captions, and ignored any materials in the input outside of the prompts and captions.
Here's the JSON you requested, converting from input, making sure not to copy the example's content:
```
{{
"prompts": [{{"""
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ openai
2
+ gradio
utilities.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import shutil
3
+ import os
4
+ from pathlib import Path
5
+
6
def convert_history_to_openai_format(system_prompt : str, history : list[tuple]):
    """Flatten chat turns into an OpenAI-style message list.

    The result starts with a system message, followed by one user message per
    ``(user_msg, assistant_msg)`` pair and, when the assistant part is not
    ``None``, the matching assistant message.
    """
    system_entry = {"role": "system", "content": system_prompt}
    conversation = [system_entry]
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        if assistant_msg is None:
            # Turn not answered yet: there is no assistant message to emit.
            continue
        conversation.append({"role": "assistant", "content": assistant_msg})
    return conversation
13
+
14
+ # Test case
15
+ #print(convert_history_to_openai_format("You are a helpful assistant.", [["Hi! How are you?", "I'm fine!, and you?"], ["Testing", None]]))
16
+
17
def move_file_to_temp_dir(dest_dir : str, src_full_path: str, rename_component : str) -> str:
    """Move a file into ``dest_dir`` under a prefixed name and return the new path.

    The destination file name is ``rename_component``, an underscore, and the
    source file's own name (stem plus suffix).
    """
    source = Path(src_full_path)
    original_name = source.stem + source.suffix
    target = os.path.join(dest_dir, "_".join((rename_component, original_name)))
    shutil.move(src_full_path, target)
    return target
22
+
23
def remake_temp_dir(dir : str) -> str:
    """Create a brand-new temporary directory and return its path.

    The ``dir`` argument is accepted but not used; no existing directory is
    touched or removed.
    """
    return tempfile.mkdtemp()