File size: 12,140 Bytes
df511cb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
from utilities import convert_history_to_openai_format, move_file_to_temp_dir, remake_temp_dir
from integrations import public_sdxl_gen
from prompts import sys_p, sys_plan_init, sys_plan_revise, sys_sd_prompt, sys_extract_json_prompt, revise_template
import openai
import gradio as gr
import json
import logging
import time
from pathlib import Path
# Utilities for user message and extracting AI's last response
def add_text(msg, chatbot):
    """Record the user's message as a new chat turn and clear the input box.

    The new turn is appended as [user_text, None]; the None slot is filled in
    later by the streaming bot response. Returns ("", history) so Gradio
    resets the textbox and re-renders the chat.
    """
    chatbot += [[msg, None]]
    return "", chatbot
def extract_plan(history : list[tuple]) -> str:
    """Return the assistant's reply from the most recent chat turn.

    Each history entry is a (user_msg, assistant_msg) pair; the plan is
    taken verbatim from the assistant side of the final pair.
    """
    last_turn = history[-1]
    return last_turn[1]
def flip_edit_display(isEditing, edit, display):
    """Toggle a plan panel between its editable textbox and Markdown view.

    Returns [new_isEditing_flag, textbox_update, markdown_update]. When
    leaving edit mode, the current textbox text is pushed into the Markdown
    component so the rendered view stays in sync.
    """
    if not isEditing:
        # Enter edit mode; the textbox keeps whatever value it already holds.
        return [True, gr.update(visible=True), gr.update(visible=False)]
    # Enter display mode, carrying the edited text over to the Markdown view.
    return [False, gr.update(visible=False), gr.update(visible=True, value=edit)]
# Streaming mode chat
# TODO: Convert to use gradio's new low-code/high level Chat component
def bot(history):
    """Stream an assistant reply into the last turn of a Gradio chat history.

    history: list of [user_msg, assistant_msg] pairs; the final pair is the
    pending turn (assistant slot is None, set by add_text) that gets filled
    incrementally. Yields the whole history after each chunk so Gradio
    re-renders the chat as text arrives.
    """
    # Convert the Gradio pair-list into OpenAI's messages format, with the
    # system prompt prepended.
    msg_in = convert_history_to_openai_format(sys_p, history)
    response = openai.ChatCompletion.create(
        model="accounts/fireworks/models/llama-v2-13b-chat-w8a16",
        messages=msg_in,
        temperature=0.7,
        max_tokens=512,
        stream=True
    )
    history[-1][1] = ""
    for chunk in response:
        # Stream deltas may omit "content" entirely (e.g. the role-only first
        # chunk) — default to "" — or carry an explicit None, hence the guard.
        txt = chunk.choices[0].delta.get("content", "")
        if txt is not None:
            history[-1][1] += txt
            yield history
def instruct(system_prompt : str, message : str) -> str:
    """
    General purpose LLM/AI invocation. Use llama v2 13b chat as an instruct
    model by wrapping it up in a single turn conversation: one system prompt
    followed by one user message, returning the assistant's full reply text.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    result = openai.ChatCompletion.create(
        model="accounts/fireworks/models/llama-v2-13b-chat-w8a16",
        messages=conversation,
        temperature=0.7,
        max_tokens=2048,
    )
    return result.choices[0].message.content
def attempt_extraction(plan : str) -> list:
    """
    Attempt to turn natural language text into structured format. We use this flow/pattern:
    1. Use a coding focused LLM which work better on this kind of task. (guess only)
    2. Use guidance like technique (called "nudge") - prim AI into a JSON only output
       by writing the beginning part of the answer for it.
    3. Due to limitation of specific API provider, use streaming mode and detect stop token
       ourself. Here the JSON is enclosed in markdown codeblock so we stop at ```.
    4. Then backfill the nudge tokens and attempt a parse.

    Returns the list stored under the "prompts" key of the parsed JSON.
    Raises json.JSONDecodeError when the model output cannot be parsed.
    """
    response = openai.Completion.create(
        model="accounts/fireworks/models/starcoder-16b-w8a16",
        prompt=sys_extract_json_prompt.format(plan=plan),
        temperature=0.1,
        max_tokens=1280,
        stream=True
    )
    ans = ""
    for chunk in response:
        txt = chunk.choices[0].get("text", "")
        if txt is not None:
            ans += txt
        # Stop as soon as the closing markdown fence shows up in the buffer.
        if "```" in ans:
            break
    # Backfill the nudge tokens that primed the model's answer.
    ans = "{ \"prompts\":[{" + ans
    # Keep only the text before the fence. BUGFIX: the stream can run out of
    # tokens before emitting ```; str.index() would raise an unhandled
    # ValueError there, while partition() simply keeps the whole buffer and
    # lets json.loads report the real problem.
    ans = ans.partition("```")[0]
    return json.loads(ans)["prompts"]
#buffer_imgs = []
#mydata = []
def mock_get_img(index, delay: float = 2.0):
    """Stand-in for the real image generator, for offline UI testing.

    Sleeps for `delay` seconds to simulate generation latency (default 2.0,
    matching the original hard-coded pause), then returns a (seed, path)
    pair pointing at a pre-generated test image.

    index: which numbered test image file to reference.
    delay: seconds to block before returning; expose it so tests can pass 0.
    """
    time.sleep(delay)
    return (42, "/content/first_test_alpha{i}.png".format(i=index))
def extract_and_gen_photo(data, photo_dir : str):
    """Generate one photo per extracted prompt and stream gallery updates.

    data: list of dicts, each with "prompt", "filename" and "caption" keys
        (the structure produced by attempt_extraction).
    photo_dir: temp directory path that finished images are moved into.
    Yields (gallery_items, seeds_csv) after each image completes, where
    gallery_items is a list of (path, caption) pairs and seeds_csv is a
    comma-joined string of the seeds used so far.
    """
    cur_imgs = []
    myseeds = []
    #data = attempt_extraction(plan)
    logging.info(data)
    #mydata.append(data)
    num_item = len(data)
    i = 0.0
    # Progress callback handed to the generator backend; surfaces queue
    # status to the user as a Gradio warning toast (n is a 0..1 fraction).
    def progress(n, desc="Running..."):
        gr.Warning("Overall progress: {n}%, queue status: {desc}".format(n=n*100, desc=desc))
    for item in data:
        #(seed, img) = gen_photo_segmind(item["prompt"], "", 1024, 1024) # w, h
        # i/num_item = fraction of the batch already done; 30 = poll interval.
        (seed, img) = public_sdxl_gen(item["prompt"], "", progress, i/num_item, 30)
        #(seed, img) = mock_get_img(int(i+1))
        # Move the produced file into the session's temp dir under its
        # planned filename.
        new_path = move_file_to_temp_dir(photo_dir, img, item["filename"])
        logging.debug(type(img))
        logging.debug(img)
        #buffer_imgs.append([seed, img])
        #img.save(item["filename"] + ".jpg", "JPG")
        i += 1.0
        cur_imgs.append( (new_path, item["caption"]) )
        myseeds.append(seed)
        # Stream partial results so the gallery fills in as photos finish.
        yield (cur_imgs, ",".join(map(str, myseeds)))
def do_backup(user_dir : str) -> list[str]:
    """Collect every path under the user's photo directory for download."""
    root = Path(user_dir)
    return list(root.rglob('*'))
    #return [fpath for (fpath, caption) in gallery]
# Main UI Area
# User-facing Markdown shown at the top of each wizard tab.
# BUGFIX: corrected typos in the displayed text ("gave"->"gives",
# "gaurantee"->"guarantee", "reqest"->"request", "in principles"->"in principle").
doc_initial = """## Initial/Discovery Conversation
Chat to the AI designer! Press enter to submit your message (begin by saying hi). When it gives a high level plan,
press "Extract" and wait a bit, you will be taken to the next step.
*Note*: Currently this is a crude method and we just directly copy the AI's last message in full.
"""
doc_planning = """## Draft/Revise plan
The AI will now devise a more concrete/low level plan. You can also work interactively with the AI to further
revise it. Let's look at the buttons:
- In each panel, you can flip between editing manually or displaying it in Markdown format using the flip button.
- For backup, it is recommended to copy the markdown source code in edit mode.
- Describe what changes you would like to make in the bottom most textbox, then press "Revise this plan"...
- The side by side is for you to compare two proposals. Each one has a set of buttons below it. Revising the proposal you've chosen will discard the other one, then AI will show the one you chose to revise against the revised version it made.
- When you're happy, pick one of the proposal and click "Accept this plan". You will be taken to the next step.
"""
doc_save_warning = """# Illustration stage
Now AI will generate a set of prompts for stable diffusion. Then you can trigger actually generating the photos...
# Important Notice
- Highly experimental (due to flakiness of JSON extraction, a known weakness of LLM technology) and may fail. No guarantee offered.
- Currently using public HF space `victor/SDXL-0.9`, each photo takes about 90 seconds, and there can be a long queue.
- You will be informed of the progress every 30 seconds. Don't worry about the NaNs/None - I'm just too lazy to convert the data properly... it usually means the job is just starting or your request is being processed. On the other hand, it will show some numbers if you are queued up.
- **Remember to save your work!** To do so, head over to the Backup tab to download your photos.
- TODO: Also download texts generated. For now one workaround is to copy paste text manually.
"""
doc_backup = """# Backup
To backup, press the button and wait a bit. Then download the files. The seed (that can regenerate the same photo in principle) are also provided.
"""
# Main UI: a four-tab wizard — Discovery chat -> Design plans -> Illustration
# -> Backup — plus the event wiring that moves data between tabs.
with gr.Blocks() as demo:
    with gr.Tabs() as tabs:
        # Tab 0: open-ended chat with the AI designer.
        with gr.TabItem("Initial Discovery", id=0):
            gr.Markdown(doc_initial)
            chatbot = gr.Chatbot()
            msg = gr.Textbox()
            next1_btn = gr.Button(value="Extract")
        # Tab 1: high level plan plus two side-by-side proposal panels.
        with gr.TabItem("Design", id=1):
            with gr.Row():
                gr.Markdown(doc_planning)
            with gr.Row():
                with gr.Column():
                    # Each panel pairs an editable TextArea with a read-only
                    # Markdown view; a State flag tracks which one is visible.
                    high_level_plan_edit = gr.TextArea(label="High level plan", show_label=True, show_copy_button=True, interactive=True, visible=False)
                    high_level_plan_display = gr.Markdown(visible=True)
                    high_level_plan_isEditing = gr.State(False)
                    high_level_plan_flip_btn = gr.Button(value="Flip Edit/Display")
                    gr.Markdown("TODO: ability to regen from modified high level plan")
                with gr.Column():
                    proposal_old_edit = gr.TextArea(label="Current Proposal", show_label=True, show_copy_button=True, interactive=True, visible=True)
                    proposal_old_display = gr.Markdown(visible=False)
                    proposal_old_isEditing = gr.State(True)
                    proposal_old_flip_btn = gr.Button(value="Flip Edit/Display")
                    revise_old = gr.Button(value="Revise this plan")
                    accept_old = gr.Button(value="Accept this plan")
                with gr.Column():
                    proposal_new_edit = gr.TextArea(label="New Proposal", show_label=True, show_copy_button=True, interactive=True, visible=True)
                    proposal_new_display = gr.Markdown(visible=False)
                    proposal_new_isEditing = gr.State(True)
                    proposal_new_flip_btn = gr.Button(value="Flip Edit/Display")
                    revise_new = gr.Button(value="Revise this plan")
                    accept_new = gr.Button(value="Accept this plan")
            with gr.Row():
                revision_txt = gr.Textbox(label="Instruction for revising draft")
        # Tab 2: extract SD prompts from the accepted plan, generate photos.
        with gr.TabItem("Illustration", id=2):
            with gr.Row():
                with gr.Column():
                    low_level_plan = gr.Markdown()
                    sd_prompts = gr.TextArea(label="Prompt Suggestions", show_label=True, show_copy_button=True, interactive=True)
                    diagnostic_json = gr.JSON()
                    gen_photo = gr.Button(value="Generate Photos! (Highly Experimental)")
                with gr.Column():
                    gr.Markdown(doc_save_warning)
                    photos = gr.Gallery()
                    # Session-scoped temp directory the generated photos live in.
                    photo_dir = gr.State(None)
        # Tab 3: download the generated photos and their seeds.
        with gr.TabItem("Backup", id=3):
            gr.Markdown(doc_backup)
            seeds = gr.Textbox(label="Seeds used for photo generation")
            backup_btn = gr.Button(value="Backup Photos!")
            backup_download_area = gr.Files()
    # --- Event wiring ---
    # Chat: append the user's turn, then stream the bot reply into it.
    msg.submit(add_text, [msg, chatbot], [msg, chatbot]) \
        .then(bot, chatbot, chatbot)
    # "Extract": copy the AI's last message as the high level plan, mirror it
    # into the display view, jump to the Design tab, then ask the AI for an
    # initial concrete proposal.
    next1_btn.click(extract_plan, chatbot, high_level_plan_edit) \
        .then(lambda x: x, high_level_plan_edit, high_level_plan_display) \
        .then(lambda: gr.Tabs.update(selected=1), None, tabs) \
        .then(lambda x: instruct(sys_plan_init, x), high_level_plan_edit, proposal_old_edit)
    # Flip buttons toggle each panel between edit and display mode.
    high_level_plan_flip_btn.click(flip_edit_display, [high_level_plan_isEditing, high_level_plan_edit, high_level_plan_display]
        , [high_level_plan_isEditing, high_level_plan_edit, high_level_plan_display])
    proposal_old_flip_btn.click(flip_edit_display, [proposal_old_isEditing, proposal_old_edit, proposal_old_display]
        , [proposal_old_isEditing, proposal_old_edit, proposal_old_display])
    proposal_new_flip_btn.click(flip_edit_display, [proposal_new_isEditing, proposal_new_edit, proposal_new_display]
        , [proposal_new_isEditing, proposal_new_edit, proposal_new_display])
    # Revising the old proposal writes the revision into the new panel;
    # revising the new proposal first promotes it into the old panel, then
    # puts its revision into the new panel. Both refresh the display views.
    revise_old.click(lambda plan, rev: instruct(sys_plan_revise, revise_template.format(plan=plan, rev=rev)),
        [proposal_old_edit, revision_txt], proposal_new_edit) \
        .then(lambda x, y: [x, y], [proposal_old_edit, proposal_new_edit], [proposal_old_display, proposal_new_display])
    revise_new.click(lambda plan, rev: [plan, instruct(sys_plan_revise, revise_template.format(plan=plan, rev=rev))],
        [proposal_new_edit, revision_txt], [proposal_old_edit, proposal_new_edit]) \
        .then(lambda x, y: [x, y], [proposal_old_edit, proposal_new_edit], [proposal_old_display, proposal_new_display])
    # Accepting a proposal promotes it to the low level plan, jumps to the
    # Illustration tab, and asks the AI for stable diffusion prompts.
    accept_old.click(lambda x: x, proposal_old_edit, low_level_plan) \
        .then(lambda: gr.Tabs.update(selected=2), None, tabs) \
        .then(lambda x: instruct(sys_sd_prompt, x), low_level_plan, sd_prompts)
    accept_new.click(lambda x: x, proposal_new_edit, low_level_plan) \
        .then(lambda: gr.Tabs.update(selected=2), None, tabs) \
        .then(lambda x: instruct(sys_sd_prompt, x), low_level_plan, sd_prompts)
    #gen_photo.click(extract_and_gen_photo, sd_prompts, photos)
    # Photo pipeline: JSON extraction -> fresh temp dir -> generation; each
    # .success() stage runs only if the previous stage did not raise.
    gen_photo.click(attempt_extraction, sd_prompts, diagnostic_json) \
        .success(remake_temp_dir, photo_dir, photo_dir) \
        .success(extract_and_gen_photo, [diagnostic_json, photo_dir], [photos, seeds])
    backup_btn.click(do_backup, photo_dir, backup_download_area)
# Two concurrent workers, at most 20 queued requests.
demo.queue(concurrency_count=2, max_size=20).launch(debug=False)
# Dumping Area
#from PIL import Image
#import PIL
|