fxmeng committed on
Commit
be1b457
1 Parent(s): 4e6199d

Create app.py

Files changed (1)
  1. app.py +78 -0
app.py ADDED
import gradio as gr
import time
import base64
from openai import OpenAI

def wait_on_run(run, client, thread):
    # Poll until the run leaves the queued / in-progress states.
    while run.status in ("queued", "in_progress"):
        run = client.beta.threads.runs.retrieve(
            thread_id=thread.id,
            run_id=run.id,
        )
        time.sleep(0.5)
    return run

def GenerateImageByCode(client, message, code_prompt):
    # Create a code-interpreter assistant that renders the user's question as an image.
    assistant = client.beta.assistants.create(
        name="Chain of Image",
        instructions=code_prompt,
        model="gpt-4-1106-preview",
        tools=[{"type": "code_interpreter"}],
    )
    thread = client.beta.threads.create()
    client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=message,
    )
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )
    run = wait_on_run(run, client, thread)
    # Walk the run steps and return the first image the code interpreter produced.
    run_steps = client.beta.threads.runs.steps.list(thread_id=thread.id, run_id=run.id, order="asc")
    for data in run_steps.model_dump()['data']:
        if "tool_calls" in data['step_details']:
            outputs = data['step_details']['tool_calls'][0]['code_interpreter']['outputs']
            if outputs and 'image' in outputs[0]:
                image_id = outputs[0]['image']['file_id']
                image_bytes = client.files.with_raw_response.content(image_id).content
                with open(f'{image_id}.png', 'wb') as f:
                    f.write(image_bytes)
                base64_image = base64.b64encode(image_bytes).decode('utf-8')
                # Return (file path, base64): Gradio displays the file, and the
                # base64 copy is recovered from the chat history on later turns.
                return f"{image_id}.png", base64_image

def visual_question_answer(client, base64_image, question, vqa_prompt, max_tokens=256):
    # Answer a question about the previously generated image with GPT-4V.
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {"role": "system", "content": vqa_prompt},
            {"role": "user", "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}},
                {"type": "text", "text": f"Question:\n{question}\nAnswer:\n"},
            ]},
        ],
        max_tokens=max_tokens,
    )
    return response.choices[0].message.content

def chain_of_images(message, history, code_prompt, vqa_prompt, api_token, max_tokens):
    client = OpenAI(api_key=api_token)
    if len(history):
        # history[0][1] is the first bot turn, the (file path, base64) tuple
        # returned above, so history[0][1][1] recovers the base64 image.
        return visual_question_answer(client, history[0][1][1], message, vqa_prompt, max_tokens=int(max_tokens))
    else:
        return GenerateImageByCode(client, message, code_prompt)

def vote(data: gr.LikeData):
    # Not wired to a like event in this file; kept for reference.
    if data.liked:
        print("You upvoted this response:", data.value)
    else:
        print("You downvoted this response:", data.value)

demo = gr.ChatInterface(
    chain_of_images,
    additional_inputs=[
        gr.Textbox("You are a research drawing assistant. Your primary role is to help visualize questions posed by users. Instead of directly answering questions, you will use code to invoke the most suitable toolkit, transforming these questions into images. This helps users quickly understand the question and find answers through visualization. You should prioritize clarity and effectiveness in your visual representations, ensuring that complex scientific or technical concepts are made accessible and comprehensible through your drawings.", label="Code Interpreter Prompt"),
        gr.Textbox("You are a visual thinking expert. Your primary role is to answer questions about an image posed by users.", label="VQA Prompt"),
        gr.Textbox(label="API Key"),
        gr.Slider(32, 128, label="Max Tokens"),
    ],
).queue()

if __name__ == "__main__":
    demo.launch()
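
For reference, a minimal sketch of the app's two-turn flow driven directly, without the Gradio UI: the first turn renders the question as an image via the code interpreter, and later turns answer questions about that image. This is only a sketch; the API key, prompts, and questions below are placeholders, and GenerateImageByCode can return None when the run yields no image, so a real caller should check for that.

from openai import OpenAI

from app import GenerateImageByCode, visual_question_answer

client = OpenAI(api_key="sk-...")  # placeholder key

# Turn 1: render the question as an image with the code interpreter.
result = GenerateImageByCode(
    client,
    "Plot sin(x) and cos(x) on [0, 2*pi]. Where do they intersect?",
    "You are a research drawing assistant. Visualize the user's question as an image using code.",
)
assert result is not None, "the run produced no image"
path, b64 = result
print("image saved to", path)

# Turn 2: answer a follow-up question against that image with GPT-4V.
print(visual_question_answer(
    client,
    b64,
    "At which x values do the two curves intersect?",
    "You are a visual thinking expert. Answer questions about the image.",
))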