merterbak committed on
Commit 687c335
1 Parent(s): db7eb7e

Create app.py

Files changed (1)
  1. app.py +201 -0
app.py ADDED
@@ -0,0 +1,201 @@
+ import os
+ import base64
+ import markdown
+ import gradio as gr
+ from openai import OpenAI
+ from dotenv import load_dotenv
+
+ load_dotenv()
+
+ XAI_API_KEY = os.getenv("XAI_API_KEY")
+
+ client = OpenAI(
+     api_key=XAI_API_KEY,
+     base_url="https://api.x.ai/v1",
+ )
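+
+ # Example .env contents read by load_dotenv() above (placeholder value):
+ #   XAI_API_KEY=your-xai-api-key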
+
+ def build_messages_from_history(history):
+     """
+     Convert the stored conversation (with user and assistant turns, including images) into a
+     messages array suitable for the model. History is a list of tuples:
+     [
+         ((user_text, user_image_url), assistant_text),
+         ...
+     ]
+     We return a list of messages starting with a system role, followed by alternating user/assistant.
+     """
+     messages = [
+         {
+             "role": "system",
+             "content": "You are Grok Vision, an assistant designed to understand and describe images and also answer text-based queries. "
+                        "You should use all previous messages in the conversation as context. Provide clear, positive, and useful responses."
+         }
+     ]
+
+     for ((user_text, user_image_url), assistant_text) in history:
+         user_content = []
+         if user_image_url:
+             image_content = {
+                 "type": "image_url",
+                 "image_url": {
+                     "url": user_image_url,
+                     "detail": "high",
+                 },
+             }
+             user_content.append(image_content)
+
+         if user_text.strip():
+             user_content.append({
+                 "type": "text",
+                 "text": user_text.strip(),
+             })
+
+         messages.append({
+             "role": "user",
+             "content": user_content
+         })
+
+         # Add the assistant turn
+         messages.append({
+             "role": "assistant",
+             "content": assistant_text
+         })
+
+     return messages
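+
+ # Illustrative example of the shapes involved (values are made up):
+ #   history = [(("What is in this photo?", "https://example.com/cat.jpg"), "A cat on a sofa.")]
+ #   build_messages_from_history(history) returns [system, user (image_url + text parts), assistant]
+ #   message dicts in the format constructed above.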
+
+ def create_response(history, user_text, user_image_path):
+     """
+     Given the current history, the user's new message (text), and optional uploaded image path,
+     build a new set of messages including the latest user turn, then call the model and update history.
+     """
+     user_text = user_text.strip()
+     user_image_url = ""
+
+     if user_text.startswith("http"):
+         parts = user_text.split(" ", 1)
+         user_image_url = parts[0]
+         if len(parts) > 1:
+             user_text = parts[1]
+         else:
+             user_text = ""
+
+     if user_image_path is not None:
+         with open(user_image_path, "rb") as f:
+             image_bytes = f.read()
+         base64_image = base64.b64encode(image_bytes).decode("utf-8")
+         user_image_url = f"data:image/jpeg;base64,{base64_image}"
+
+     temp_history = history.copy()
+     temp_history.append(((user_text, user_image_url), ""))  # assistant response is empty for now
+
+     messages = [
+         {
+             "role": "system",
+             "content": "You are Grok Vision, an assistant designed to understand and describe images and also answer text-based queries. "
+                        "You should use all previous messages in the conversation as context. Provide clear, positive, and useful responses."
+         }
+     ]
+
+     # Add all previous turns except the one we just appended (since it has no assistant response yet)
+     for ((old_user_text, old_user_image_url), old_assistant_text) in history:
+         old_user_content = []
+         if old_user_image_url:
+             old_user_content.append({
+                 "type": "image_url",
+                 "image_url": {
+                     "url": old_user_image_url,
+                     "detail": "high",
+                 },
+             })
+         if old_user_text.strip():
+             old_user_content.append({
+                 "type": "text",
+                 "text": old_user_text.strip(),
+             })
+         messages.append({"role": "user", "content": old_user_content})
+         messages.append({"role": "assistant", "content": old_assistant_text})
+
+     new_user_content = []
+     if user_image_url:
+         new_user_content.append({
+             "type": "image_url",
+             "image_url": {
+                 "url": user_image_url,
+                 "detail": "high",
+             },
+         })
+     if user_text.strip():
+         new_user_content.append({
+             "type": "text",
+             "text": user_text.strip(),
+         })
+
+     if not new_user_content:
+         return history, "Please provide text or an image."
+
+     messages.append({"role": "user", "content": new_user_content})
+
+     completion = client.chat.completions.create(
+         model="grok-vision-beta",
+         messages=messages,
+         stream=False,
+         temperature=0.01,
+     )
+     assistant_response = completion.choices[0].message.content
+
+     md = markdown.Markdown(extensions=["fenced_code"])
+     converted = md.convert(assistant_response)
+
+     history.append(((user_text, user_image_url), assistant_response))
+
+     return history, converted
+
+ def chat(user_message, image, history):
+     """
+     Handle a new message from the user. The state 'history' is a list of ((user_text, user_image_url), assistant_text) tuples.
+     Returns the conversation as displayed in the Chatbot and the updated history.
+     """
+     # assistant_output (the markdown-converted reply) is not used directly;
+     # the Chatbot display below is rebuilt from the full history instead.
+     history, assistant_output = create_response(history, user_message, image)
+
+     display_chat = []
+     for ((u_txt, u_img_url), a_txt) in history:
+         user_display = u_txt
+         if u_img_url and u_img_url.startswith("data:image"):
+             user_display += "\n\n[User uploaded an image]"
+         elif u_img_url and u_img_url.startswith("http"):
+             user_display += f"\n\n[User provided image URL: {u_img_url}]"
+
+         display_chat.append((user_display.strip(), a_txt.strip()))
+
+     return display_chat, history
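+
+ # Illustrative Chatbot pair produced for a turn with an uploaded image (example values):
+ #   ("Describe this picture\n\n[User uploaded an image]", "The image shows ...")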
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         "# Grok Vision Chatbot\n"
+         "Welcome! You can ask questions about images or just general text queries. "
+         "You can:\n"
+         "- Upload an image and ask a question about it.\n"
+         "- Provide an image URL in your message (e.g. `http://example.com/image.jpg What is in this image?`).\n"
+         "- Or just ask a text question without any image.\n\n"
+         "The assistant remembers previous messages and can reference earlier parts of the conversation."
+     )
+
+     chatbot = gr.Chatbot(label="Conversation")
+     with gr.Row():
+         image_input = gr.Image(type="filepath", label="Upload an image (optional)", interactive=True)
+         user_message_input = gr.Textbox(
+             label="Your message:",
+             placeholder="Type your text or paste an image URL (e.g. http://... ). You can also combine them."
+         )
+     submit_button = gr.Button("Send")
+
+     state = gr.State([])
+
+     submit_button.click(
+         chat,
+         inputs=[user_message_input, image_input, state],
+         outputs=[chatbot, state]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
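+
+ # Dependencies implied by the imports above (assumed PyPI package names):
+ #   gradio, openai, python-dotenv, markdown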