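# Gradio chat demo for Grok 2 Vision on the xAI API. Users can upload an
# image, paste an image URL, or ask a plain text question; the full
# conversation history is resent to the model on every turn.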
import os
import base64
import mimetypes

import markdown
import gradio as gr
from openai import OpenAI
from dotenv import load_dotenv

# Load the xAI API key from the environment (a .env file is picked up via python-dotenv).
load_dotenv()
XAI_API_KEY = os.getenv("XAI_API_KEY")

# The xAI API is OpenAI-compatible, so the standard OpenAI client works once
# base_url points at https://api.x.ai/v1.
client = OpenAI(
    api_key=XAI_API_KEY,
    base_url="https://api.x.ai/v1",
)


def build_messages_from_history(history):
    """Convert the stored ((user_text, user_image_url), assistant_text) history
    into OpenAI-style chat messages, prefixed with the system prompt."""
    messages = [
        {
            "role": "system",
            "content": "You are Grok Vision, an assistant designed to understand and describe images and also answer text-based queries. "
            "You should use all previous messages in the conversation as context. Provide clear, positive, and useful responses."
        }
    ]
    for (user_text, user_image_url), assistant_text in history:
        # A user turn can contain an image, text, or both.
        user_content = []
        if user_image_url:
            user_content.append({
                "type": "image_url",
                "image_url": {
                    "url": user_image_url,
                    "detail": "high",
                },
            })
        if user_text.strip():
            user_content.append({
                "type": "text",
                "text": user_text.strip(),
            })
        messages.append({
            "role": "user",
            "content": user_content
        })
        # Add the assistant turn
        messages.append({
            "role": "assistant",
            "content": assistant_text
        })
    return messages
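

# For reference, build_messages_from_history encodes a user turn that contains
# both an image and a question as OpenAI-style multimodal content, e.g.:
# {
#     "role": "user",
#     "content": [
#         {"type": "image_url", "image_url": {"url": "https://...", "detail": "high"}},
#         {"type": "text", "text": "What is in this image?"},
#     ],
# }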


def create_response(history, user_text, user_image_path):
    """Send the conversation plus the new user turn to Grok and return the
    updated history together with the reply converted to HTML (or a prompt
    string if neither text nor an image was provided)."""
    user_text = user_text.strip()
    user_image_url = ""
    # A message that starts with "http" is treated as "<image URL> [question]".
    if user_text.startswith("http"):
        parts = user_text.split(" ", 1)
        user_image_url = parts[0]
        user_text = parts[1] if len(parts) > 1 else ""
    # An uploaded file takes precedence over a pasted URL; it is inlined as a
    # base64 data URL so no external hosting is needed.
    if user_image_path is not None:
        with open(user_image_path, "rb") as f:
            image_bytes = f.read()
        base64_image = base64.b64encode(image_bytes).decode("utf-8")
        # Guess the MIME type from the filename and fall back to JPEG.
        mime_type = mimetypes.guess_type(user_image_path)[0] or "image/jpeg"
        user_image_url = f"data:{mime_type};base64,{base64_image}"
    # Rebuild the earlier turns as API messages (system prompt included).
    messages = build_messages_from_history(history)
    # Build the new user turn from whatever was provided (image, text, or both).
    new_user_content = []
    if user_image_url:
        new_user_content.append({
            "type": "image_url",
            "image_url": {
                "url": user_image_url,
                "detail": "high",
            },
        })
    if user_text.strip():
        new_user_content.append({
            "type": "text",
            "text": user_text.strip(),
        })
    if not new_user_content:
        return history, "Please provide text or an image."
    messages.append({"role": "user", "content": new_user_content})

    completion = client.chat.completions.create(
        model="grok-2-vision-1212",
        messages=messages,
        stream=False,
        temperature=0.01,
    )
    assistant_response = completion.choices[0].message.content
    # Also render the reply as HTML for callers that want it; the Gradio
    # Chatbot below renders the raw Markdown itself, so chat() ignores this.
    md = markdown.Markdown(extensions=["fenced_code"])
    converted = md.convert(assistant_response)
    history.append(((user_text, user_image_url), assistant_response))
    return history, converted


def chat(user_message, image, history):
    """Gradio callback: update the history and rebuild the chatbot display."""
    history, _assistant_output = create_response(history, user_message, image)
    display_chat = []
    for (u_txt, u_img_url), a_txt in history:
        user_display = u_txt
        # Show a placeholder instead of dumping base64 data into the chat window.
        if u_img_url and u_img_url.startswith("data:image"):
            user_display += "\n\n[User uploaded an image]"
        elif u_img_url and u_img_url.startswith("http"):
            user_display += f"\n\n[User provided image URL: {u_img_url}]"
        display_chat.append((user_display.strip(), a_txt.strip()))
    return display_chat, history


with gr.Blocks() as demo:
    gr.Markdown(
        "# Grok 2 Vision Chatbot\n"
        "Welcome! You can do the following with Grok:\n\n"
        "- Upload an image and ask a question about it.\n"
        "- Provide an image URL in your message (e.g. `http://example.com/image.jpg What is in this image?`).\n"
        "- Or just ask a text question without any image.\n\n"
        "It also remembers previous messages, so you can ask follow-up questions."
    )
    chatbot = gr.Chatbot(label="Conversation")
    with gr.Row():
        image_input = gr.Image(type="filepath", label="Upload an image (optional)", interactive=True)
        user_message_input = gr.Textbox(
            label="Your message:",
            placeholder="Type your text or paste an image URL (e.g. http://... ). You can also combine them."
        )
    submit_button = gr.Button("Send")
    # Holds the full ((user_text, user_image_url), assistant_text) history across turns.
    state = gr.State([])
    submit_button.click(
        chat,
        inputs=[user_message_input, image_input, state],
        outputs=[chatbot, state]
    )

if __name__ == "__main__":
    demo.launch()
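
# To run this locally (a minimal sketch, assuming a .env file next to app.py
# holds the API key):
#
#   echo "XAI_API_KEY=<your key>" > .env
#   pip install gradio openai python-dotenv markdown
#   python app.py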