Spaces:

merterbak
/

grok

Running

App Files Files Community

merterbak committed 23 days ago

Commit

687c335

•

1 Parent(s): db7eb7e

Create app.py

Browse files

Files changed (1) hide show

app.py +201 -0

app.py ADDED Viewed

	@@ -0,0 +1,201 @@

import base64
import mimetypes
import os

import gradio as gr
import markdown
from dotenv import load_dotenv
from openai import OpenAI
# Run python-dotenv's loader before the API key is looked up in the environment.
load_dotenv()

# API key for the xAI endpoint; None when the variable is not set.
XAI_API_KEY = os.environ.get("XAI_API_KEY")

# The OpenAI client is pointed at the xAI base URL instead of its default.
client = OpenAI(base_url="https://api.x.ai/v1", api_key=XAI_API_KEY)
def build_messages_from_history(history):
    """Convert the stored conversation into a chat-completions ``messages`` list.

    Args:
        history: Iterable of ``((user_text, user_image_url), assistant_text)``
            tuples, oldest turn first. ``user_text`` and ``user_image_url``
            may each be empty or ``None``.

    Returns:
        A list of message dicts: one system message followed, for every turn,
        by a ``user`` message (content is a list holding an optional
        ``image_url`` part and an optional ``text`` part, in that order) and
        an ``assistant`` message carrying ``assistant_text`` unchanged.
    """
    messages = [
        {
            "role": "system",
            "content": "You are Grok Vision, an assistant designed to understand and describe images and also answer text-based queries. "
                       "You should use all previous messages in the conversation as context. Provide clear, positive, and useful responses."
        }
    ]
    for (user_text, user_image_url), assistant_text in history:
        user_content = []
        if user_image_url:
            user_content.append({
                "type": "image_url",
                "image_url": {
                    "url": user_image_url,
                    "detail": "high",
                },
            })
        # An image-only turn may have no text at all; treat None like "" so
        # that .strip() cannot raise AttributeError.
        text = (user_text or "").strip()
        if text:
            user_content.append({
                "type": "text",
                "text": text,
            })
        messages.append({"role": "user", "content": user_content})
        messages.append({"role": "assistant", "content": assistant_text})
    return messages
def _split_leading_image_url(text):
    """Return ``(url, remaining_text)``; ``url`` is "" unless *text* starts with an http(s) URL."""
    # Require a real scheme prefix so that ordinary sentences beginning with
    # the word "http" are not mistaken for an image URL.
    if not text.startswith(("http://", "https://")):
        return "", text
    # Split on any whitespace so a URL followed by a newline or tab also works.
    parts = text.split(None, 1)
    return parts[0], (parts[1].strip() if len(parts) > 1 else "")


def _image_file_to_data_url(path):
    """Read the image file at *path* and return it as a base64 ``data:`` URL."""
    mime_type, _ = mimetypes.guess_type(path)
    if not mime_type or not mime_type.startswith("image/"):
        # Unknown extension: keep the label this code always used before.
        mime_type = "image/jpeg"
    with open(path, "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{encoded}"


def create_response(history, user_text, user_image_path):
    """Send the user's new turn (plus prior context) to the model and record the reply.

    Args:
        history: List of ``((user_text, user_image_url), assistant_text)``
            tuples for the conversation so far. On success the new turn is
            appended to this list in place.
        user_text: The user's message, or ``None``. If it begins with an
            ``http://`` or ``https://`` URL, that URL is used as the image
            and the remainder is the question.
        user_image_path: Filesystem path of an uploaded image, or ``None``.
            When given it takes precedence over a URL found in ``user_text``.

    Returns:
        ``(history, output)``. When neither text nor image was supplied,
        ``history`` is returned unchanged and ``output`` is a plain-text
        prompt asking for input. Otherwise ``output`` is the assistant's
        reply converted from Markdown to HTML.
    """
    user_text = (user_text or "").strip()
    user_image_url, user_text = _split_leading_image_url(user_text)
    if user_image_path is not None:
        user_image_url = _image_file_to_data_url(user_image_path)

    new_user_content = []
    if user_image_url:
        new_user_content.append({
            "type": "image_url",
            "image_url": {
                "url": user_image_url,
                "detail": "high",
            },
        })
    if user_text:
        new_user_content.append({
            "type": "text",
            "text": user_text,
        })
    if not new_user_content:
        return history, "Please provide text or an image."

    # System prompt + all completed turns, then the turn being asked now.
    messages = build_messages_from_history(history)
    messages.append({"role": "user", "content": new_user_content})

    completion = client.chat.completions.create(
        model="grok-vision-beta",
        messages=messages,
        stream=False,
        temperature=0.01,
    )
    # The SDK types message content as optional; normalise to a string so the
    # Markdown conversion and later display code never see None.
    assistant_response = completion.choices[0].message.content or ""
    md = markdown.Markdown(extensions=["fenced_code"])
    converted = md.convert(assistant_response)
    history.append(((user_text, user_image_url), assistant_response))
    return history, converted
def chat(user_message, image, history):
    """Handle one submission from the UI and rebuild the Chatbot display.

    Args:
        user_message: Text from the message box (may be empty or ``None``).
        image: Path of the uploaded image, or ``None``.
        history: Session state; a list of
            ``((user_text, user_image_url), assistant_text)`` tuples.

    Returns:
        ``(display_chat, history)`` where ``display_chat`` is a list of
        ``(user_display, assistant_display)`` pairs for the Chatbot and
        ``history`` is the updated session state.
    """
    # create_response appends to the list it is given, so measure the length
    # first to be able to tell afterwards whether a turn was recorded.
    turns_before = len(history)
    history, assistant_output = create_response(history, user_message, image)

    display_chat = []
    for (u_txt, u_img_url), a_txt in history:
        user_display = u_txt or ""
        if u_img_url and u_img_url.startswith("data:image"):
            user_display += "\n\n[User uploaded an image]"
        elif u_img_url and u_img_url.startswith("http"):
            user_display += f"\n\n[User provided image URL: {u_img_url}]"
        display_chat.append((user_display.strip(), (a_txt or "").strip()))

    if len(history) == turns_before:
        # No turn was added, so assistant_output is the "please provide input"
        # notice rather than a model reply. Show it as a bot-only message
        # (not stored in history) instead of silently dropping it.
        display_chat.append((None, assistant_output))
    return display_chat, history
# Introductory text rendered as Markdown at the top of the page.
_INTRO_MARKDOWN = (
    "# Grok Vision Chatbot\n"
    "Welcome! You can ask questions about images or just general text queries. "
    "You can:\n"
    "- Upload an image and ask a question about it.\n"
    "- Provide an image URL in your message (e.g. `http://example.com/image.jpg What is in this image?`).\n"
    "- Or just ask a text question without any image.\n\n"
    "The assistant remembers previous messages and can reference earlier parts of the conversation."
)

with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Conversation view.
    chatbot = gr.Chatbot(label="Conversation")

    # Inputs: optional image upload next to the message box.
    with gr.Row():
        image_input = gr.Image(
            label="Upload an image (optional)",
            type="filepath",
            interactive=True,
        )
        user_message_input = gr.Textbox(
            placeholder="Type your text or paste an image URL (e.g. http://... ). You can also combine them.",
            label="Your message:",
        )

    submit_button = gr.Button("Send")

    # Conversation history kept in Gradio state; starts empty.
    state = gr.State([])

    # "Send" passes message, image and history to chat(), which returns the
    # refreshed Chatbot contents and the updated history.
    submit_button.click(
        fn=chat,
        inputs=[user_message_input, image_input, state],
        outputs=[chatbot, state],
    )

if __name__ == "__main__":
    demo.launch()