# NOTE(review): commented out stray GitHub PR paste residue that would break
# the module at import time:
#   RobertoBarrosoLuque / "Please fixed" / commit 5d51c7f
"""Fireworks Vision Studio - Chat interface for Document Intelligence & Design to Code."""
import base64
import os
import tempfile
import time
import uuid
import fitz # PyMuPDF
import gradio as gr
from gradio_client import utils as client_utils
import modelscope_studio.components.antd as antd
import modelscope_studio.components.antdx as antdx
import modelscope_studio.components.base as ms
import modelscope_studio.components.pro as pro
from modelscope_studio.components.pro.chatbot import (
ChatbotActionConfig,
ChatbotBotConfig,
ChatbotMarkdownConfig,
ChatbotUserConfig,
ChatbotWelcomeConfig,
)
from modelscope_studio.components.pro.multimodal_input import (
MultimodalInputUploadConfig,
)
from openai import OpenAI
from PIL import Image
try:
from dotenv import load_dotenv
load_dotenv()
except ImportError:
pass
# ---------------------------------------------------------------------------
# Config
# ---------------------------------------------------------------------------
# Model-selector keys mapped to Fireworks model metadata:
# display label, fully-qualified Fireworks model id, and sidebar icon path.
MODELS = {
    "kimi-k2p5": {
        "label": "Kimi K2.5",
        "id": "accounts/fireworks/models/kimi-k2p5",
        "icon": "./assets/kimi-logo.png",
    },
    "qwen3-vl-30b": {
        "label": "Qwen3 VL 30B",
        "id": "accounts/fireworks/models/qwen3-vl-30b-a3b-instruct",
        "icon": "./assets/qwen-logo.png",
    },
}
# Model used until the user picks another one in the sidebar selector.
DEFAULT_MODEL_KEY = "kimi-k2p5"
# Images whose longest side exceeds this many pixels are downscaled before upload.
MAX_IMAGE_DIM = 2048
# At most this many pages of an uploaded PDF are rasterized and sent to the model.
MAX_PDF_PAGES = 5
# Rasterization resolution (dots per inch) for PDF pages.
PDF_DPI = 200
# Session-wide message cap; user + expected assistant reply count as 2.
MAX_MESSAGES_PER_CHAT = 10
# System prompt prepended to every request sent to the model.
DEFAULT_SYS_PROMPT = (
    "You are Fireworks Vision Studio, a highly capable multimodal AI assistant. "
    "You excel at document analysis (legal, medical, financial) and converting "
    "designs/wireframes into code. Be thorough, accurate, and well-structured. "
    "Use markdown formatting for clarity. When generating HTML/CSS, return a "
    "single self-contained file wrapped in a ```html code fence. "
    "When responding, focus primarily on addressing the user's most recent message. "
    "Use the conversation history as context to understand the thread, but "
    "prioritize answering the latest question or request."
)
# Ant Design theme override: primary accent color for the whole UI.
DEFAULT_THEME = {
    "token": {
        "colorPrimary": "#6d28d9",
    }
}
def get_client() -> OpenAI:
    """Build an OpenAI-compatible client pointed at the Fireworks inference API.

    Raises:
        ValueError: if the FIREWORKS_API_KEY environment variable is unset
            or empty.
    """
    key = os.environ.get("FIREWORKS_API_KEY", "")
    if key:
        return OpenAI(
            base_url="https://api.fireworks.ai/inference/v1",
            api_key=key,
        )
    raise ValueError(
        "FIREWORKS_API_KEY environment variable is not set. "
        "Please set your API key before running the app."
    )
# ---------------------------------------------------------------------------
# Image / PDF utilities
# ---------------------------------------------------------------------------
def encode_file_to_base64(file_path: str) -> str:
    """Encode a file as a base64 data URI, downscaling oversized images.

    Images whose longest side exceeds MAX_IMAGE_DIM are resized in memory
    (aspect ratio preserved) and re-encoded as PNG; all other files are read
    verbatim from disk.

    Args:
        file_path: Path to a local file.

    Returns:
        A ``data:<mime>;base64,<payload>`` string.
    """
    import io  # local import keeps the in-memory re-encode self-contained

    mime_type = client_utils.get_mimetype(file_path)
    if mime_type and mime_type.startswith("image"):
        # Close the PIL handle deterministically (the original leaked it).
        with Image.open(file_path) as img:
            if max(img.size) > MAX_IMAGE_DIM:
                img.thumbnail((MAX_IMAGE_DIM, MAX_IMAGE_DIM), Image.LANCZOS)
                # Re-encode in memory instead of writing a delete=False
                # temp file that was never removed (temp-file leak) and
                # whose open handle broke re-reads on Windows.
                buf = io.BytesIO()
                img.save(buf, "PNG")
                payload = base64.b64encode(buf.getvalue()).decode("utf-8")
                return f"data:image/png;base64,{payload}"
    with open(file_path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("utf-8")
    return f"data:{mime_type};base64,{b64}"
def pdf_to_images(pdf_path: str) -> list[str]:
    """Rasterize up to MAX_PDF_PAGES pages of a PDF to temporary PNG files.

    Args:
        pdf_path: Path to a local PDF file.

    Returns:
        Paths of the generated PNG files. They are created with
        ``delete=False`` so they survive for the rest of the request;
        the caller/OS is responsible for eventual cleanup.
    """
    paths: list[str] = []
    # Scale factor from PDF points (72 dpi) to the target DPI; it is
    # page-independent, so compute it once instead of per page.
    mat = fitz.Matrix(PDF_DPI / 72, PDF_DPI / 72)
    doc = fitz.open(pdf_path)
    try:
        for i, page in enumerate(doc):
            if i >= MAX_PDF_PAGES:
                break
            pix = page.get_pixmap(matrix=mat)
            tmp = tempfile.NamedTemporaryFile(suffix=".png", delete=False)
            # Close our handle before PyMuPDF writes by path (Windows cannot
            # reopen a file that is still held open elsewhere).
            tmp.close()
            pix.save(tmp.name)
            paths.append(tmp.name)
    finally:
        # Guarantee the document is released even if rasterization fails
        # mid-loop (the original skipped close() on exceptions).
        doc.close()
    return paths
# ---------------------------------------------------------------------------
# Message formatting
# ---------------------------------------------------------------------------
def format_history(history):
    """Convert chatbot history to OpenAI-compatible messages.

    Prepends the system prompt, then for each user turn emits the uploaded
    files as ``image_url`` parts (PDFs are rasterized page-by-page first)
    followed by a ``text`` part. Assistant turns are flattened to their
    first text content (empty string if none).

    NOTE(review): assumes each user item stores content as
    ``[{"type": "file", ...}, {"type": "text", ...}]`` in exactly that
    order, as built by Events.add_message — confirm if that builder changes.
    """
    messages = [{"role": "system", "content": DEFAULT_SYS_PROMPT}]
    for item in history:
        if item["role"] == "user":
            files = []
            for file_path in item["content"][0]["content"]:
                if os.path.exists(file_path):
                    mime_type = client_utils.get_mimetype(file_path)
                    # Handle PDFs: convert to images
                    if mime_type == "application/pdf":
                        for img_path in pdf_to_images(file_path):
                            data_uri = encode_file_to_base64(img_path)
                            files.append(
                                {
                                    "type": "image_url",
                                    "image_url": {"url": data_uri},
                                }
                            )
                    elif mime_type and mime_type.startswith("image"):
                        data_uri = encode_file_to_base64(file_path)
                        files.append(
                            {
                                "type": "image_url",
                                "image_url": {"url": data_uri},
                            }
                        )
                # Non-local entries: pass http(s) URLs through untouched.
                elif file_path.startswith("http"):
                    files.append(
                        {"type": "image_url", "image_url": {"url": file_path}}
                    )
            text = item["content"][1]["content"]
            messages.append(
                {"role": "user", "content": files + [{"type": "text", "text": text}]}
            )
        elif item["role"] == "assistant":
            contents = [
                c for c in item["content"] if c.get("type") == "text"
            ]
            messages.append(
                {
                    "role": "assistant",
                    "content": contents[0]["content"] if contents else "",
                }
            )
    return messages
# ---------------------------------------------------------------------------
# UI config helpers
# ---------------------------------------------------------------------------
def user_config(disabled_actions=None):
    """Build the user-message bubble config: copy/edit plus confirmed delete.

    Args:
        disabled_actions: Optional list of action names to grey out
            (used while a completion is streaming).
    """
    delete_with_confirm = ChatbotActionConfig(
        action="delete",
        popconfirm=dict(
            title="Delete message",
            description="Are you sure you want to delete this message?",
            okButtonProps=dict(danger=True),
        ),
    )
    return ChatbotUserConfig(
        class_names=dict(content="user-message-content"),
        actions=["copy", "edit", delete_with_confirm],
        disabled_actions=disabled_actions,
    )
def bot_config(disabled_actions=None, model_key=DEFAULT_MODEL_KEY):
    """Build the assistant-message bubble config for the selected model.

    Args:
        disabled_actions: Optional list of action names to grey out
            (used while a completion is streaming).
        model_key: Key into MODELS; picks the avatar icon.
    """
    def _confirmed(action, title, description):
        # Destructive actions get a danger-styled confirmation popup.
        return ChatbotActionConfig(
            action=action,
            popconfirm=dict(
                title=title,
                description=description,
                okButtonProps=dict(danger=True),
            ),
        )

    selected = MODELS[model_key]
    return ChatbotBotConfig(
        actions=[
            "copy",
            "edit",
            _confirmed(
                "retry",
                "Regenerate message",
                "This will also delete all subsequent messages.",
            ),
            _confirmed(
                "delete",
                "Delete message",
                "Are you sure you want to delete this message?",
            ),
        ],
        avatar=selected["icon"],
        disabled_actions=disabled_actions,
    )
def welcome_config(model_key=DEFAULT_MODEL_KEY):
    """Build the empty-chat welcome screen for the selected model.

    Shows the model's icon and label plus two starter-prompt groups
    (document intelligence and design-to-code) that users can click to
    pre-fill the input.
    """
    model = MODELS[model_key]
    return ChatbotWelcomeConfig(
        variant="borderless",
        icon=model["icon"],
        title=f"Hello, I'm {model['label']}",
        description="Upload images or PDFs and start a conversation. I specialize in document analysis and converting designs to code.",
        prompts=dict(
            title="How can I help you today?",
            styles={
                "list": {"width": "100%"},
                "item": {"flex": 1},
            },
            items=[
                {
                    "label": "📄 Document Intelligence",
                    "children": [
                        {
                            "description": "Extract all parties, dates, and key obligations from this contract.",
                        },
                        {
                            "description": "Summarize this medical report. List diagnoses, medications, and follow-up actions.",
                        },
                        {
                            "description": "Extract all line items and totals from this invoice into a markdown table.",
                        },
                    ],
                },
                {
                    "label": "🎨 Design to Code",
                    "children": [
                        {
                            "description": "Convert this wireframe into a responsive HTML page with a modern look.",
                        },
                        {
                            "description": "Build this as a landing page with hero section, features grid, and footer using HTML and CSS.",
                        },
                        {
                            "description": "Create a responsive dashboard layout matching this sketch with HTML, CSS, and inline JS.",
                        },
                    ],
                },
            ],
        ),
    )
def markdown_config():
    """Return the default markdown rendering config for chat messages."""
    return ChatbotMarkdownConfig()
def upload_config():
    """Build the multimodal-input upload config: accepts images and PDFs only."""
    return MultimodalInputUploadConfig(
        accept="image/*,.pdf",
        placeholder={
            "inline": {
                "title": "Upload files",
                "description": "Click or drag files to upload images or PDFs",
            },
            "drop": {
                "title": "Drop files here",
            },
        },
    )
# ---------------------------------------------------------------------------
# Event handlers
# ---------------------------------------------------------------------------
class Events:
    """Static event handlers wired to the Gradio components defined below.

    Handlers reference module-level components (``chatbot``, ``state``,
    ``input``, ``conversations``, ``clear_btn`` …) created in the Blocks
    section, so this class is only usable after the UI is constructed.
    ``state_value`` is the session dict held in the ``state`` gr.State.
    """

    @staticmethod
    def submit(state_value):
        """Stream a model completion for the active conversation.

        Appends a pending assistant placeholder, streams content deltas
        into it (yielding UI updates as chunks arrive), then stamps the
        elapsed time into the footer. API-key errors are rendered inline;
        any other exception is rendered inline and re-raised.
        """
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        model_key = state_value.get("model_key", DEFAULT_MODEL_KEY)
        model_info = MODELS[model_key]
        messages = format_history(history)
        # Placeholder assistant bubble shown while the stream is pending.
        history.append(
            {
                "role": "assistant",
                "content": [],
                "key": str(uuid.uuid4()),
                "loading": True,
                "header": model_info["label"],
                "status": "pending",
            }
        )
        yield {
            chatbot: gr.update(value=history),
            state: gr.update(value=state_value),
        }
        try:
            try:
                client = get_client()
            except ValueError as e:
                # Missing API key: show the error in-chat instead of crashing.
                history[-1]["loading"] = False
                history[-1]["status"] = "done"
                history[-1]["content"] = [
                    {
                        "type": "text",
                        "content": f'<span style="color: var(--color-red-500)">⚠️ {e}</span>',
                    }
                ]
                yield {
                    chatbot: gr.update(value=history),
                    state: gr.update(value=state_value),
                }
                return
            response = client.chat.completions.create(
                model=model_info["id"],
                messages=messages,
                stream=True,
                max_tokens=4096,
            )
            start_time = time.time()
            answer_content = ""
            contents = [None]
            for chunk in response:
                # Skip keep-alive/empty chunks that carry no content delta.
                if not chunk or not chunk.choices[0].delta.content:
                    continue
                delta = chunk.choices[0].delta
                if delta.content:
                    if contents[0] is None:
                        contents[0] = {"type": "text", "content": ""}
                    answer_content += delta.content
                    contents[0]["content"] = answer_content
                history[-1]["content"] = [c for c in contents if c]
                history[-1]["loading"] = False
                yield {
                    chatbot: gr.update(value=history),
                    state: gr.update(value=state_value),
                }
            history[-1]["status"] = "done"
            # Footer shows total wall-clock generation time.
            cost_time = f"{time.time() - start_time:.2f}"
            history[-1]["footer"] = f"{cost_time}s"
            yield {
                chatbot: gr.update(value=history),
                state: gr.update(value=state_value),
            }
        except Exception as e:
            # Render the failure inline, then re-raise so Gradio logs it.
            history[-1]["loading"] = False
            history[-1]["status"] = "done"
            history[-1]["content"] = [
                {
                    "type": "text",
                    "content": f'<span style="color: var(--color-red-500)">❌ Error: {e}</span>',
                }
            ]
            yield {
                chatbot: gr.update(value=history),
                state: gr.update(value=state_value),
            }
            raise

    @staticmethod
    def add_message(input_value, state_value):
        """Handle a user submission end-to-end.

        Enforces the session message cap, lazily creates a conversation,
        records the user turn (files + text), then drives
        preprocess → submit → postprocess, yielding each UI update.
        """
        text = input_value["text"]
        files = input_value["files"]
        # Check global session message limit
        if state_value.get("total_messages", 0) >= MAX_MESSAGES_PER_CHAT:
            # Show error - global limit reached
            error_msg = f"⚠️ Session message limit reached ({MAX_MESSAGES_PER_CHAT} messages)."
            yield {
                chatbot: gr.update(
                    value=[
                        {
                            "role": "assistant",
                            "content": [{"type": "text", "content": f'<span style="color: var(--color-orange-500)">{error_msg}</span>'}],
                            "key": str(uuid.uuid4()),
                        }
                    ]
                ),
                state: gr.update(value=state_value),
            }
            return
        # Create new chat if needed (no conversation active)
        if not state_value["conversation_id"]:
            random_id = str(uuid.uuid4())
            state_value["conversation_id"] = random_id
            state_value["conversation_contexts"][random_id] = {"history": []}
            state_value["conversations"].append(
                {"label": text[:50], "key": random_id}
            )
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        # Increment global message counter (user message + assistant response = 2 messages)
        state_value["total_messages"] = state_value.get("total_messages", 0) + 2
        history.append(
            {
                "key": str(uuid.uuid4()),
                "role": "user",
                "content": [
                    {"type": "file", "content": [f for f in files]},
                    {"type": "text", "content": text},
                ],
            }
        )
        yield Events.preprocess_submit(clear_input=True)(state_value)
        try:
            for chunk in Events.submit(state_value):
                yield chunk
        except Exception:
            raise
        finally:
            # Always unlock the UI, even if streaming raised.
            yield Events.postprocess_submit(state_value)

    @staticmethod
    def preprocess_submit(clear_input=True):
        """Return a handler that locks the UI while a completion runs.

        Disables conversation switching, the new-chat/clear buttons, and
        message edit/retry/delete actions; optionally clears the input box.
        """
        def handler(state_value):
            history = state_value["conversation_contexts"][
                state_value["conversation_id"]
            ]["history"]
            return {
                **(
                    {input: gr.update(value=None, loading=True)}
                    if clear_input
                    else {}
                ),
                conversations: gr.update(
                    active_key=state_value["conversation_id"],
                    items=list(
                        map(
                            lambda item: {
                                **item,
                                # Disable every conversation except the active one.
                                "disabled": item["key"]
                                != state_value["conversation_id"],
                            },
                            state_value["conversations"],
                        )
                    ),
                ),
                add_conversation_btn: gr.update(disabled=True),
                clear_btn: gr.update(disabled=True),
                conversation_delete_menu_item: gr.update(disabled=True),
                chatbot: gr.update(
                    value=history,
                    bot_config=bot_config(
                        disabled_actions=["edit", "retry", "delete"],
                        model_key=state_value.get("model_key", DEFAULT_MODEL_KEY),
                    ),
                    user_config=user_config(
                        disabled_actions=["edit", "delete"]
                    ),
                ),
                state: gr.update(value=state_value),
            }
        return handler

    @staticmethod
    def postprocess_submit(state_value):
        """Re-enable the UI after a completion finishes or is cancelled."""
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        mk = state_value.get("model_key", DEFAULT_MODEL_KEY)
        return {
            input: gr.update(loading=False),
            conversation_delete_menu_item: gr.update(disabled=False),
            clear_btn: gr.update(disabled=False),
            conversations: gr.update(items=state_value["conversations"]),
            add_conversation_btn: gr.update(disabled=False),
            chatbot: gr.update(
                value=history,
                bot_config=bot_config(model_key=mk),
                user_config=user_config(),
            ),
            state: gr.update(value=state_value),
        }

    @staticmethod
    def cancel(state_value):
        """Mark the in-flight assistant message as paused and unlock the UI."""
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        history[-1]["loading"] = False
        history[-1]["status"] = "done"
        history[-1]["footer"] = "Chat completion paused"
        return Events.postprocess_submit(state_value)

    @staticmethod
    def delete_message(state_value, e: gr.EventData):
        """Remove the message at the event's index and adjust the session counter."""
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        # Decrement total_messages counter when deleting
        if index < len(history):
            deleted_role = history[index]["role"]
            if deleted_role == "user":
                # User message + expected assistant response = 2 messages
                state_value["total_messages"] = max(0, state_value.get("total_messages", 0) - 2)
            elif deleted_role == "assistant":
                # Just the assistant message = 1 message
                state_value["total_messages"] = max(0, state_value.get("total_messages", 0) - 1)
        history = history[:index] + history[index + 1 :]
        state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"] = history
        return gr.update(value=state_value)

    @staticmethod
    def edit_message(state_value, chatbot_value, e: gr.EventData):
        """Sync an edited message back into state and tag it 'Edited' once."""
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        history[index]["content"] = chatbot_value[index]["content"]
        if not history[index].get("edited"):
            history[index]["edited"] = True
            footer = history[index].get("footer", "") or ""
            history[index]["footer"] = (footer + " " if footer else "") + "Edited"
        return gr.update(value=state_value), gr.update(value=history)

    @staticmethod
    def regenerate_message(state_value, e: gr.EventData):
        """Drop the retried message and everything after it, then re-run submit."""
        index = e._data["payload"][0]["index"]
        history = state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"]
        # Truncate at the retried message; submit() regenerates from there.
        history = history[:index]
        state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"] = history
        yield Events.preprocess_submit()(state_value)
        try:
            for chunk in Events.submit(state_value):
                yield chunk
        except Exception:
            raise
        finally:
            yield Events.postprocess_submit(state_value)

    @staticmethod
    def apply_prompt(e: gr.EventData, input_value):
        """Copy a clicked welcome-screen prompt (text + optional urls) into the input."""
        input_value["text"] = e._data["payload"][0]["value"]["description"]
        urls = e._data["payload"][0]["value"].get("urls", [])
        input_value["files"] = urls
        return gr.update(value=input_value)

    @staticmethod
    def new_chat(state_value):
        """Deselect the active conversation so the next message starts a new one."""
        if not state_value["conversation_id"]:
            return gr.skip()
        state_value["conversation_id"] = ""
        return (
            gr.update(active_key=state_value["conversation_id"]),
            gr.update(value=None),
            gr.update(value=state_value),
        )

    @staticmethod
    def select_conversation(state_value, e: gr.EventData):
        """Switch to the clicked conversation and load its history."""
        active_key = e._data["payload"][0]
        # No-op if already active or the key is unknown (e.g. just deleted).
        if state_value["conversation_id"] == active_key or (
            active_key not in state_value["conversation_contexts"]
        ):
            return gr.skip()
        state_value["conversation_id"] = active_key
        return (
            gr.update(active_key=active_key),
            gr.update(
                value=state_value["conversation_contexts"][active_key]["history"]
            ),
            gr.update(value=state_value),
        )

    @staticmethod
    def click_conversation_menu(state_value, e: gr.EventData):
        """Handle the conversation context menu (currently only 'delete')."""
        conversation_id = e._data["payload"][0]["key"]
        operation = e._data["payload"][1]["key"]
        if operation == "delete":
            del state_value["conversation_contexts"][conversation_id]
            state_value["conversations"] = [
                item
                for item in state_value["conversations"]
                if item["key"] != conversation_id
            ]
            # Deleting the active conversation also clears the chat pane.
            if state_value["conversation_id"] == conversation_id:
                state_value["conversation_id"] = ""
                return (
                    gr.update(
                        items=state_value["conversations"],
                        active_key=state_value["conversation_id"],
                    ),
                    gr.update(value=None),
                    gr.update(value=state_value),
                )
        return (
            gr.update(items=state_value["conversations"]),
            gr.skip(),
            gr.update(value=state_value),
        )

    @staticmethod
    def clear_conversation_history(state_value):
        """Empty the active conversation's history (no-op if none active)."""
        if not state_value["conversation_id"]:
            return gr.skip()
        state_value["conversation_contexts"][
            state_value["conversation_id"]
        ]["history"] = []
        return gr.update(value=None), gr.update(value=state_value)

    @staticmethod
    def change_model(state_value, e: gr.EventData):
        """Persist the newly selected model and refresh chat branding."""
        model_key = e._data["payload"][0]
        state_value["model_key"] = model_key
        return (
            gr.update(
                welcome_config=welcome_config(model_key),
                bot_config=bot_config(model_key=model_key),
            ),
            gr.update(value=state_value),
        )
# ---------------------------------------------------------------------------
# CSS
# ---------------------------------------------------------------------------
# Inline stylesheet: full-bleed layout, sidebar styling, responsive chat
# padding, and the top "powered by" bar.
css = """
.gradio-container {
padding: 0 !important;
}
.gradio-container > main.fillable {
padding: 0 !important;
}
#chatbot {
height: calc(100vh - 70px);
max-height: 1500px;
}
#chatbot .chatbot-conversations {
height: 100vh;
background-color: var(--ms-gr-ant-color-bg-layout);
padding-left: 4px;
padding-right: 4px;
}
#chatbot .chatbot-conversations .chatbot-conversations-list {
padding-left: 0;
padding-right: 0;
}
#chatbot .chatbot-chat {
padding: 32px;
padding-bottom: 16px;
height: 100%;
}
@media (max-width: 768px) {
#chatbot .chatbot-chat {
padding: 10px;
padding-bottom: 10px;
}
}
#chatbot .chatbot-chat .chatbot-chat-messages {
flex: 1;
}
.powered-by-bar {
display: flex;
align-items: center;
justify-content: space-between;
padding: 6px 16px;
border-bottom: 1px solid var(--ms-gr-ant-color-border-secondary, #f0f0f0);
}
.powered-by-bar .bar-title {
font-size: 24px;
font-weight: 600;
color: #333;
}
.powered-by-bar .bar-right {
display: flex;
align-items: center;
gap: 6px;
}
.powered-by-bar .bar-right span {
font-size: 11px;
color: #999;
text-transform: uppercase;
letter-spacing: 0.04em;
}
.powered-by-bar img {
height: 18px;
opacity: 0.85;
}
"""
# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
with gr.Blocks(css=css, fill_width=True) as demo:
    # Per-session state: all conversation histories, the active conversation
    # id, the selected model key, and the global message counter.
    state = gr.State(
        {
            "conversation_contexts": {},
            "conversations": [],
            "conversation_id": "",
            "model_key": DEFAULT_MODEL_KEY,
            "total_messages": 0,
        }
    )
    with ms.Application(), antdx.XProvider(theme=DEFAULT_THEME), ms.AutoLoading():
        # Check for API key and show warning if missing
        api_key = os.environ.get("FIREWORKS_API_KEY", "")
        if not api_key:
            antd.Alert(
                message="API Key Required",
                description="Please set the FIREWORKS_API_KEY environment variable to use this app. The chat will not work without it.",
                type="error",
                show_icon=True,
                banner=True,
                closable=False,
            )
        # Top bar: title left, powered-by right
        with ms.Div(elem_classes="powered-by-bar"):
            ms.Span("Welcome to FW Vision Studio", elem_classes="bar-title")
            with ms.Div(elem_classes="bar-right"):
                ms.Span("Powered by")
                antd.Image(
                    "./assets/fireworks_logo.png",
                    preview=False,
                    alt="Fireworks AI",
                    height=30,
                )
        with antd.Row(gutter=[20, 20], wrap=False, elem_id="chatbot"):
            # --- Left sidebar ---
            with antd.Col(
                md=dict(flex="0 0 260px", span=24, order=0),
                span=0,
                order=1,
                elem_style=dict(width=0),
            ):
                with ms.Div(elem_classes="chatbot-conversations"):
                    with antd.Flex(
                        vertical=True, gap="small", elem_style=dict(height="100%")
                    ):
                        # Model selector
                        model_select = antd.Select(
                            value=DEFAULT_MODEL_KEY,
                            options=[
                                {"label": m["label"], "value": k}
                                for k, m in MODELS.items()
                            ],
                            elem_style=dict(width="100%"),
                        )
                        # New conversation button
                        with antd.Button(
                            value=None, color="primary", variant="filled", block=True
                        ) as add_conversation_btn:
                            ms.Text("New Conversation")
                            with ms.Slot("icon"):
                                antd.Icon("PlusOutlined")
                        # Conversations list
                        with antdx.Conversations(
                            elem_classes="chatbot-conversations-list",
                        ) as conversations:
                            with ms.Slot("menu.items"):
                                with antd.Menu.Item(
                                    label="Delete", key="delete", danger=True
                                ) as conversation_delete_menu_item:
                                    with ms.Slot("icon"):
                                        antd.Icon("DeleteOutlined")
            # --- Right: chat area ---
            with antd.Col(flex=1):
                with antd.Flex(
                    vertical=True, gap="small", elem_classes="chatbot-chat"
                ):
                    chatbot = pro.Chatbot(
                        elem_classes="chatbot-chat-messages",
                        height=0,
                        markdown_config=markdown_config(),
                        welcome_config=welcome_config(),
                        user_config=user_config(),
                        bot_config=bot_config(),
                    )
                    with pro.MultimodalInput(
                        placeholder="Ask me to analyze a document or convert a design to code...",
                        upload_config=upload_config(),
                    ) as input:
                        with ms.Slot("prefix"):
                            with antd.Flex(
                                gap=4,
                                wrap=True,
                                elem_style=dict(
                                    maxWidth="40vw", display="inline-flex"
                                ),
                            ):
                                with antd.Button(value=None, type="text") as clear_btn:
                                    with ms.Slot("icon"):
                                        antd.Icon("ClearOutlined")
    # --- Event wiring ---
    # Model selector
    model_select.change(
        fn=Events.change_model,
        inputs=[state],
        outputs=[chatbot, state],
    )
    # Conversations
    add_conversation_btn.click(
        fn=Events.new_chat,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )
    conversations.active_change(
        fn=Events.select_conversation,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )
    conversations.menu_click(
        fn=Events.click_conversation_menu,
        inputs=[state],
        outputs=[conversations, chatbot, state],
    )
    # Chatbot
    chatbot.welcome_prompt_select(
        fn=Events.apply_prompt, inputs=[input], outputs=[input]
    )
    chatbot.delete(fn=Events.delete_message, inputs=[state], outputs=[state])
    chatbot.edit(
        fn=Events.edit_message,
        inputs=[state, chatbot],
        outputs=[state, chatbot],
    )
    # Kept so the cancel handler below can abort an in-flight regeneration.
    regenerating_event = chatbot.retry(
        fn=Events.regenerate_message,
        inputs=[state],
        outputs=[
            input,
            clear_btn,
            conversation_delete_menu_item,
            add_conversation_btn,
            conversations,
            chatbot,
            state,
        ],
    )
    # Input
    submit_event = input.submit(
        fn=Events.add_message,
        inputs=[input, state],
        outputs=[
            input,
            clear_btn,
            conversation_delete_menu_item,
            add_conversation_btn,
            conversations,
            chatbot,
            state,
        ],
    )
    input.cancel(
        fn=Events.cancel,
        inputs=[state],
        outputs=[
            input,
            conversation_delete_menu_item,
            clear_btn,
            conversations,
            add_conversation_btn,
            chatbot,
            state,
        ],
        cancels=[submit_event, regenerating_event],
        queue=False,
    )
    clear_btn.click(
        fn=Events.clear_conversation_history,
        inputs=[state],
        outputs=[chatbot, state],
    )
if __name__ == "__main__":
    # High concurrency limit so many sessions can stream simultaneously;
    # bind on all interfaces for containerized deployment.
    demo.queue(default_concurrency_limit=100).launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        max_threads=100,
    )