import os, base64, json, uuid, torch, gradio as gr
from pathlib import Path
from src.llm.chat import FunctionCallingChat
# Single module-level chat engine shared by every request; its sampling
# temperature is overwritten per-call inside `inference` (see below).
chatbot = FunctionCallingChat()
chatbot.temperature = 0.7
def image_to_base64(image_path: str):
    """Return the contents of *image_path* as a base64-encoded UTF-8 string."""
    raw_bytes = Path(image_path).read_bytes()
    return base64.b64encode(raw_bytes).decode("utf-8")
def save_uploaded_image(pil_img) -> Path:
    """Persist *pil_img* under ./static with a random filename and return its path."""
    static_dir = Path("static")
    static_dir.mkdir(exist_ok=True)
    target = static_dir / f"upload_{uuid.uuid4().hex[:8]}.png"
    pil_img.save(target)
    return target
def inference(pil_img, prompt, task, temperature):
    """Run the chatbot over an uploaded image and return a markdown report.

    Args:
        pil_img: Uploaded PIL image, or ``None`` when nothing was uploaded.
        prompt: Free-form user prompt (used only when *task* is custom).
        task: One of "Detection", "Segmentation", or anything else for a
            custom prompt.
        temperature: Sampling temperature forwarded to the shared chatbot.

    Returns:
        A markdown string with the raw tool-call and JSON-formatted tool
        results, or an error message when no image was supplied.
    """
    if pil_img is None:
        return "❗ Please upload an image first."

    img_path = save_uploaded_image(pil_img)
    chatbot.temperature = temperature

    # Build the user message for the selected task.
    if task == "Detection":
        user_msg = f"Please detect objects in the image '{img_path}'."
    elif task == "Segmentation":
        user_msg = f"Please segment objects in the image '{img_path}'."
    else:
        prompt = prompt.strip() or "Analyse this image."
        user_msg = f"{prompt} (image: '{img_path}')"

    try:
        out = chatbot(user_msg)
        return (
            "### 🔧 Raw tool-call\n"
            f"{out['raw_tool_call']}\n\n"
            "### 📦 Tool results\n"
            f"{json.dumps(out['results'], indent=2)}"
        )
    finally:
        # Always delete the temp image. `missing_ok=True` already covers a
        # vanished file; only OS-level failures (e.g. permissions) are
        # deliberately swallowed — was a too-broad `except Exception`.
        try:
            img_path.unlink(missing_ok=True)
        except OSError:
            pass  # best-effort cleanup; keep the UI response regardless
def create_header():
with gr.Row():
with gr.Column(scale=1):
logo_base64 = image_to_base64("static/aivn_logo.png")
gr.HTML(
f""""""
)
with gr.Column(scale=4):
gr.Markdown(
"""
LLM-driven Detection & Segmentation
🔍 Using Llama 3.2-1B + YOLO + SAM