"""Gradio front-end for an LLM tool-calling demo (detection / segmentation).

The uploaded image is written to ``./static``, its path is embedded in a text
message sent to a module-level ``FunctionCallingChat`` instance, and the raw
tool call plus the tool results are rendered back as Markdown.
"""

import base64
import json
import logging
import os
import uuid
from pathlib import Path

import gradio as gr
import torch

from src.llm.chat import FunctionCallingChat

logger = logging.getLogger(__name__)

# Single chat instance shared by every request handled by this process.
chatbot = FunctionCallingChat()
chatbot.temperature = 0.7


def image_to_base64(image_path: str) -> str:
    """Return the contents of the file at *image_path* as a base64 string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(image_path, "rb") as f:
        return base64.b64encode(f.read()).decode("utf-8")


def save_uploaded_image(pil_img) -> Path:
    """Save PIL image to ./static and return its path.

    The file name is ``upload_<8 hex chars>.png``; the hex part comes from a
    fresh UUID so that separate uploads do not overwrite each other.
    """
    Path("static").mkdir(exist_ok=True)
    filename = f"upload_{uuid.uuid4().hex[:8]}.png"
    path = Path("static") / filename
    pil_img.save(path)
    return path


def inference(pil_img, prompt, task, temperature):
    """Run the chatbot on an uploaded image and format its output as Markdown.

    Args:
        pil_img: Uploaded image (the UI passes a PIL image), or ``None`` when
            nothing was uploaded.
        prompt: Optional free-text instruction; only used when *task* is
            neither ``"Detection"`` nor ``"Segmentation"``.
        task: ``"Detection"``, ``"Segmentation"``, or anything else for a
            free-form request.
        temperature: Value assigned to ``chatbot.temperature`` for this call.

    Returns:
        A Markdown string with the raw tool call and the JSON-encoded tool
        results, or a warning message when no image was supplied.
    """
    if pil_img is None:
        return "❗ Please upload an image first."

    img_path = save_uploaded_image(pil_img)
    # NOTE: `chatbot` is a shared module-level object, so this assignment is
    # visible to every subsequent call, not just this one.
    chatbot.temperature = temperature

    # Build the user message.
    if task == "Detection":
        user_msg = f"Please detect objects in the image '{img_path}'."
    elif task == "Segmentation":
        user_msg = f"Please segment objects in the image '{img_path}'."
    else:
        # Guard against a missing textbox value before stripping.
        prompt = (prompt or "").strip() or "Analyse this image."
        user_msg = f"{prompt} (image: '{img_path}')"

    try:
        out = chatbot(user_msg)
        return (
            "### 🔧 Raw tool-call\n"
            f"{out['raw_tool_call']}\n\n"
            "### 📦 Tool results\n"
            f"{json.dumps(out['results'], indent=2)}"
        )
    finally:
        # Always delete the temporary image. Cleanup is best-effort: a failed
        # deletion is logged but must not mask the result or the real error.
        try:
            img_path.unlink(missing_ok=True)
        except OSError:
            logger.warning("Could not delete temporary image %s", img_path, exc_info=True)


def create_header():
    """Build the header row: logo in a narrow column, title text in a wide one.

    Must be called inside an active ``gr.Blocks`` context.

    Raises:
        OSError: If ``static/aivn_logo.png`` cannot be read.
    """
    with gr.Row():
        with gr.Column(scale=1):
            logo_base64 = image_to_base64("static/aivn_logo.png")
            # Embed the logo as a data URI so the encoded image is actually
            # displayed; "Logo" is kept as the alternative text.
            gr.HTML(
                f"""<img src="data:image/png;base64,{logo_base64}" alt="Logo">"""
            )
        with gr.Column(scale=4):
            # Written as concatenated literals so the text keeps no leading
            # indentation (Markdown renders indented lines as code blocks).
            gr.Markdown(
                "\n"
                "\n"
                "🖼️ Vision Tool-Calling Demo\n"
                "\n"
                "LLM-driven Detection & Segmentation\n"
                "\n"
                "🚀 AIO2024 Module 10 Project 🤗\n"
                "\n"
                "🔍 Using Llama 3.2-1B + YOLO + SAM\n"
                "\n"
            )


def create_footer():
    """Create and return the footer HTML component.

    The footer markup currently consists of a single space, i.e. it renders
    nothing visible.
    """
    footer_html = """ """
    return gr.HTML(footer_html)


# Stylesheet passed to gr.Blocks; `.full-width-btn` styles the submit button.
custom_css = (
    " .gradio-container {min-height:100vh;}"
    " .content-wrap {padding-bottom:60px;}"
    " .full-width-btn {width:100%!important;height:50px!important;font-size:18px!important;"
    " margin-top:20px!important;background:linear-gradient(45deg,#FF6B6B,#4ECDC4)!important;"
    " color:white!important;border:none!important;}"
    " .full-width-btn:hover {background:linear-gradient(45deg,#FF5252,#3CB4AC)!important;}"
    " "
)

# ──────────────────────────── Blocks ─────────────────────────
with gr.Blocks(css=custom_css) as demo:
    create_header()

    with gr.Row(equal_height=True, variant="panel"):
        # Left column: inputs.
        with gr.Column(scale=3):
            upload_image = gr.Image(label="Upload image", type="pil")
            prompt_input = gr.Textbox(
                label="Optional prompt",
                placeholder="e.g. Detect cats only",
            )
            task_choice = gr.Radio(
                ["Auto", "Detection", "Segmentation"],
                value="Auto",
                label="Task",
            )
            # Sampling temperature forwarded to `inference`.
            temp_slider = gr.Slider(
                minimum=0.1,
                maximum=1.5,
                step=0.1,
                value=0.7,
                label="Temperature (sampling)",
            )
            submit_btn = gr.Button("Run 🔧", elem_classes="full-width-btn")

        # Right column: rendered result.
        with gr.Column(scale=4):
            output_text = gr.Markdown(label="Result")

    submit_btn.click(
        inference,
        inputs=[upload_image, prompt_input, task_choice, temp_slider],
        outputs=output_text,
    )

    create_footer()

if __name__ == "__main__":
    demo.launch(allowed_paths=["static/aivn_logo.png", "static"])