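# Page header captured along with this file (not part of the program):
#   author avatar: OscarGD6 | commit message: "Update app.py" | commit: 8881cbe (verified)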
import gradio as gr
import base64
import json
from PIL import ImageDraw
from io import BytesIO
import re
import requests
from transformers import Qwen2VLProcessor
# Qwen2-VL processor loaded once at import time; ask_triton uses it to render
# the chat messages into a prompt string via apply_chat_template.
processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", use_fast=True)
# HTTP "generate" endpoint that ask_triton POSTs the prompt and image to.
url = "http://localhost:8000/v2/models/vllm_model/generate"
# Matches a box written as "(x1,y1),(x2,y2)"; whitespace around the numbers
# and commas is tolerated.
_BBOX_PATTERN = re.compile(
    r"\(\s*(\d+)\s*,\s*(\d+)\s*\)\s*,\s*\(\s*(\d+)\s*,\s*(\d+)\s*\)"
)
# Marker preceding the assistant turn when the server echoes the prompt back.
_ASSISTANT_MARKER = "<|im_start|>assistant\n"
# (connect, read) timeouts in seconds so a stalled server cannot hang the UI.
_REQUEST_TIMEOUT = (10, 300)
# Box coordinates are divided by this value before scaling to pixel size.
_COORD_SCALE = 1000


def _extract_bbox(text):
    """Return the first ``(x1, y1, x2, y2)`` box found in *text*, or ``None``.

    The corners are reordered so that ``x1 <= x2`` and ``y1 <= y2``; recent
    Pillow versions reject rectangles whose corners are reversed.
    """
    match = _BBOX_PATTERN.search(text)
    if match is None:
        return None
    xa, ya, xb, yb = map(int, match.groups())
    return min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)


# Function to handle the inference and visualization
def ask_triton(image):
    """Run nutrition-table detection on *image* and draw the predicted box.

    The image is PNG-encoded and base64'd, a chat prompt is rendered with the
    module-level ``processor``, and both are POSTed to the module-level
    ``url``. If the reply contains a box of the form ``(x1,y1),(x2,y2)``, it
    is scaled from the 0-1000 coordinate grid to the image size and drawn in
    green on a copy of the image.

    Args:
        image: A PIL image, or ``None`` when nothing was uploaded.

    Returns:
        A ``(image, text)`` pair matching the two outputs declared on the
        Gradio interface. On success ``image`` is the (possibly annotated)
        picture and ``text`` is the model reply. On failure ``image`` is
        ``None`` and ``text`` is an ``"Error: ..."`` message.
    """
    if image is None:
        return None, "Error: no image provided"

    try:
        # Image Input
        buf = BytesIO()
        image.save(buf, format="PNG")
        img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

        # Build conversation
        messages = [
            {
                'role': 'system',
                'content': [{'type': 'text', 'text': "You are a Vision Language Model specialized in product images. Detect nutrition tables."}],
            },
            {
                'role': 'user',
                'content': [
                    {
                        'type': 'image',
                        'image': img_b64,
                    },
                    {
                        'type': 'text',
                        'text': "Detect the bounding box of the nutrition table.",
                    },
                ],
            },
        ]

        # Apply chat template and build payload
        chat_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        payload = {
            "text_input": chat_text,
            "image": img_b64,
            "parameters": {
                "stream": False,
                "temperature": 0,
                "max_tokens": 2048,
            },
        }

        # Send POST request to vLLM
        response = requests.post(url, json=payload, timeout=_REQUEST_TIMEOUT)
        if not response.ok:
            # Surface the server's own message instead of silently producing
            # an empty result.
            raise RuntimeError(
                f"inference server returned HTTP {response.status_code}: "
                f"{response.text[:500]}"
            )
        output_text = response.json().get("text_output", "")

        # Extract assistant response
        if _ASSISTANT_MARKER in output_text:
            output_text = output_text.rsplit(_ASSISTANT_MARKER, 1)[-1]

        # Extract and draw bounding box
        bbox = _extract_bbox(output_text)
        if bbox is None:
            return image, output_text

        x1, y1, x2, y2 = bbox
        # Draw on a copy so the caller's image object is left untouched.
        annotated = image.copy()
        w, h = annotated.size
        ImageDraw.Draw(annotated).rectangle(
            (
                x1 / _COORD_SCALE * w,
                y1 / _COORD_SCALE * h,
                x2 / _COORD_SCALE * w,
                y2 / _COORD_SCALE * h,
            ),
            outline="green",
            width=10,
        )
        return annotated, output_text
    except Exception as e:
        # UI boundary: report the failure in the text output. Two values are
        # returned because the interface declares two output components.
        return None, f"Error: {e}"
# Gradio Interface
# One PIL image in; annotated image plus the raw model text out.
demo = gr.Interface(
    fn=ask_triton,
    inputs=[
        gr.Image(type="pil"),
    ],
    outputs=["image", "text"],
    title="Nutrition Table Detection",
    description="Please upload an image containing a nutrition table to visualize the bounding box prediction.",
)
# Serve on all network interfaces at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)