|
|
import gradio as gr |
|
|
import base64 |
|
|
import json |
|
|
from PIL import ImageDraw |
|
|
from io import BytesIO |
|
|
import re |
|
|
import requests |
|
|
from transformers import Qwen2VLProcessor |
|
|
|
|
|
# Chat-template processor for Qwen2-VL; from_pretrained downloads/loads the
# tokenizer assets at import time, so the first start can be slow.
processor = Qwen2VLProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct", use_fast=True)


# Triton Inference Server "generate" endpoint for the vLLM-backed model.
url = "http://localhost:8000/v2/models/vllm_model/generate"
|
|
|
|
|
|
|
|
def ask_triton(image):
    """Detect the nutrition table in *image* via the Triton-hosted Qwen2-VL model.

    Encodes the PIL image as base64 PNG, renders a chat prompt with the
    module-level ``processor``, POSTs it to the Triton generate endpoint
    (``url``), extracts a ``(x1,y1),(x2,y2)`` bounding box from the model
    output (coordinates normalized to 0-1000, per the regex below), and
    draws it on the image in place.

    Args:
        image: PIL image uploaded through the Gradio UI.

    Returns:
        tuple: ``(annotated image or None on error, model output text or an
        error message)`` — always a 2-tuple, matching the interface's two
        outputs (``"image"``, ``"text"``).
    """
    try:
        # Serialize the uploaded image to base64-encoded PNG for the payload.
        buf = BytesIO()
        image.save(buf, format="PNG")
        img_b64 = base64.b64encode(buf.getvalue()).decode("utf-8")

        messages = [
            {
                'role': 'system',
                'content': [{'type': 'text', 'text': "You are a Vision Language Model specialized in product images. Detect nutrition tables."}]
            },
            {
                'role': 'user',
                'content': [
                    {
                        'type': 'image',
                        'image': img_b64,
                    },
                    {
                        'type': 'text',
                        'text': "Detect the bounding box of the nutrition table."
                    }
                ]
            }
        ]

        # Render the messages into the raw prompt string the server expects.
        chat_text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        payload = {
            "text_input": chat_text,
            "image": img_b64,
            "parameters": {
                "stream": False,
                "temperature": 0,
                "max_tokens": 2048
            }
        }

        # BUGFIX: added a timeout so a hung Triton server cannot block the UI
        # worker forever, and raise_for_status() so HTTP error responses
        # surface as exceptions instead of being parsed as (broken) JSON.
        response = requests.post(url, json=payload, timeout=120)
        response.raise_for_status()
        resp_json = response.json()
        output_text = resp_json.get("text_output", "")

        # Keep only the assistant turn if the server echoed the full prompt.
        if "<|im_start|>assistant\n" in output_text:
            output_text = output_text.rsplit("<|im_start|>assistant\n", 1)[-1]

        # The model emits the box as "(x1,y1),(x2,y2)"; coordinates are
        # treated as normalized to 0-1000 (see the scaling below).
        match = re.search(r"\((\d+),(\d+)\),\((\d+),(\d+)\)", output_text)
        if match:
            x1, y1, x2, y2 = map(int, match.groups())
            draw = ImageDraw.Draw(image)
            w, h = image.size
            # Scale normalized coordinates to pixels before drawing.
            draw.rectangle((x1 / 1000 * w, y1 / 1000 * h, x2 / 1000 * w, y2 / 1000 * h), outline="green", width=10)

        return image, output_text

    except Exception as e:
        # BUGFIX: the original returned a bare string here, but the Gradio
        # interface declares two outputs ("image", "text"); a single return
        # value would itself crash the callback. Return a 2-tuple so errors
        # are shown in the text output instead.
        return None, f"Error: {e}"
|
|
|
|
|
|
|
|
# Build and launch the Gradio UI: one PIL image input, two outputs (the
# annotated image and the raw model text). Binding to 0.0.0.0 exposes the
# app on all interfaces (e.g. for Docker port mapping).
gr.Interface(
    fn=ask_triton,
    inputs=[
        gr.Image(type="pil")
    ],
    outputs=["image", "text"],
    title="Nutrition Table Detection",
    # BUGFIX: corrected grammar in the user-facing description string
    # ("upload image ... to visualizes" -> "upload an image ... to visualize").
    description="Please upload an image containing a nutrition table to visualize the bounding box prediction."
).launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|