Spaces:

NexaAIDev
/

omnivlm-dpo-demo

Running

App Files Files Community

omnivlm-dpo-demo / app.py

PerryCheng614

change to http requests

70a6a62 16 days ago

raw

history blame

3.96 kB

	import gradio as gr
	import requests
	import json
	import base64
	from PIL import Image
	import io
	import os
	from dotenv import load_dotenv

	# Load environment variables
	load_dotenv()
	API_KEY = os.getenv("API_KEY")
	if not API_KEY:
	raise ValueError("API_KEY environment variable must be set")

	def process_image_stream(image_path, prompt, max_tokens=512):
	"""
	Process image with streaming response via HTTP
	"""
	if not image_path:
	yield "Please upload an image first."
	return

	try:
	# Read and prepare image file
	with open(image_path, 'rb') as img_file:
	files = {
	'image': ('image.jpg', img_file, 'image/jpeg')
	}
	data = {
	'prompt': prompt,
	'task': 'instruct',
	'max_tokens': max_tokens
	}
	headers = {
	'X-API-Key': API_KEY
	}

	# Make streaming request
	response = requests.post(
	'https://nexa-omni.nexa4ai.com/process-image/',
	files=files,
	data=data,
	headers=headers,
	stream=True
	)

	if response.status_code != 200:
	yield f"Error: Server returned status code {response.status_code}"
	return

	# Initialize response and token counter
	response_text = ""
	token_count = 0

	# Process the streaming response
	for line in response.iter_lines():
	if line:
	line = line.decode('utf-8')
	if line.startswith('data: '):
	try:
	data = json.loads(line[6:]) # Skip 'data: ' prefix
	if data["status"] == "generating":
	# Skip first three tokens if they match specific patterns
	if token_count < 3 and data["token"] in [" ", " \n", "\n", "<\|im_start\|>", "assistant"]:
	token_count += 1
	continue
	response_text += data["token"]
	yield response_text
	elif data["status"] == "complete":
	break
	elif data["status"] == "error":
	yield f"Error: {data['error']}"
	break
	except json.JSONDecodeError:
	continue

	except Exception as e:
	yield f"Error processing request: {str(e)}"

	# Create Gradio interface
	demo = gr.Interface(
	fn=process_image_stream,
	inputs=[
	gr.Image(type="filepath", label="Upload Image"),
	gr.Textbox(
	label="Question",
	placeholder="Ask a question about the image...",
	value="Describe this image"
	),
	gr.Slider(
	minimum=50,
	maximum=200,
	value=200,
	step=1,
	label="Max Tokens"
	)
	],
	outputs=gr.Textbox(label="Response", interactive=False),
	title="NEXA OmniVLM-968M",
	description=f"""
	Model Repo: <a href="https://huggingface.co/NexaAIDev/OmniVLM-968M">NexaAIDev/OmniVLM-968M</a>

	*Model updated on Nov 21, 2024\n
	Upload an image and ask questions about it. The model will analyze the image and provide detailed answers to your queries.
	""",
	examples=[
	["example_images/example_1.jpg", "What kind of cat is this?", 128],
	["example_images/example_2.jpg", "What color is this dress? ", 128],
	["example_images/example_3.jpg", "What is this image about?", 128],
	]
	)

	if __name__ == "__main__":
	demo.queue().launch(server_name="0.0.0.0", server_port=7860)