Spaces:

AgentsGuards
/

image_utilities_mcp

Running

image_utilities_mcp / src /utils /describe.py

JuanjoSG5

current progress

cc083b4 23 days ago

4.29 kB

	import os
	import base64
	import requests
	from pathlib import Path
	from openai import OpenAI
	from urllib.parse import urlparse
	from dotenv import load_dotenv


	def describe_image(image_path: str) -> str:
	    """Generate a text description of an image using a vision chat model.

	    The image is read from a local file or downloaded from a URL,
	    base64-encoded into a ``data:`` URL and sent to the chat-completions
	    endpoint configured below. Failures are never raised to the caller;
	    they are reported as a returned string that starts with ``"Error"``.

	    Args:
	        image_path: Path to a local image file OR URL to an image.

	    Returns:
	        The model's description of the image, or an error message
	        describing what went wrong (missing API key, download failure,
	        unsupported file type, API error, ...).
	    """
	    load_dotenv()

	    # Without a key the API call cannot succeed, so bail out before any I/O.
	    api_key = os.getenv("NEBIUS_API_KEY")
	    if not api_key:
	        return "Error: NEBIUS_API_KEY environment variable not set"

	    # Supported local file extensions and the MIME type each one maps to.
	    # The MIME type is needed so the data URL announces the real format
	    # instead of always claiming JPEG.
	    mime_by_extension = {
	        '.png': 'image/png',
	        '.jpg': 'image/jpeg',
	        '.jpeg': 'image/jpeg',
	        '.gif': 'image/gif',
	        '.bmp': 'image/bmp',
	        '.webp': 'image/webp',
	    }

	    try:
	        # Anything with both a scheme and a network location is treated as
	        # a URL; everything else is treated as a local file path.
	        parsed = urlparse(image_path)
	        is_url = bool(parsed.scheme and parsed.netloc)

	        if is_url:
	            print(f"📡 Downloading image from URL: {image_path}")
	            download = requests.get(image_path, timeout=30)
	            download.raise_for_status()

	            # Reject responses that do not claim to be an image
	            # (header values are compared case-insensitively).
	            content_type = download.headers.get('content-type', '')
	            if 'image' not in content_type.lower():
	                return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"

	            image_data = download.content

	            # Strip parameters such as "; charset=binary" from the header.
	            # Fall back to JPEG (the previous hard-coded value) when the
	            # header is not a plain "image/<subtype>" value.
	            mime_type = content_type.split(';', 1)[0].strip().lower()
	            if not mime_type.startswith('image/'):
	                mime_type = 'image/jpeg'
	        else:
	            # Use a separate name so the ``image_path`` argument keeps its
	            # declared ``str`` type.
	            local_path = Path(image_path)

	            if not local_path.exists():
	                return f"Error: Local file not found: {local_path}"

	            suffix = local_path.suffix.lower()
	            if suffix not in mime_by_extension:
	                return f"Error: Unsupported file type '{local_path.suffix}'. Supported: {set(mime_by_extension)}"
	            mime_type = mime_by_extension[suffix]

	            print(f"📁 Reading local image: {local_path}")
	            with open(local_path, "rb") as f:
	                image_data = f.read()

	        # The chat API takes inline images as base64 data URLs.
	        base64_image = base64.b64encode(image_data).decode('utf-8')

	        client = OpenAI(
	            base_url="https://api.studio.nebius.com/v1/",
	            api_key=api_key
	        )

	        # One system prompt plus a user message carrying both the text
	        # request and the image itself.
	        completion = client.chat.completions.create(
	            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
	            messages=[
	                {
	                    "role": "system",
	                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details."
	                },
	                {
	                    "role": "user",
	                    "content": [
	                        {
	                            "type": "text",
	                            "text": "Please provide a detailed description of this image."
	                        },
	                        {
	                            "type": "image_url",
	                            "image_url": {
	                                "url": f"data:{mime_type};base64,{base64_image}"
	                            }
	                        }
	                    ]
	                }
	            ],
	            max_tokens=500
	        )

	        # The message content may be absent; report that explicitly rather
	        # than failing on ``None.strip()``.
	        content = completion.choices[0].message.content
	        if content is None:
	            return "Error: Model returned an empty response"
	        return content.strip()

	    except requests.RequestException as e:
	        return f"Error downloading image from URL: {e}"
	    except FileNotFoundError:
	        # Covers the file disappearing between the exists() check and open().
	        return f"Error: File not found: {image_path}"
	    except Exception as e:
	        # Last-resort boundary: classify the failure from its message text
	        # so the caller gets an actionable hint. These are keyword
	        # heuristics, checked in this order.
	        error_msg = str(e)
	        lowered = error_msg.lower()

	        if "vision" in lowered or "image" in lowered:
	            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
	        elif "401" in error_msg or "unauthorized" in lowered:
	            return "Error: Invalid API key or insufficient permissions"
	        elif "rate" in lowered or "quota" in lowered:
	            return f"Error: API rate limit or quota exceeded: {error_msg}"
	        else:
	            return f"Error processing image: {error_msg}"