import base64
import mimetypes
import os
from pathlib import Path
from urllib.parse import urlparse

import requests
from dotenv import load_dotenv
from openai import OpenAI


# Supported local file extensions mapped to the MIME type declared in the
# data URL that is sent to the model.
_MIME_BY_EXTENSION = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".bmp": "image/bmp",
    ".webp": "image/webp",
}

# Used only when neither the Content-Type header nor the URL path yields an
# image MIME type.
_FALLBACK_MIME_TYPE = "image/jpeg"


def describe_image(image_path: str) -> str:
    """Generate a description of the image at the given path or URL.

    The image is read (from disk) or downloaded (from a URL), base64-encoded
    into a data URL, and sent to a vision-capable chat model through the
    Nebius OpenAI-compatible endpoint.

    Args:
        image_path: Path to a local image file OR URL to an image. A value is
            treated as a URL when it parses with both a scheme and a network
            location.

    Returns:
        The model's description of the image. Failures are not raised: every
        error path returns a human-readable string starting with "Error".
    """
    # Pick up NEBIUS_API_KEY from a .env file if one is present.
    load_dotenv()

    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    try:
        parsed = urlparse(image_path)

        if parsed.scheme and parsed.netloc:
            print(f"📡 Downloading image from URL: {image_path}")
            download = requests.get(image_path, timeout=30)
            download.raise_for_status()

            content_type = download.headers.get("content-type", "")
            # Header values are not guaranteed to be lowercase.
            if "image" not in content_type.lower():
                return (
                    "Error: URL does not appear to contain an image. "
                    f"Content-Type: {content_type}"
                )
            image_data = download.content

            # Strip parameters such as "; charset=..." to get the bare type.
            mime_type = content_type.split(";", 1)[0].strip().lower()
            if not mime_type.startswith("image/"):
                # The header mentioned "image" without being an image/* type;
                # fall back to the URL's file extension.
                guessed, _ = mimetypes.guess_type(parsed.path)
                if guessed and guessed.startswith("image/"):
                    mime_type = guessed
                else:
                    mime_type = _FALLBACK_MIME_TYPE
        else:
            local_path = Path(image_path)

            if not local_path.exists():
                return f"Error: Local file not found: {local_path}"

            suffix = local_path.suffix.lower()
            if suffix not in _MIME_BY_EXTENSION:
                # Sorted so the message is stable from run to run.
                supported = ", ".join(sorted(_MIME_BY_EXTENSION))
                return (
                    f"Error: Unsupported file type '{local_path.suffix}'. "
                    f"Supported: {supported}"
                )

            print(f"📁 Reading local image: {local_path}")
            # Handle read failures here so they are not misclassified by the
            # keyword heuristics in the generic handler below (a path that
            # contains "image" would otherwise be reported as a model issue).
            try:
                image_data = local_path.read_bytes()
            except FileNotFoundError:
                return f"Error: File not found: {local_path}"
            except OSError as exc:
                return f"Error reading local image file: {exc}"
            mime_type = _MIME_BY_EXTENSION[suffix]

        base64_image = base64.b64encode(image_data).decode("utf-8")

        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key,
        )

        completion = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details.",
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # Declare the real image type, not a fixed JPEG.
                                "url": f"data:{mime_type};base64,{base64_image}",
                            },
                        },
                    ],
                },
            ],
            max_tokens=500,
        )

        # The message content may be absent; report that instead of crashing
        # on `.strip()`.
        choices = completion.choices
        content = choices[0].message.content if choices else None
        if content is None:
            return "Error: Model returned no description"
        return content.strip()

    except requests.RequestException as e:
        return f"Error downloading image from URL: {str(e)}"
    except FileNotFoundError:
        return f"Error: File not found: {image_path}"
    except Exception as e:
        # Top-level boundary: turn any remaining failure (typically from the
        # API client) into an error string, with keyword-based hints.
        error_msg = str(e)
        lowered = error_msg.lower()

        if "vision" in lowered or "image" in lowered:
            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
        elif "401" in error_msg or "unauthorized" in lowered:
            return "Error: Invalid API key or insufficient permissions"
        elif "rate" in lowered or "quota" in lowered:
            return f"Error: API rate limit or quota exceeded: {error_msg}"
        else:
            return f"Error processing image: {error_msg}"