JuanjoSG5
current progress
cc083b4
import os
import base64
import requests
from pathlib import Path
from openai import OpenAI
from urllib.parse import urlparse
from dotenv import load_dotenv
# Supported local-file extensions mapped to the MIME type declared in the
# data URL sent to the model.
_IMAGE_MIME_TYPES = {
    ".png": "image/png",
    ".jpg": "image/jpeg",
    ".jpeg": "image/jpeg",
    ".gif": "image/gif",
    ".bmp": "image/bmp",
    ".webp": "image/webp",
}


def describe_image(image_path: str) -> str:
    """Generate a description of the image at the given path or URL.

    The image is read from a local file or downloaded from a URL,
    base64-encoded, and sent inline (as a data URL) in a chat completion
    request to the Nebius endpoint.

    Failures are reported through the return value rather than raised:
    every failure path returns a string that begins with ``"Error"``.

    Args:
        image_path: Path to a local image file OR a URL to an image. A value
            is treated as a URL when it parses with both a scheme and a
            network location.

    Returns:
        The model's description with surrounding whitespace stripped, or an
        error message describing what went wrong.
    """
    load_dotenv()

    # Check if API key is available
    api_key = os.getenv("NEBIUS_API_KEY")
    if not api_key:
        return "Error: NEBIUS_API_KEY environment variable not set"

    # Keep the caller's value in its own name so error messages always show
    # what was passed in, instead of rebinding the parameter to a Path.
    source = str(image_path)

    try:
        # Determine if it's a URL or local file path
        parsed = urlparse(source)
        is_url = bool(parsed.scheme and parsed.netloc)

        if is_url:
            print(f"📡 Downloading image from URL: {source}")
            download = requests.get(source, timeout=30)
            download.raise_for_status()
            image_data = download.content

            # Header values are case-insensitive and may carry parameters
            # (e.g. "image/png; charset=binary"), so normalise before use.
            content_type = download.headers.get('content-type', '')
            if 'image' not in content_type.lower():
                return f"Error: URL does not appear to contain an image. Content-Type: {content_type}"
            mime_type = content_type.split(';', 1)[0].strip().lower()
            if not mime_type.startswith('image/'):
                # Unusual but accepted header; fall back to the previous default.
                mime_type = 'image/jpeg'
        else:
            local_path = Path(source)
            if not local_path.exists():
                return f"Error: Local file not found: {local_path}"

            # Check if it's an image file
            suffix = local_path.suffix.lower()
            if suffix not in _IMAGE_MIME_TYPES:
                supported = ', '.join(sorted(_IMAGE_MIME_TYPES))
                return f"Error: Unsupported file type '{local_path.suffix}'. Supported: {supported}"
            mime_type = _IMAGE_MIME_TYPES[suffix]

            print(f"📁 Reading local image: {local_path}")
            with open(local_path, "rb") as f:
                image_data = f.read()

        if not image_data:
            # Nothing to describe; avoid a pointless API round trip.
            return "Error: Image data is empty"

        # Encode image to base64
        base64_image = base64.b64encode(image_data).decode('utf-8')

        # Create OpenAI client
        client = OpenAI(
            base_url="https://api.studio.nebius.com/v1/",
            api_key=api_key,
        )

        # Make API call with proper vision format
        completion = client.chat.completions.create(
            model="mistralai/Mistral-Small-3.1-24B-Instruct-2503",
            messages=[
                {
                    "role": "system",
                    "content": "You are a helpful assistant that provides detailed descriptions of images. Focus on the main subjects, colors, composition, and any notable details.",
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "text",
                            "text": "Please provide a detailed description of this image.",
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                # Declare the real media type rather than
                                # always claiming JPEG.
                                "url": f"data:{mime_type};base64,{base64_image}",
                            },
                        },
                    ],
                },
            ],
            max_tokens=500,
        )

        # Guard against an empty choice list or a missing message body so the
        # caller gets a clear message instead of an opaque attribute error.
        choices = completion.choices
        if not choices or choices[0].message.content is None:
            return "Error: Model returned no description"
        return choices[0].message.content.strip()

    except requests.RequestException as e:
        return f"Error downloading image from URL: {str(e)}"
    except FileNotFoundError:
        return f"Error: File not found: {source}"
    except Exception as e:
        # Top-level boundary: classify the failure from its message text so
        # the caller always receives a string.
        error_msg = str(e)
        lowered = error_msg.lower()
        if "vision" in lowered or "image" in lowered:
            return f"Error: This model may not support vision capabilities. Try a vision-enabled model. Details: {error_msg}"
        if "401" in error_msg or "unauthorized" in lowered:
            return "Error: Invalid API key or insufficient permissions"
        if "rate" in lowered or "quota" in lowered:
            return f"Error: API rate limit or quota exceeded: {error_msg}"
        return f"Error processing image: {error_msg}"