"""Qwen 2.5 VL image analyzer backed by the OpenRouter API (OpenAI-compatible)."""

import base64
import io
import os
from typing import Optional, Tuple

import gradio as gr
import openai
import requests
from PIL import Image

# OpenRouter exposes an OpenAI-compatible endpoint, so the stock OpenAI SDK works.
client = openai.OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("API_KEY"),  # Set your OpenRouter API key
)


def encode_image_to_base64(image: Image.Image) -> str:
    """
    Convert a PIL Image to a base64 string for the OpenRouter API.
    """
    # JPEG cannot store alpha/palette modes, so normalize to RGB first.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Save image to an in-memory buffer with higher quality for vision models.
    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', quality=90)
    buffer.seek(0)

    # Encode to base64
    return base64.b64encode(buffer.getvalue()).decode('utf-8')


def analyze_image_with_qwen(image: Image.Image,
                            query: str,
                            model: str = "qwen/qwen2.5-vl-72b-instruct:free") -> str:
    """
    Analyze ``image`` against ``query`` using Qwen 2.5 VL via OpenRouter.

    Returns the model's answer on success, or a human-readable "❌ ..."
    string on any failure (missing key, auth, rate limit, API error, other).
    """
    try:
        # Fail fast with a friendly message if no API key is configured.
        if not os.getenv("API_KEY"):
            return "❌ Error: OpenRouter API key not found. Please set the API_KEY environment variable."

        base64_image = encode_image_to_base64(image)

        # OpenAI-compatible multimodal message: one text part + one image part.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": query},
                    {
                        "type": "image_url",
                        # BUG FIX: inline images must be a full data URI; the
                        # previous value ("image/jpeg;base64,...") lacked the
                        # "data:" scheme and would be rejected by the API.
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            }
        ]

        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=2000,
            temperature=0.3,
            extra_headers={
                "HTTP-Referer": "https://your-app-name.com",  # Optional: your site URL
                "X-Title": "AI Image Analyzer",  # Optional: your app name
            },
        )

        # BUG FIX: ``choices`` is a list; the first choice carries the message.
        # ``response.choices.message`` raised AttributeError at runtime.
        return response.choices[0].message.content

    except openai.AuthenticationError:
        return "❌ Authentication Error: Invalid OpenRouter API key. Please check your API key."
    except openai.RateLimitError:
        return "❌ Rate Limit Error: You have exceeded your OpenRouter API rate limit. Please try again later."
    except openai.APIError as e:
        return f"❌ OpenRouter API Error: {str(e)}"
    except Exception as e:
        return f"❌ Unexpected Error: {str(e)}"


def process_image_query(image: Optional[Image.Image], query: str) -> Tuple[str, str]:
    """
    Validate the inputs, run the analysis, and return (markdown_result, status).

    Either element may instead carry a "❌ ..." message on validation failure
    or error; status is "" in those cases.
    """
    # Validation
    if image is None:
        return "❌ Please upload an image first.", ""
    if not query.strip():
        return "❌ Please enter a question about the image.", ""

    # Analyze the image
    try:
        result = analyze_image_with_qwen(image, query.strip())
        # Format the response as Markdown for the output panel.
        formatted_result = f"## 🤖 Qwen 2.5 VL Analysis\n\n**Your Question:** {query}\n\n**Analysis:**\n{result}"
        return formatted_result, "✅ Analysis completed successfully!"
    except Exception as e:
        return f"❌ Error during analysis: {str(e)}", ""


def get_example_queries():
    """
    Return a list of example queries optimized for Qwen 2.5 VL capabilities.
    """
    return [
        "What objects can you see in this image? Provide detailed descriptions.",
        "Describe the colors, lighting, composition and overall mood of this image.",
        "What is happening in this scene? Analyze the activities and context.",
        "Extract and transcribe any text visible in this image.",
        "What is the setting or location? Describe the environment in detail.",
        "Analyze the people in this image - clothing, expressions, poses, and interactions.",
        "Identify any animals, plants, or natural elements in this image.",
        "Analyze the artistic elements: composition, style, technique, and visual impact.",
        "What safety concerns, hazards, or important details do you notice?",
        "Create a structured analysis comparing different elements in this image.",
    ]
] def load_example_query(query): """ Load selected example query into the textbox """ return query # Custom CSS with updated branding custom_css = """ .gradio-container { max-width: 1200px !important; margin: auto !important; } .image-upload { min-height: 400px !important; } .query-textbox textarea { min-height: 100px !important; } .example-queries .gr-button { margin: 2px !important; font-size: 12px !important; } .header-text { text-align: center; padding: 20px; background: linear-gradient(135deg, #e74c3c 0%, #8e44ad 100%); color: white; border-radius: 10px; margin-bottom: 20px; } .footer-text { text-align: center; color: #666; font-size: 12px; margin-top: 20px; } .model-info { background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%); border-left: 4px solid #e74c3c; padding: 15px; border-radius: 8px; margin: 10px 0; } """ # Create the Gradio interface def create_gradio_interface(): """ Create and configure the main Gradio interface for Qwen 2.5 VL """ with gr.Blocks( title="🤖 Qwen 2.5 VL Image Analyzer", theme=gr.themes.Soft(), css=custom_css ) as interface: # Header gr.HTML("""
Upload any image and ask questions about it! Powered by Qwen 2.5 VL 72B via OpenRouter (FREE)
Model: qwen/qwen2.5-vl-72b-instruct:free via OpenRouter
Capabilities: Advanced vision-language understanding, text recognition, chart analysis, detailed object recognition
Context: 32,768 tokens | Cost: FREE tier available