import gradio as gr
import openai
import base64
import os
from PIL import Image
import io
from typing import Optional, Tuple

# Set up OpenRouter client using the OpenAI SDK
client = openai.OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY")  # Set your OpenRouter API key
)


def encode_image_to_base64(image: Image.Image) -> str:
    """
    Convert a PIL Image to a base64 string for the OpenRouter API.
    """
    # Convert image to RGB if it's in a different mode
    if image.mode != 'RGB':
        image = image.convert('RGB')

    # Save image to a bytes buffer with higher quality for vision models
    buffer = io.BytesIO()
    image.save(buffer, format='JPEG', quality=90)
    buffer.seek(0)

    # Encode to base64
    image_bytes = buffer.getvalue()
    base64_string = base64.b64encode(image_bytes).decode('utf-8')

    return base64_string


def analyze_image_with_qwen(image: Image.Image, query: str,
                            model: str = "qwen/qwen2.5-vl-72b-instruct:free") -> str:
    """
    Analyze an image using Qwen 2.5 VL via the OpenRouter API.
    """
    try:
        # Check if the API key is set
        if not os.getenv("OPENROUTER_API_KEY"):
            return "❌ Error: OpenRouter API key not found. Please set the OPENROUTER_API_KEY environment variable."

        # Encode image to base64
        base64_image = encode_image_to_base64(image)

        # Prepare the message for the OpenRouter API (OpenAI-compatible format).
        # Inline images must be sent as a data URL: "data:image/jpeg;base64,<payload>".
        messages = [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": query
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ]

        # Make the API call with OpenRouter headers
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            max_tokens=2000,
            temperature=0.3,
            extra_headers={
                "HTTP-Referer": "https://your-app-name.com",  # Optional: your site URL
                "X-Title": "AI Image Analyzer"                # Optional: your app name
            }
        )

        # Extract and return the response text
        analysis = response.choices[0].message.content
        return analysis

    except openai.AuthenticationError:
        return "❌ Authentication Error: Invalid OpenRouter API key. Please check your API key."
    except openai.RateLimitError:
        return "❌ Rate Limit Error: You have exceeded your OpenRouter API rate limit. Please try again later."
    except openai.APIError as e:
        return f"❌ OpenRouter API Error: {str(e)}"
    except Exception as e:
        return f"❌ Unexpected Error: {str(e)}"


def process_image_query(image: Optional[Image.Image], query: str) -> Tuple[str, str]:
    """
    Main function to process an image and query using Qwen 2.5 VL.
    """
    # Validation
    if image is None:
        return "❌ Please upload an image first.", ""

    if not query.strip():
        return "❌ Please enter a question about the image.", ""

    # Analyze the image
    try:
        result = analyze_image_with_qwen(image, query.strip())

        # Format the response
        formatted_result = (
            f"## 🤖 Qwen 2.5 VL Analysis\n\n"
            f"**Your Question:** {query}\n\n"
            f"**Analysis:**\n{result}"
        )
        return formatted_result, "✅ Analysis completed successfully!"
    except Exception as e:
        error_msg = f"❌ Error during analysis: {str(e)}"
        return error_msg, ""


def get_example_queries():
    """
    Return a list of example queries optimized for Qwen 2.5 VL capabilities.
    """
    return [
        "What objects can you see in this image? Provide detailed descriptions.",
        "Describe the colors, lighting, composition and overall mood of this image.",
        "What is happening in this scene? Analyze the activities and context.",
        "Extract and transcribe any text visible in this image.",
        "What is the setting or location? Describe the environment in detail.",
        "Analyze the people in this image - clothing, expressions, poses, and interactions.",
        "Identify any animals, plants, or natural elements in this image.",
        "Analyze the artistic elements: composition, style, technique, and visual impact.",
        "What safety concerns, hazards, or important details do you notice?",
        "Create a structured analysis comparing different elements in this image."
    ]


def load_example_query(query):
    """
    Load the selected example query into the textbox.
    """
    return query


# Custom CSS with updated branding
custom_css = """
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}
.image-upload {
    min-height: 400px !important;
}
.query-textbox textarea {
    min-height: 100px !important;
}
.example-queries .gr-button {
    margin: 2px !important;
    font-size: 12px !important;
}
.header-text {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #e74c3c 0%, #8e44ad 100%);
    color: white;
    border-radius: 10px;
    margin-bottom: 20px;
}
.footer-text {
    text-align: center;
    color: #666;
    font-size: 12px;
    margin-top: 20px;
}
.model-info {
    background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%);
    border-left: 4px solid #e74c3c;
    padding: 15px;
    border-radius: 8px;
    margin: 10px 0;
}
"""


# Create the Gradio interface
def create_gradio_interface():
    """
    Create and configure the main Gradio interface for Qwen 2.5 VL.
    """
    with gr.Blocks(
        title="🤖 Qwen 2.5 VL Image Analyzer",
        theme=gr.themes.Soft(),
        css=custom_css
    ) as interface:

        # Header
        gr.HTML("""
        <div class="header-text">
            <h1>🤖 Qwen 2.5 VL Image Analyzer</h1>
            <p>Upload any image and ask questions about it! Powered by Qwen 2.5 VL 72B via OpenRouter (FREE)</p>
        </div>
        """)

        # Model information
        gr.HTML("""
        <div class="model-info">
            <h3>🔥 Using Qwen 2.5 VL 72B Instruct (Free)</h3>
            <p><strong>Model:</strong> qwen/qwen2.5-vl-72b-instruct:free via OpenRouter</p>
            <p><strong>Capabilities:</strong> Advanced vision-language understanding, text recognition, chart analysis, detailed object recognition</p>
            <p><strong>Context:</strong> 32,768 tokens | <strong>Cost:</strong> FREE tier available</p>
        </div>
        """)

        with gr.Row():
            # Left Column - Input
            with gr.Column(scale=1):
                gr.Markdown("### 📤 Upload Your Image")
                image_input = gr.Image(
                    label="Select or drag & drop an image",
                    type="pil",
                    height=400,
                    elem_classes=["image-upload"]
                )

                gr.Markdown("### ❓ Ask Your Question")
                query_input = gr.Textbox(
                    label="What would you like to know about this image?",
                    placeholder="Ask detailed questions about objects, text, scenes, analysis, etc.",
                    lines=4,
                    elem_classes=["query-textbox"]
                )

                # Action buttons
                with gr.Row():
                    analyze_btn = gr.Button(
                        "🔍 Analyze with Qwen 2.5 VL",
                        variant="primary",
                        scale=2
                    )
                    clear_btn = gr.Button(
                        "🗑️ Clear",
                        variant="secondary",
                        scale=1
                    )

            # Right Column - Output
            with gr.Column(scale=1):
                gr.Markdown("### 🤖 Qwen 2.5 VL Analysis Results")
                result_output = gr.Markdown(
                    value="Upload an image and ask a question to see the Qwen 2.5 VL analysis here...",
                    label="Analysis Result",
                    height=400
                )

                status_output = gr.Textbox(
                    label="Status",
                    interactive=False,
                    max_lines=2
                )

        # Example Queries Section
        gr.Markdown("### 💡 Example Questions Optimized for Qwen 2.5 VL")

        example_queries = get_example_queries()

        # First row of example buttons. The default argument on each lambda
        # binds the query at definition time, so every button loads its own text.
        with gr.Row():
            for i in range(5):
                btn_text = example_queries[i][:55] + ("..." if len(example_queries[i]) > 55 else "")
                btn = gr.Button(btn_text, size="sm")
                btn.click(
                    fn=lambda x=example_queries[i]: x,
                    outputs=query_input
                )

        # Second row of example buttons
        with gr.Row():
            for i in range(5, 10):
                btn_text = example_queries[i][:55] + ("..." if len(example_queries[i]) > 55 else "")
                btn = gr.Button(btn_text, size="sm")
                btn.click(
                    fn=lambda x=example_queries[i]: x,
                    outputs=query_input
                )

        # Instructions
        gr.Markdown("""
        ### 📋 How to Use:
        1. **Get an OpenRouter API Key**: Sign up at [OpenRouter.ai](https://openrouter.ai) and get your free API key
        2. **Upload an Image**: Click on the image area or drag & drop your image file
        3. **Ask a Question**: Type your question or click an example question above
        4. **Analyze**: Click "Analyze with Qwen 2.5 VL" to get AI insights
        5. **Review Results**: The detailed analysis will appear on the right side

        ### 🎯 Qwen 2.5 VL Excels At:
        - **Object Recognition**: Flowers, birds, fish, insects, everyday objects
        - **Text Analysis**: OCR, document understanding, chart reading
        - **Scene Understanding**: Complex visual reasoning and context analysis
        - **Structured Output**: JSON, tables, organized information extraction
        - **Multi-language**: Support for 29+ languages including Chinese, English, etc.

        ### 🔧 Requirements:
        - OpenRouter API key: set as the environment variable `OPENROUTER_API_KEY`
        - Internet connection for API calls
        - Supported formats: JPG, PNG, GIF, BMP, WEBP

        ### 🆓 Cost & Limits:
        - **Free tier available** with generous limits
        - Model: qwen/qwen2.5-vl-72b-instruct:free
        - No cost per token on the free tier
        """)

        # Event Handlers

        # Main analyze button
        analyze_btn.click(
            fn=process_image_query,
            inputs=[image_input, query_input],
            outputs=[result_output, status_output],
            show_progress="full"
        )

        # Clear button
        def clear_all():
            return None, "", "Ready for new analysis with Qwen 2.5 VL!", ""

        clear_btn.click(
            fn=clear_all,
            outputs=[image_input, query_input, status_output, result_output]
        )

        # Auto-analyze on submit of the query box
        query_input.submit(
            fn=process_image_query,
            inputs=[image_input, query_input],
            outputs=[result_output, status_output]
        )

    return interface


# Main execution
if __name__ == "__main__":
    # Check if the OpenRouter API key is set
    if not os.getenv("OPENROUTER_API_KEY"):
        print("⚠️ WARNING: OPENROUTER_API_KEY environment variable is not set!")
        print("Please set it using: export OPENROUTER_API_KEY='your-openrouter-api-key-here'")
        print("Get your free API key at: https://openrouter.ai")
        print("The app will still launch but image analysis will not work without the API key.\n")

    print("🚀 Starting Qwen 2.5 VL Image Analyzer...")
    print("📍 Model: qwen/qwen2.5-vl-72b-instruct:free")
    print("🌐 Provider: OpenRouter.ai")
    print("💰 Cost: FREE tier")

    # Create and launch the interface
    demo = create_gradio_interface()

    # Launch with custom settings
    demo.launch(
        share=True,             # Create a public link
        server_name="0.0.0.0",  # Allow external connections
        server_port=7860,       # Default Gradio port
        show_error=True,        # Show detailed error messages
        favicon_path=None,      # You can add a custom favicon here
        inbrowser=True          # Automatically open in browser
    )
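
# --- Usage sketch (optional, commented out) ---
# A minimal programmatic call that bypasses the Gradio UI, handy for
# smoke-testing the OpenRouter path. The file name "example.jpg" is an
# assumption for illustration, and OPENROUTER_API_KEY must already be
# exported; neither is a requirement of the app itself:
#
#   from PIL import Image
#   img = Image.open("example.jpg")
#   print(analyze_image_with_qwen(img, "Describe this image in detail."))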