import gradio as gr
from openai import OpenAI
import base64
from PIL import Image
import io
import os
from datetime import datetime

# OpenAI-compatible client pointed at OpenRouter.
# Read the API key from the environment; do not hard-code secrets in source.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.getenv("OPENROUTER_API_KEY"),
)


def analyze_image(image, prompt):
    """Send the image and prompt to the vision model and return its answer."""
    if image is None:
        return "Please upload or capture an image first."

    # Convert the PIL image to a base64-encoded JPEG for the data URL
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")

    try:
        response = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[
                {
                    "role": "system",
                    "content": """You are Dalton, an expert AI assistant specialized in image understanding. Your tasks include:
- Extracting and structuring text from images
- Answering questions about image content
- Providing detailed descriptions
- Analyzing receipts, documents, and other visual content
Be thorough, accurate, and helpful in your responses.""",
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_str}"
                            },
                        },
                    ],
                },
            ],
            max_tokens=2048,
        )
        result = response.choices[0].message.content
        return result
    except Exception as e:
        return f"An error occurred: {str(e)}"


# Custom CSS for a better mobile experience
css = """
#mobile-camera {
    width: 100% !important;
}
#prompt-textbox {
    min-height: 100px !important;
}
.result-box {
    max-height: 500px;
    overflow-y: auto;
    padding: 15px;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
}
.footer {
    margin-top: 20px;
    font-size: 12px;
    color: #666;
    text-align: center;
}
"""

with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
    gr.Markdown("""
    # 🧾 DaltonVision - InternVL3-14B
    ### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
    """)

    with gr.Row():
        with gr.Column():
            # Image input section
            image_input = gr.Image(
                sources=["upload", "webcam"],
                type="pil",
                label="Upload or Capture Image",
                elem_id="mobile-camera",
            )

            # Prompt input
            prompt_input = gr.Textbox(
                label="📝 Enter your question or instruction",
                value="Extract all content structurally",
                lines=3,
                elem_id="prompt-textbox",
            )

            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")

            gr.Examples(
                examples=[
                    ["What is the total amount on this receipt?"],
                    ["List all items and their prices"],
                    ["Who is the vendor and what is the date?"],
                    ["Describe this image in detail"],
                ],
                inputs=[prompt_input],
                label="💡 Try these example prompts:",
            )

        with gr.Column():
            # Result output
            result_output = gr.Markdown(
                label="✅ Analysis Result",
                elem_classes="result-box",
            )

    # Footer
    gr.Markdown("""
""") # Button action submit_btn.click( fn=analyze_image, inputs=[image_input, prompt_input], outputs=result_output ) # Launch the app if __name__ == "__main__": demo.launch()