# Spaces: Running
import base64
import io
import os
from datetime import datetime

import gradio as gr
from openai import OpenAI
from PIL import Image
# OpenAI-compatible client pointed at the OpenRouter endpoint.
#
# The API key is read from the environment so that no secret lives in source
# control. NOTE: a key was previously hard-coded at this spot; treat it as
# compromised and revoke it.
_api_key = os.environ.get("OPENROUTER_API_KEY")
if not _api_key:
    # Fail at startup with an actionable message instead of letting every
    # request fail later with an authentication error.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set; export it (or add it to the "
        "deployment's secrets) before starting the app."
    )

client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_api_key,
)
# System prompt sent with every request; kept at module level so the request
# construction below stays readable.
_SYSTEM_PROMPT = (
    "You are Dalton, an expert AI assistant specialized in image understanding.\n"
    "Your tasks include:\n"
    "- Extracting and structuring text from images\n"
    "- Answering questions about image content\n"
    "- Providing detailed descriptions\n"
    "- Analyzing receipts, documents, and other visual content\n"
    "Be thorough, accurate, and helpful in your responses."
)


def _encode_image_as_jpeg_base64(image):
    """Return *image* serialized as a base64-encoded JPEG string."""
    # JPEG cannot represent alpha channels or palettes, so images in modes
    # such as RGBA, LA or P are converted to RGB before saving.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("utf-8")


def analyze_image(image, prompt):
    """Send an image and a text prompt to the vision model and return its reply.

    Args:
        image: The uploaded or captured picture (an object exposing ``mode``,
            ``convert`` and ``save`` like a PIL image), or ``None`` when
            nothing has been provided.
        prompt: The question or instruction to send alongside the image.

    Returns:
        The model's text answer, or a human-readable message when an input is
        missing, the model returns nothing, or the request fails. Failures are
        reported as text rather than raised so the caller can display them.
    """
    if image is None:
        return "Please upload or capture an image first."
    if not prompt or not prompt.strip():
        return "Please enter a question or instruction."

    try:
        # Encoding happens inside the try so a bad image yields a message
        # instead of an unhandled exception.
        img_str = _encode_image_as_jpeg_base64(image)
        response = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[
                {"role": "system", "content": _SYSTEM_PROMPT},
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_str}"
                            },
                        },
                    ],
                },
            ],
            max_tokens=2048,
        )
        # The response may carry no choices or no text; report that plainly
        # instead of failing on the index or returning None.
        choices = response.choices
        result = choices[0].message.content if choices else None
        if not result:
            return "The model returned an empty response. Please try again."
        return result
    except Exception as e:
        # UI boundary: surface any failure as text for the result panel.
        return f"An error occurred: {str(e)}"
# Custom CSS for better mobile experience.
# The selectors match the elem_id / elem_classes values given to the Gradio
# components in this file (#mobile-camera, #prompt-textbox, .result-box) and
# the footer markup (<div class="footer">).
css = """
#mobile-camera { width: 100% !important; }
#prompt-textbox { min-height: 100px !important; }
.result-box {
max-height: 500px;
overflow-y: auto;
padding: 15px;
border: 1px solid #e0e0e0;
border-radius: 8px;
}
.footer {
margin-top: 20px;
font-size: 12px;
color: #666;
text-align: center;
}
"""
# Gradio UI: image + prompt inputs on the left, model answer on the right.
#
# NOTE(review): the labels in this block had been corrupted by a bad character
# decoding. The title emoji, the bullet separators, the light-bulb and the
# copyright sign were recovered unambiguously; the emoji in front of the prompt
# label, the button text and the result label could not be recovered exactly
# and are best guesses -- confirm against the intended design.
with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
    gr.Markdown("""
# 🧾 DaltonVision - InternVL3-14B
### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
""")
    with gr.Row():
        with gr.Column():
            # Image input section
            image_input = gr.Image(
                sources=["upload", "webcam"],
                type="pil",
                label="Upload or Capture Image",
                elem_id="mobile-camera",
            )
            # Prompt input
            prompt_input = gr.Textbox(
                label="📝 Enter your question or instruction",
                value="Extract all content structurally",
                lines=3,
                elem_id="prompt-textbox",
            )
            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
            # Clicking an example fills the prompt box only.
            gr.Examples(
                examples=[
                    ["What is the total amount on this receipt?"],
                    ["List all items and their prices"],
                    ["Who is the vendor and what is the date?"],
                    ["Describe this image in detail"],
                ],
                inputs=[prompt_input],
                label="💡 Try these example prompts:",
            )
        with gr.Column():
            # Result output
            result_output = gr.Markdown(
                label="✅ Analysis Result",
                elem_classes="result-box",
            )
    # Footer
    gr.Markdown("""
<div class="footer">
© 2025 Koshur AI. All rights reserved.<br>
Note: Images are processed in real-time and not stored.
</div>
""")
    # Button action: run the analysis and show the answer in the result panel.
    submit_btn.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input],
        outputs=result_output,
    )
def _launch_app():
    """Start the Gradio server for the ``demo`` interface."""
    demo.launch()


# Only start the server when the file is executed as a script, not on import.
if __name__ == "__main__":
    _launch_app()