import gradio as gr
from openai import OpenAI
import base64
from PIL import Image
import io
from datetime import datetime
import os

# OpenAI client setup: OpenRouter exposes an OpenAI-compatible API, so the standard
# client works once base_url points at OpenRouter. Read the API key from the
# environment instead of hardcoding it in source.
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=os.environ.get("OPENROUTER_API_KEY"),
)
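# Assumes the key is stored under OPENROUTER_API_KEY: set it in the Space's secrets
# (or your shell, e.g. `export OPENROUTER_API_KEY=...`) before launching.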
def analyze_image(image, prompt):
    if image is None:
        return "Please upload or capture an image first."

    # Convert the image to base64 (force RGB first so PNGs with an alpha channel can be saved as JPEG)
    buffered = io.BytesIO()
    image.convert("RGB").save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
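    # Build the chat completion request: a system prompt defining Dalton's role, plus a
    # user turn carrying the text prompt and the image as an inline base64 data: URL.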
    try:
        response = client.chat.completions.create(
            model="opengvlab/internvl3-14b:free",
            messages=[
                {
                    "role": "system",
                    "content": """You are Dalton, an expert AI assistant specialized in image understanding.
                    Your tasks include:
                    - Extracting and structuring text from images
                    - Answering questions about image content
                    - Providing detailed descriptions
                    - Analyzing receipts, documents, and other visual content
                    Be thorough, accurate, and helpful in your responses."""
                },
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{img_str}"
                            }
                        }
                    ]
                }
            ],
            max_tokens=2048
        )
        result = response.choices[0].message.content
        return result
    except Exception as e:
        return f"An error occurred: {str(e)}"
# Custom CSS for better mobile experience
css = """
#mobile-camera { width: 100% !important; }
#prompt-textbox { min-height: 100px !important; }
.result-box {
    max-height: 500px;
    overflow-y: auto;
    padding: 15px;
    border: 1px solid #e0e0e0;
    border-radius: 8px;
}
.footer {
    margin-top: 20px;
    font-size: 12px;
    color: #666;
    text-align: center;
}
"""
with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
    gr.Markdown("""
    # 🧾 DaltonVision - InternVL3-14B
    ### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
    """)
    with gr.Row():
        with gr.Column():
            # Image input section
            image_input = gr.Image(
                sources=["upload", "webcam"],
                type="pil",
                label="Upload or Capture Image",
                elem_id="mobile-camera"
            )
            # Prompt input
            prompt_input = gr.Textbox(
                label="📝 Enter your question or instruction",
                value="Extract all content structurally",
                lines=3,
                elem_id="prompt-textbox"
            )
            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")
            gr.Examples(
                examples=[
                    ["What is the total amount on this receipt?"],
                    ["List all items and their prices"],
                    ["Who is the vendor and what is the date?"],
                    ["Describe this image in detail"]
                ],
                inputs=[prompt_input],
                label="💡 Try these example prompts:"
            )
        with gr.Column():
            # Result output
            result_output = gr.Markdown(
                label="✅ Analysis Result",
                elem_classes="result-box"
            )
    # Footer
    gr.Markdown("""
    <div class="footer">
        © 2025 Koshur AI. All rights reserved.<br>
        Note: Images are processed in real-time and not stored.
    </div>
    """)
    # Button action
    submit_btn.click(
        fn=analyze_image,
        inputs=[image_input, prompt_input],
        outputs=result_output
    )
# Launch the app
if __name__ == "__main__":
    demo.launch()