Spaces:

adil9858
/

dalton_vision

Sleeping

App Files Files Community

dalton_vision / app.py

adil9858

Update app.py

4b25820 verified 2 months ago

raw

history blame contribute delete

4.29 kB

	import base64
	import io
	import os
	from datetime import datetime

	import gradio as gr
	from openai import OpenAI
	from PIL import Image

	# OpenRouter client setup (OpenAI-compatible API).
	# The API key is a secret and must never be committed with the source: it is
	# read from the OPENROUTER_API_KEY environment variable instead.
	# NOTE(review): the key that used to be hard-coded here was published together
	# with this file and should be treated as compromised and revoked.
	OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
	if not OPENROUTER_API_KEY:
	    # Fail at startup with an actionable message rather than on the first request.
	    raise RuntimeError(
	        "OPENROUTER_API_KEY is not set; configure it as an environment "
	        "variable or secret before starting the app."
	    )

	client = OpenAI(
	    base_url="https://openrouter.ai/api/v1",
	    api_key=OPENROUTER_API_KEY,
	)

	# System prompt sent with every request; defines the assistant's persona and tasks.
	_SYSTEM_PROMPT = """You are Dalton, an expert AI assistant specialized in image understanding.
	Your tasks include:
	- Extracting and structuring text from images
	- Answering questions about image content
	- Providing detailed descriptions
	- Analyzing receipts, documents, and other visual content
	Be thorough, accurate, and helpful in your responses."""


	def _encode_image_as_jpeg_base64(image):
	    """Return *image* serialized as JPEG and encoded as a base64 string.

	    Images in a mode other than RGB or L (for example RGBA or palette-based
	    PNG uploads) are converted to RGB first, because saving them directly
	    as JPEG raises an error.
	    """
	    if image.mode not in ("RGB", "L"):
	        image = image.convert("RGB")

	    buffer = io.BytesIO()
	    image.save(buffer, format="JPEG")
	    return base64.b64encode(buffer.getvalue()).decode("utf-8")


	def analyze_image(image, prompt):
	    """Send an image and a text prompt to the vision model and return its reply.

	    Args:
	        image: The image to analyze (an object with the PIL image interface:
	            ``mode``, ``convert`` and ``save``), or ``None`` when nothing
	            was uploaded or captured.
	        prompt: The user's question or instruction about the image.

	    Returns:
	        The model's text answer, or a human-readable message when no image
	        was provided, the model returned nothing, or any step failed.
	        Failures are reported as text rather than raised so the caller can
	        display them directly.
	    """
	    if image is None:
	        return "Please upload or capture an image first."

	    try:
	        # Encoding happens inside the try block so that an image that cannot
	        # be serialized is reported like any other failure.
	        img_str = _encode_image_as_jpeg_base64(image)

	        response = client.chat.completions.create(
	            model="opengvlab/internvl3-14b:free",
	            messages=[
	                {"role": "system", "content": _SYSTEM_PROMPT},
	                {
	                    "role": "user",
	                    "content": [
	                        {"type": "text", "text": prompt},
	                        {
	                            "type": "image_url",
	                            # The image travels inline as a base64 data URL.
	                            "image_url": {
	                                "url": f"data:image/jpeg;base64,{img_str}"
	                            },
	                        },
	                    ],
	                },
	            ],
	            max_tokens=2048,
	        )

	        # Guard against a response without choices or with empty content so
	        # the caller gets a readable message instead of a blank result.
	        choices = response.choices
	        if not choices or not choices[0].message.content:
	            return "The model returned an empty response. Please try again."
	        return choices[0].message.content

	    except Exception as e:
	        return f"An error occurred: {str(e)}"

	# Custom CSS for better mobile experience.
	# The selectors correspond to identifiers used by the layout in this file:
	#   #mobile-camera  -> elem_id of the image input (forced to full width)
	#   #prompt-textbox -> elem_id of the prompt textbox (minimum height 100px)
	#   .result-box     -> elem_classes of the result Markdown (scrolls past 500px)
	#   .footer         -> class of the <div> rendered by the footer Markdown
	# The string is handed to gr.Blocks through its css argument.
	css = """
	#mobile-camera { width: 100% !important; }
	#prompt-textbox { min-height: 100px !important; }
	.result-box {
	max-height: 500px;
	overflow-y: auto;
	padding: 15px;
	border: 1px solid #e0e0e0;
	border-radius: 8px;
	}
	.footer {
	margin-top: 20px;
	font-size: 12px;
	color: #666;
	text-align: center;
	}
	"""

	# Static page text and example prompts, kept apart from the layout code.
	_HEADER_MARKDOWN = """
	# 🧾 DaltonVision - InternVL3-14B
	### Advanced Image Understanding • Powered by OpenRouter • Developed by [Koshur AI](https://koshurai.com)
	"""
	_FOOTER_MARKDOWN = """
	<div class="footer">
	© 2025 Koshur AI. All rights reserved.<br>
	Note: Images are processed in real-time and not stored.
	</div>
	"""
	_EXAMPLE_PROMPTS = (
	    "What is the total amount on this receipt?",
	    "List all items and their prices",
	    "Who is the vendor and what is the date?",
	    "Describe this image in detail",
	)

	# Page layout: a header, one row holding an input column and an output
	# column, a footer, and the click handler that connects them.
	# NOTE(review): the nesting below was reconstructed from a source whose
	# indentation had been flattened — confirm it matches the intended layout.
	with gr.Blocks(css=css, title="DaltonVision - Koshur AI") as demo:
	    gr.Markdown(_HEADER_MARKDOWN)

	    with gr.Row():
	        # First column: everything the user provides.
	        with gr.Column():
	            image_input = gr.Image(
	                label="Upload or Capture Image",
	                type="pil",
	                sources=["upload", "webcam"],
	                elem_id="mobile-camera",
	            )
	            prompt_input = gr.Textbox(
	                label="📝 Enter your question or instruction",
	                value="Extract all content structurally",
	                lines=3,
	                elem_id="prompt-textbox",
	            )
	            submit_btn = gr.Button("🔍 Analyze Image", variant="primary")

	            # Each example is a one-element row targeting the prompt textbox.
	            gr.Examples(
	                label="💡 Try these example prompts:",
	                examples=[[text] for text in _EXAMPLE_PROMPTS],
	                inputs=[prompt_input],
	            )

	        # Second column: the model's answer rendered as Markdown.
	        with gr.Column():
	            result_output = gr.Markdown(
	                label="✅ Analysis Result",
	                elem_classes="result-box",
	            )

	    gr.Markdown(_FOOTER_MARKDOWN)

	    # Run the analysis on the current image and prompt when the button is pressed.
	    submit_btn.click(
	        fn=analyze_image,
	        inputs=[image_input, prompt_input],
	        outputs=result_output,
	    )

	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()