Spaces:

uraniborg-ai
/

iu-cam-reader

Sleeping

iu-cam-reader / app.py

Hyounggyu Kim

Start project

1f1037a 2 months ago

3.06 kB

	import json

	from pydantic import BaseModel

	from google import genai
	from google.genai import types

	from PIL import Image

	import gradio as gr

	# Default instruction text pre-filled into the UI's "Prompt" textbox and sent
	# to the model together with the uploaded image. It asks for a JSON object
	# with "upper" and "lower" keys, matching the fields of the Response schema.
	# NOTE(review): requirement 3 asks for float values while requirement 4 asks
	# to keep the exact number of digits; a float cannot carry leading/trailing
	# zeros, so confirm which of the two is actually required.
	DEFAULT_PROMPT = """Please analyze the two meter images shown - one positioned above and one below. Extract the numerical readings displayed on the front panel of each meter and format them into JSON.

	Requirements:
	1. Extract all visible numerical digits from the front display of each meter
	2. Create a JSON object with two keys: "upper" and "lower"
	3. Store the corresponding numerical readings as float values
	4. Maintain the exact number of digits shown on each display

	Note: Please ensure all digits are clearly read and maintain their original sequence as shown on the displays.

	Use this JSON schema:

	Return: {'upper': number, 'lower': number}"""


	# Model identifiers offered as choices in the UI's "Select Model" dropdown.
	MODELS = ["gemini-2.0-flash-exp"]


	# Structured-output schema for a model reply: passed as `response_schema`
	# to `generate_content` in `process_image`, so the JSON reply is expected to
	# carry exactly these two numeric fields.
	class Response(BaseModel):
	# Reading of the meter the prompt refers to as "upper".
	upper: float
	# Reading of the meter the prompt refers to as "lower".
	lower: float


	def process_image(model_choice, api_token, prompt, image_array):
	    """Send a meter image and a prompt to a Gemini model and return its JSON reply.

	    Args:
	        model_choice: Model identifier passed to ``generate_content``.
	        api_token: API key used to construct the ``genai.Client``.
	        prompt: Instruction text sent alongside the image.
	        image_array: Image data accepted by ``PIL.Image.fromarray``
	            (presumably the numpy array Gradio's image input produces), or
	            ``None`` when no image has been supplied.

	    Returns:
	        The decoded JSON reply of the model (requested in the shape of
	        ``Response``) on success. On any failure, a dict of the form
	        ``{"error": "Error processing image: <reason>"}``. A dict is returned
	        rather than a bare string because the result feeds a JSON output
	        component, which cannot display a string that is not valid JSON.
	    """
	    # Fail early with a readable message instead of surfacing the opaque
	    # AttributeError that Image.fromarray(None) would raise.
	    if image_array is None:
	        return {"error": "Error processing image: no image was provided"}

	    try:
	        image = Image.fromarray(image_array)
	        client = genai.Client(api_key=api_token)
	        response = client.models.generate_content(
	            model=model_choice,
	            contents=[image, prompt],
	            config=types.GenerateContentConfig(
	                # Ask for JSON constrained to the Response schema.
	                response_mime_type="application/json",
	                response_schema=Response,
	                temperature=0.2,
	                candidate_count=1,
	            ),
	        )
	        return json.loads(response.text)
	    except Exception as e:
	        # UI boundary: report every failure (bad key, network error,
	        # malformed or empty reply) as data instead of crashing the handler.
	        return {"error": f"Error processing image: {e}"}


	# Build the Gradio interface: a configuration column (API token, model,
	# prompt), an input column (image upload and submit button) and an output
	# column that shows the JSON returned by process_image.
	with gr.Blocks(title="Enhanced Meter Reader") as app:
	    gr.Markdown("# Advanced Meter Reading Analyzer")
	    gr.Markdown("""
	Upload an image containing two meters and get their readings in JSON format.
	Please provide your API token and select the desired model before processing.
	""")

	    with gr.Row():
	        with gr.Column():
	            # Configuration inputs
	            api_token = gr.Textbox(
	                label="API Token",
	                placeholder="Enter your Gemini API token",
	                type="password",
	            )
	            model_dropdown = gr.Dropdown(
	                choices=MODELS,
	                label="Select Model",
	                # Derive the default from MODELS so the preselected value
	                # can never drift out of sync with the available choices.
	                value=MODELS[0],
	            )
	            prompt_input = gr.Textbox(
	                label="Prompt",
	                lines=10,
	                value=DEFAULT_PROMPT,
	            )

	        with gr.Column():
	            # Image input and submit button
	            image_input = gr.Image(
	                label="Upload Meter Image",
	                # process_image feeds this value to Image.fromarray, so
	                # request the array form explicitly.
	                type="numpy",
	            )
	            submit_btn = gr.Button("Analyze Image")

	        with gr.Column():
	            # Output component
	            json_output = gr.JSON(label="Results")

	    # Connect the main processing function; the order of `inputs` must
	    # match the parameter order of process_image.
	    submit_btn.click(
	        fn=process_image,
	        inputs=[model_dropdown, api_token, prompt_input, image_input],
	        outputs=json_output,
	    )

	# Launch the app only when run as a script
	if __name__ == "__main__":
	    app.launch(share=False)  # share=False: do not create a public share link