# iu-cam-reader / app.py
# Hyounggyu Kim
# Start project
# 1f1037a
import json
from pydantic import BaseModel
from google import genai
from google.genai import types
from PIL import Image
import gradio as gr
# Default instruction text used as the initial value of the "Prompt" textbox.
# It asks the model for a JSON object with "upper" and "lower" numeric keys,
# which are the same two fields declared by the Response schema.
DEFAULT_PROMPT = """Please analyze the two meter images shown - one positioned above and one below. Extract the numerical readings displayed on the front panel of each meter and format them into JSON.
Requirements:
1. Extract all visible numerical digits from the front display of each meter
2. Create a JSON object with two keys: "upper" and "lower"
3. Store the corresponding numerical readings as float values
4. Maintain the exact number of digits shown on each display
Note: Please ensure all digits are clearly read and maintain their original sequence as shown on the displays.
Use this JSON schema:
Return: {'upper': number, 'lower': number}"""
# Model identifiers offered as choices in the "Select Model" dropdown.
MODELS = ["gemini-2.0-flash-exp"]
class Response(BaseModel):
    """Schema handed to the model as ``response_schema`` for the two readings."""

    # Reading taken from the meter positioned above.
    upper: float
    # Reading taken from the meter positioned below.
    lower: float
def process_image(model_choice, api_token, prompt, image_array):
    """Send a meter photo to a Gemini model and return the parsed readings.

    Args:
        model_choice: Model identifier passed as ``model`` to
            ``generate_content``.
        api_token: API key used to construct the ``genai.Client``.
        prompt: Instruction text sent to the model together with the image.
        image_array: Image data accepted by ``PIL.Image.fromarray``, or
            ``None`` when no image has been supplied.

    Returns:
        On success, the object decoded from the model's JSON response text.
        On any failure, a dict of the form
        ``{"error": "Error processing image: <reason>"}``.

        A dict is returned on failure (rather than a bare message string)
        because the result is rendered by a JSON output component, which
        treats string values as JSON text to be parsed; a plain sentence is
        not valid JSON and would make the error itself fail to display.
    """
    # Fail fast on inputs that can never succeed, before constructing a
    # client or issuing a network request.
    if api_token is None or not str(api_token).strip():
        return {"error": "Error processing image: API token is required"}
    if image_array is None:
        return {"error": "Error processing image: no image was provided"}

    try:
        image = Image.fromarray(image_array)
        client = genai.Client(api_key=api_token)
        response = client.models.generate_content(
            model=model_choice,
            contents=[image, prompt],
            config=types.GenerateContentConfig(
                # Ask for JSON constrained to the Response schema so the
                # reply text can be decoded directly below.
                response_mime_type="application/json",
                response_schema=Response,
                temperature=0.2,
                candidate_count=1,
            ),
        )
        return json.loads(response.text)
    except Exception as e:
        # UI boundary: any failure (image conversion, API call, JSON decoding)
        # is reported to the user instead of propagating out of the handler.
        return {"error": f"Error processing image: {e}"}
# Build the Gradio UI: configuration inputs, image upload, and JSON results.
# NOTE(review): the nesting below (three columns inside a single row) is
# reconstructed from a source whose indentation was lost - confirm the layout.
with gr.Blocks(title="Enhanced Meter Reader") as app:
    gr.Markdown("# Advanced Meter Reading Analyzer")
    gr.Markdown(
        "\n"
        "Upload an image containing two meters and get their readings in JSON format.\n"
        "Please provide your API token and select the desired model before processing.\n"
    )

    with gr.Row():
        # Column 1: credentials, model selection, and the editable prompt.
        with gr.Column():
            api_token = gr.Textbox(
                type="password",
                label="API Token",
                placeholder="Enter your Gemini API token",
            )
            model_dropdown = gr.Dropdown(
                label="Select Model",
                choices=MODELS,
                value="gemini-2.0-flash-exp",
            )
            prompt_input = gr.Textbox(label="Prompt", value=DEFAULT_PROMPT, lines=10)

        # Column 2: the meter photo and the button that triggers analysis.
        with gr.Column():
            image_input = gr.Image(label="Upload Meter Image")
            submit_btn = gr.Button("Analyze Image")

        # Column 3: where the readings returned by the handler are shown.
        with gr.Column():
            json_output = gr.JSON(label="Results")

    # Wire the button to the handler; the input order matches the parameter
    # order of process_image(model_choice, api_token, prompt, image_array).
    submit_btn.click(
        process_image,
        [model_dropdown, api_token, prompt_input, image_input],
        json_output,
    )
# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    # share=False: do not request a public share link (local-only access).
    app.launch(share=False)