import json

import gradio as gr
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

from util.vision_util import process_vision_info

# Load model and processor
model = Qwen2VLForConditionalGeneration.from_pretrained(
    "datamoon/qwen2-vl-iranian-idcard-ocr",
    device_map="auto"
)
processor = AutoProcessor.from_pretrained(
    "datamoon/qwen2-vl-iranian-idcard-ocr",
    padding_side="left"
)

def process_id_card(image_path):
    try:
        # Prepare the chat message pairing the uploaded image with the extraction instruction
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": """From this image which is a persian national id card,
                return a JSON object with these exact fields:
                {
                    "national_id": "...",
                    "first_name": "...",
                    "last_name": "...",
                    "date_of_birth": "...",
                    "father_name": "...",
                    "expiry_date": "..."
                }
                Return ONLY the JSON object, nothing else."""}
            ]
        }]

        # Process vision inputs: process_vision_info comes from the Space's local
        # util module (presumably mirroring qwen_vl_utils); its second return
        # value (video inputs) is unused here
        image_inputs, _ = process_vision_info(messages)

        # Render the chat template into the text prompt
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Combine text and image into model inputs
        inputs = processor(
            text=[text],
            images=image_inputs,
            return_tensors="pt",
        ).to(model.device)

        # Generate the response deterministically
        generated_ids = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=False
        )
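
        # generate() returns the prompt tokens followed by the newly generated
        # ones, so the slice below drops the first inputs.input_ids.shape[1]
        # positions and decodes only the model's answer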
        # Decode and clean output
        generated_text = processor.batch_decode(
            generated_ids[:, inputs.input_ids.shape[1]:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False
        )[0]

        # Try to extract JSON from the output
        try:
            # Find JSON start and end
            json_start = generated_text.find('{')
            json_end = generated_text.rfind('}') + 1
            json_str = generated_text[json_start:json_end]

            # Parse and validate JSON
            result = json.loads(json_str)
            required_fields = [
                "national_id", "first_name", "last_name",
                "date_of_birth", "father_name", "expiry_date"
            ]
            for field in required_fields:
                if field not in result:
                    raise ValueError(f"Missing field: {field}")
            return result
        except (json.JSONDecodeError, ValueError) as e:
            return {"error": f"Could not parse model output: {str(e)}", "raw_output": generated_text}
    except Exception as e:
        return {"error": str(e)}
# Create Gradio interface
iface = gr.Interface(
    fn=process_id_card,
    inputs=gr.Image(type="filepath", label="Upload ID Card Image"),
    outputs=gr.JSON(label="Extracted Information"),
    title="Persian ID Card Reader",
    description="""Upload an image of an Iranian national ID card to extract information.
    The system will return: national_id, first_name, last_name, date_of_birth, father_name, and expiry_date.""",
    examples=[
        ["examples/id1.png"]
    ],
    allow_flagging="never"
)

# Launch with queue for better performance
iface.queue()
iface.launch(
    server_name="0.0.0.0",
    share=False,
    debug=False
)
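
# Quick local check (a sketch, not part of the deployed Space): call the handler
# directly on the bundled example image (any local ID card path works), either
# from a separate script that imports process_id_card or after temporarily
# commenting out iface.launch() above:
#
#   result = process_id_card("examples/id1.png")
#   print(json.dumps(result, ensure_ascii=False, indent=2))  # keep Persian text readable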