# Hugging Face Space file for "datamoon/qwen2-vl-iranian-idcard-ocr" (revision 13b328e).
import json

import gradio as gr
import torch
from PIL import Image
from transformers import Qwen2VLForConditionalGeneration, AutoProcessor

from util.vision_util import process_vision_info
# Fine-tuned Qwen2-VL checkpoint for Iranian national ID card OCR.
_CHECKPOINT = "datamoon/qwen2-vl-iranian-idcard-ocr"

# device_map="auto" lets the weights be placed on GPU/CPU automatically.
model = Qwen2VLForConditionalGeneration.from_pretrained(
    _CHECKPOINT,
    device_map="auto",
)
# Left padding — the convention for decoder-only generation.
processor = AutoProcessor.from_pretrained(
    _CHECKPOINT,
    padding_side="left",
)
def process_id_card(image_path):
    """Extract structured fields from an Iranian national ID card image.

    Runs the fine-tuned Qwen2-VL model on the supplied image, instructs it to
    answer with a fixed-schema JSON object, then parses and validates that
    object.

    Args:
        image_path: Filesystem path to the card image (Gradio supplies this
            because the input component uses ``type="filepath"``).

    Returns:
        dict: On success, the six extracted fields. On failure, a dict with an
        ``"error"`` message (plus ``"raw_output"`` when the model answered but
        the answer was not the expected JSON).
    """
    try:
        # Chat-style message pairing the image with the extraction prompt.
        messages = [{
            "role": "user",
            "content": [
                {"type": "image", "image": image_path},
                {"type": "text", "text": """From this image which is a persian national id card,
return a JSON object with these exact fields:
{
"national_id": "...",
"first_name": "...",
"last_name": "...",
"date_of_birth": "...",
"father_name": "...",
"expiry_date": "..."
}
Return ONLY the JSON object, nothing else."""}
            ]
        }]
        # Resolve the image reference into model-ready vision inputs.
        image_inputs, _ = process_vision_info(messages)
        # Render the chat template and append the assistant prompt marker.
        text = processor.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = processor(
            text=text,
            images=image_inputs,
            return_tensors="pt",
        ).to(model.device)
        # inference_mode() skips autograd bookkeeping during generation,
        # reducing memory use; do_sample=False keeps output deterministic.
        with torch.inference_mode():
            generated_ids = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
            )
        # Decode only the newly generated tokens (drop the echoed prompt).
        generated_text = processor.batch_decode(
            generated_ids[:, inputs.input_ids.shape[1]:],
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )[0]
        # Locate the JSON object inside the (possibly chatty) model reply.
        json_start = generated_text.find('{')
        json_end = generated_text.rfind('}') + 1
        # Fix: a reply with no braces used to yield a garbage slice
        # (find() == -1); report the condition explicitly instead.
        if json_start == -1 or json_end <= json_start:
            return {
                "error": "Could not parse model output: no JSON object found",
                "raw_output": generated_text,
            }
        try:
            result = json.loads(generated_text[json_start:json_end])
            required_fields = [
                "national_id", "first_name", "last_name",
                "date_of_birth", "father_name", "expiry_date",
            ]
            for field in required_fields:
                if field not in result:
                    raise ValueError(f"Missing field: {field}")
            return result
        except (json.JSONDecodeError, ValueError) as e:
            return {"error": f"Could not parse model output: {str(e)}", "raw_output": generated_text}
    except Exception as e:
        # Top-level guard so the UI always receives a readable error dict.
        return {"error": str(e)}
# Create Gradio interface: single image in, extracted-fields JSON out.
iface = gr.Interface(
    fn=process_id_card,
    # type="filepath" hands process_id_card a path string, not a PIL image.
    inputs=gr.Image(type="filepath", label="Upload ID Card Image"),
    outputs=gr.JSON(label="Extracted Information"),
    title="Persian ID Card Reader",
    description="""Upload an image of an Iranian national ID card to extract information.
The system will return: national_id, first_name, last_name, date_of_birth, father_name, and expiry_date.""",
    # NOTE(review): example file must exist relative to the working directory.
    examples=[
        ["examples/id1.png"]
    ],
    allow_flagging="never"
)
# Launch the app; 0.0.0.0 binds all interfaces so the Space/container
# is reachable from outside, with no public share link and no debug output.
iface.launch(
    server_name="0.0.0.0",
    share=False,
    debug=False
)