# Hugging Face Space (status: Running)
import base64
import json
import mimetypes
import os
import re

import gradio as gr
import requests
# The OpenRouter API key is read from the environment so the secret never
# lives in source control. On Hugging Face Spaces, define it as a Space
# secret named OPENROUTER_API_KEY. When the variable is unset the key is an
# empty string and API calls will presumably fail authentication.
# NOTE(review): the key that used to be hard-coded here has been exposed and
# should be revoked.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")

# Vision-capable model identifier sent in the "model" field of each request.
IMAGE_MODEL = "opengvlab/internvl3-14b:free"
def extract_json_from_code_block(text):
    """Parse the JSON object contained in a model reply.

    The reply may wrap the object in a Markdown code fence (with or without a
    ``json`` language tag) or return it bare, possibly surrounded by extra
    prose. A fenced object is preferred; otherwise the span from the first
    ``{`` to the last ``}`` is tried.

    Args:
        text: The raw reply text.

    Returns:
        The decoded JSON value on success. On failure, a dict with a single
        ``"error"`` key: ``"No JSON block found."`` when no candidate object
        exists, or ``"Invalid JSON: ..."`` when the candidate does not parse.
    """
    if not isinstance(text, str):
        return {"error": "No JSON block found."}

    fenced = re.search(
        r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE
    )
    if fenced:
        candidate = fenced.group(1)
    else:
        # No fence: the reply may be the bare JSON object the prompt asks for.
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            return {"error": "No JSON block found."}
        candidate = text[start:end + 1]

    try:
        return json.loads(candidate)
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {str(e)}"}
def process_passport(image):
    """Send a document image to the vision model and return the extracted data.

    The image is base64-encoded into a data URL, posted together with the
    extraction prompt to the OpenRouter chat-completions endpoint, and the
    JSON object found in the model's reply is pretty-printed.

    Args:
        image: Filesystem path of the image to analyse, or a falsy value
            (``None`` / empty string) when nothing was supplied.

    Returns:
        A JSON string (indented, non-ASCII characters kept readable) holding
        either the extracted data or an ``{"error": ...}`` object when the
        reply contained no parseable JSON. Any other failure is returned as a
        string starting with ``"⚠️ Error:"``; this function does not raise.
    """
    if not image:
        return "⚠️ Error: No image provided."
    try:
        # Label the payload with the file's real media type instead of always
        # claiming JPEG; fall back to JPEG when the extension is unknown.
        mime_type, _ = mimetypes.guess_type(str(image))
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        with open(image, "rb") as f:
            encoded_image = base64.b64encode(f.read()).decode("utf-8")
        data_url = f"data:{mime_type};base64,{encoded_image}"
        prompt = f"""You are an advanced OCR and information extraction AI.
Your task is to meticulously analyze this image and extract all relevant information.
Output Format Instructions:
Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
The JSON object should have the following top-level keys:
- "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
- "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
- For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
- For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
- For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
- "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
- "raw_mrz_lines": (array of strings) Each line of the MRZ.
- "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
If no MRZ, this field should be null.
- "multilingual_info": (array of objects or null) For any text segments not in English:
- Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
If no non-English text, this field can be null or an empty array.
- "full_text_ocr": (string) Concatenation of all text found on the document.
Extraction Guidelines:
1. Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
2. Extract all visible text, including small print, stamps, and handwritten annotations if legible.
3. For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
4. If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
5. If the document is multi-page and only one page is provided, note this if apparent.
Ensure the entire output strictly adheres to the JSON format.
"""
        payload = {
            "model": IMAGE_MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
        }
        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }
        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,
        )
        try:
            result = response.json()
        except ValueError:
            return (
                f"⚠️ Error: API returned HTTP {response.status_code} "
                "with a non-JSON body."
            )

        choices = result.get("choices") if isinstance(result, dict) else None
        if not choices:
            # Surface the API's own explanation rather than a bare KeyError.
            # Assumes failures look like {"error": {"message": ...}} — TODO
            # confirm against the OpenRouter API reference.
            api_error = result.get("error") if isinstance(result, dict) else None
            if isinstance(api_error, dict):
                detail = api_error.get("message") or api_error
            else:
                detail = api_error or "no choices returned"
            return (
                f"⚠️ Error: API request failed "
                f"(HTTP {response.status_code}): {detail}"
            )

        content = choices[0]["message"]["content"]
        # ensure_ascii=False keeps non-Latin text readable instead of \uXXXX.
        return json.dumps(
            extract_json_from_code_block(content), indent=2, ensure_ascii=False
        )
    except Exception as e:
        return f"⚠️ Error: {str(e)}"
# Gradio UI: one image upload (handed to the callback as a file path) wired to
# process_passport, with the returned text shown in a JSON code viewer.
iface = gr.Interface(
    process_passport,
    gr.Image(label="Upload Passport Front", type="filepath"),
    gr.Code(language="json", label="JSON Result"),
    description="Upload a front image of a passport. The app will extract the visible details and return the result as JSON.",
    title="Passport Front Image Extractor",
)

# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()