# Hugging Face Space: SauravCh11/PassportOCR
# Status: Running
# File size: 4,908 Bytes

import base64
import json
import mimetypes
import os
import re

import gradio as gr
import requests

# The OpenRouter API key is read from the environment so the secret never lives
# in source control; set OPENROUTER_API_KEY before launching the app.
# NOTE(review): the key that was previously committed on this line must be
# treated as leaked and revoked.
API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
# Model identifier sent as the "model" field of the chat-completions payload.
IMAGE_MODEL = "opengvlab/internvl3-14b:free"

def extract_json_from_code_block(text):
    """Extract and parse the JSON object contained in a model reply.

    The reply may wrap the object in a Markdown code fence (with or without a
    ``json`` language tag) or return it bare, optionally surrounded by prose.
    A fenced block is preferred; otherwise the span from the first ``{`` to
    the last ``}`` is parsed.

    Args:
        text: Raw reply text. Non-string or blank values are treated as
            containing no JSON.

    Returns:
        The parsed dictionary on success, otherwise a dictionary with a single
        ``"error"`` key describing the failure. This function never raises.
    """
    if not isinstance(text, str) or not text.strip():
        return {"error": "No JSON block found."}

    # Prefer an explicit fenced block; the language tag is optional because
    # models frequently omit it.
    fenced = re.search(
        r"```(?:json)?\s*(\{.*?\})\s*```", text, re.DOTALL | re.IGNORECASE
    )
    if fenced:
        json_str = fenced.group(1)
    else:
        # No fence: fall back to the outermost brace-delimited span, which
        # covers a bare JSON object with optional leading/trailing prose.
        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end < start:
            return {"error": "No JSON block found."}
        json_str = text[start:end + 1]

    try:
        return json.loads(json_str)
    except json.JSONDecodeError as e:
        return {"error": f"Invalid JSON: {str(e)}"}

def process_passport(image):
    """Send an uploaded document image to the vision model and return JSON text.

    The image file is base64-encoded into a data URL, posted together with the
    extraction prompt to the OpenRouter chat-completions endpoint, and the JSON
    object found in the model's reply is pretty-printed.

    Args:
        image: Filesystem path of the uploaded image (the Gradio input uses
            ``type="filepath"``), or ``None`` when nothing was uploaded.

    Returns:
        A JSON-formatted string with the extracted data, or a string starting
        with ``"⚠️ Error: "`` when anything goes wrong. This function never
        raises, so the UI always receives displayable text.
    """
    # Submitting the form without an upload passes None; fail with a clear
    # message instead of the TypeError text from open(None).
    if not image:
        return "⚠️ Error: No image provided."

    try:
        with open(image, "rb") as f:
            encoded_image = base64.b64encode(f.read()).decode("utf-8")

        # Label the data URL with the real image type (PNG, WebP, ...) rather
        # than always claiming JPEG; fall back to JPEG when it cannot be guessed.
        mime_type, _ = mimetypes.guess_type(image)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        data_url = f"data:{mime_type};base64,{encoded_image}"

        prompt = f"""You are an advanced OCR and information extraction AI.
        Your task is to meticulously analyze this image and extract all relevant information.

        Output Format Instructions:
        Provide your response as a SINGLE, VALID JSON OBJECT. Do not include any explanatory text before or after the JSON.
        The JSON object should have the following top-level keys:
        - "document_type_detected": (string) Your best guess of the specific document type (e.g., "Passport", "National ID Card", "Driver's License", "Visa Sticker", "Hotel Confirmation Voucher", "Boarding Pass", "Photograph of a person").
        - "extracted_fields": (object) A key-value map of all extracted information. Be comprehensive. Examples:
            - For passports/IDs: "Surname", "Given Names", "Document Number", "Nationality", "Date of Birth", "Sex", "Place of Birth", "Date of Issue", "Date of Expiry", "Issuing Authority", "Country Code".
            - For hotel reservations: "Guest Name", "Hotel Name", "Booking Reference", "Check-in Date", "Check-out Date", "Room Type".
            - For photos: "Description" (e.g., "Portrait of a person", "Image contains text: [text if any]").
        - "mrz_data": (object or null) If a Machine Readable Zone (MRZ) is present:
            - "raw_mrz_lines": (array of strings) Each line of the MRZ.
            - "parsed_mrz": (object) Key-value pairs of parsed MRZ fields (e.g., "passport_type", "issuing_country", "surname", "given_names", "passport_number", "nationality", "dob", "sex", "expiry_date", "personal_number").
            If no MRZ, this field should be null.
        - "multilingual_info": (array of objects or null) For any text segments not in English:
            - Each object: {{"language_detected": "ISO 639-1 code", "original_text": "...", "english_translation_or_transliteration": "..."}}
            If no non-English text, this field can be null or an empty array.
        - "full_text_ocr": (string) Concatenation of all text found on the document.

        Extraction Guidelines:
        1.  Prioritize accuracy. If unsure about a character or word, indicate uncertainty if possible, or extract the most likely interpretation.
        2.  Extract all visible text, including small print, stamps, and handwritten annotations if legible.
        3.  For dates, try to use ISO 8601 format (YYYY-MM-DD) if possible, but retain original format if conversion is ambiguous.
        4.  If the image is a photo of a person without much text, the "extracted_fields" might contain a description, and "full_text_ocr" might be minimal.
        5.  If the document is multi-page and only one page is provided, note this if apparent.

        Ensure the entire output strictly adheres to the JSON format.
        """

        payload = {
            "model": IMAGE_MODEL,
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": data_url}},
                    ],
                }
            ],
        }

        if not API_KEY:
            return "⚠️ Error: API key is not configured."

        headers = {
            "Authorization": f"Bearer {API_KEY}",
            "Content-Type": "application/json",
        }

        # Without a timeout a stalled connection would block the UI forever.
        response = requests.post(
            "https://openrouter.ai/api/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=120,
        )
        result = response.json()

        # Failed calls carry no "choices"; report the service's own message
        # instead of a bare KeyError text.
        # NOTE(review): assumes failures look like {"error": {"message": ...}}
        # - confirm against the OpenRouter API reference.
        choices = result.get("choices") if isinstance(result, dict) else None
        if not choices:
            api_error = result.get("error") if isinstance(result, dict) else None
            if isinstance(api_error, dict):
                detail = api_error.get("message") or str(api_error)
            else:
                detail = api_error or f"Unexpected API response (HTTP {response.status_code})."
            return f"⚠️ Error: {detail}"

        content = choices[0]["message"]["content"]
        # ensure_ascii=False keeps non-Latin text readable in the output
        # instead of \uXXXX escapes.
        return json.dumps(
            extract_json_from_code_block(content), indent=2, ensure_ascii=False
        )

    except Exception as e:
        # Top-level UI boundary: turn any failure (I/O, network, malformed
        # reply) into a message the output widget can display.
        return f"⚠️ Error: {str(e)}"

# Gradio UI: a single image upload (passed to the handler as a file path) in,
# a JSON code viewer out.
iface = gr.Interface(
    title="Passport Front Image Extractor",
    description=(
        "Upload a front image of a passport. The app will extract the visible details and return the result as JSON."
    ),
    fn=process_passport,
    inputs=gr.Image(label="Upload Passport Front", type="filepath"),
    outputs=gr.Code(language="json", label="JSON Result"),
)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    iface.launch()