docAI / app.py
akashraut's picture
Update app.py
25ca7ed verified
import os
import json
import time
import gradio as gr
import google.generativeai as genai
from PIL import Image
# ============================================================
# Configuration
# ============================================================
# The Gemini key comes from the process environment; abort at import time
# when it is absent or empty so a misconfigured deployment fails loudly.
API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError("GEMINI_API_KEY missing in Hugging Face Secrets")

genai.configure(api_key=API_KEY)

# One shared model handle, reused by every extraction request.
MODEL_NAME = "gemini-2.5-flash"
model = genai.GenerativeModel(model_name=MODEL_NAME)

# Simple global rate limit (HF protection):
# LAST_CALL_TS holds the time.time() stamp of the last accepted request,
# MIN_INTERVAL is the minimum gap, in seconds, enforced between requests.
LAST_CALL_TS = 0
MIN_INTERVAL = 3
# ============================================================
# Core Extraction Logic (Doc-Agnostic)
# ============================================================
# Instruction prompt sent to the model together with the document image.
# Kept at module level so the extraction function stays short; the text is
# part of the runtime contract with the model and must not be reformatted.
_EXTRACTION_PROMPT = """
You are a document intelligence system.
Your job is to analyze ANY document image and produce a
Docsumo-compatible structured JSON output.
DOCUMENT TYPES MAY INCLUDE (but are not limited to):
- Financial statements
- Invoices
- Forms
- Reports
- Letters
- Tables-only documents
--------------------------------
TASKS
--------------------------------
1. Identify document_type and document_subtype.
2. Extract all key-value fields visible in the document.
3. Extract ALL tables with exact row/column structure.
4. If charts/graphs exist, summarize insights textually.
5. Do NOT hallucinate missing data.
6. Preserve numbers exactly as shown.
--------------------------------
OUTPUT RULES
--------------------------------
- Output ONLY valid JSON
- No markdown
- No explanations
- Follow the schema EXACTLY
--------------------------------
DOCSUMO-COMPATIBLE JSON SCHEMA
--------------------------------
{
"document_metadata": {
"document_type": string,
"document_subtype": string,
"page_count": number,
"language": string
},
"extraction": {
"fields": {
"<field_name>": {
"value": string,
"normalized_value": string | null,
"type": "string" | "number" | "date" | "currency" | "enum"
}
},
"tables": {
"<table_id>": {
"table_label": string,
"headers": [string],
"rows": [
{ "<header>": string }
]
}
},
"derived_insights": {
"<insight_name>": {
"value": string
}
}
}
}
"""


def extract_document(image: Image.Image):
    """Ask the Gemini model for structured JSON describing a document image.

    Args:
        image: The document image to analyze. ``None`` is tolerated (this is
            presumably what the Gradio image input passes when nothing has
            been uploaded) and yields an error dict instead of a model call.

    Returns:
        The value produced by ``json.loads`` on the model's response text
        when the call succeeds. Otherwise a dict ``{"error": <message>}``:
        when no image was given, when the call arrives less than
        ``MIN_INTERVAL`` seconds after the previously accepted one, or when
        the model call or JSON parsing raises.
    """
    global LAST_CALL_TS

    # Reject an empty upload before the rate limiter runs, so a click with
    # no image neither consumes the rate-limit window nor is handed to the
    # model (where it would only surface as an opaque exception message).
    if image is None:
        return {"error": "No image provided. Please upload a document image."}

    # ---- Rate limiting ----
    # The timestamp is recorded as soon as a request is accepted, so failed
    # model calls count against the limit as well.
    now = time.time()
    if now - LAST_CALL_TS < MIN_INTERVAL:
        return {"error": "Rate limited. Please wait a few seconds."}
    LAST_CALL_TS = now

    try:
        response = model.generate_content(
            [_EXTRACTION_PROMPT, image],
            generation_config={
                "temperature": 0,
                "response_mime_type": "application/json",
            },
        )
        return json.loads(response.text)
    except Exception as e:
        # UI boundary: report any failure to the caller as an error payload
        # rather than letting the exception propagate.
        return {"error": str(e)}
# ============================================================
# Gradio UI (HF)
# ============================================================
# Build the single-page demo: an image upload, a trigger button, and a JSON
# viewer that shows whatever extract_document returns.
with gr.Blocks(title="DocAI – Docsumo Compatible") as demo:
    # The heading previously contained mis-decoded UTF-8 bytes; it now
    # carries the intended page emoji and em dash.
    gr.Markdown("""
# 📄 DocAI — Docsumo-Compatible Document Intelligence
Upload **any document image** (invoice, statement, report, form).
This demo returns a **Docsumo-compatible JSON contract**:
- Document metadata
- Key-value fields
- Tables
- Derived insights
""")
    image_input = gr.Image(type="pil", label="Upload Document Image")
    extract_btn = gr.Button("Extract Document")
    output = gr.JSON(label="Docsumo-Compatible JSON Output")

    # Clicking the button sends the uploaded image through extract_document
    # and renders the returned object in the JSON panel.
    extract_btn.click(
        fn=extract_document,
        inputs=image_input,
        outputs=output,
    )

demo.launch()