|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import cv2 |
|
|
from paddleocr import PaddleOCR |
|
|
|
|
|
# Module-level cache for the PaddleOCR engine; populated on first use by get_ocr().
ocr = None


def get_ocr():
    """Return the shared PaddleOCR engine, constructing it on the first call.

    The engine is stored in the module-level ``ocr`` variable so that later
    calls reuse the same instance instead of building a new one.
    """
    global ocr

    # Fast path: an engine has already been created.
    if ocr is not None:
        return ocr

    ocr = PaddleOCR(use_angle_cls=True, lang="en")
    return ocr
|
|
|
|
|
def preprocess_for_ocr(pil_img, *, scale=1.8, clip_limit=2.0, tile_grid_size=(8, 8)):
    """Convert an input image to an upscaled, contrast-enhanced BGR array.

    Steps: normalise to 3-channel RGB, convert to BGR, upscale with cubic
    interpolation, then apply CLAHE to the lightness channel in LAB space.

    Args:
        pil_img: The image to process. Objects exposing a ``mode`` attribute
            other than ``"RGB"`` are converted with ``.convert("RGB")``;
            anything else is handed to ``np.array`` as-is.
        scale: Upscale factor applied to both width and height.
        clip_limit: ``clipLimit`` passed to ``cv2.createCLAHE``.
        tile_grid_size: ``tileGridSize`` passed to ``cv2.createCLAHE``.

    Returns:
        The processed image as a BGR array.

    Raises:
        ValueError: If ``scale`` is not positive.
    """
    if scale <= 0:
        raise ValueError(f"scale must be positive, got {scale!r}")

    # Grayscale / palette / bilevel images become 2-D arrays, which the
    # RGB->BGR conversion below cannot handle; normalise them to RGB first.
    mode = getattr(pil_img, "mode", None)
    if mode is not None and mode != "RGB":
        pil_img = pil_img.convert("RGB")

    rgb = np.array(pil_img)
    if rgb.dtype != np.uint8:
        rgb = rgb.astype(np.uint8)

    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

    # Upscale; clamp to at least one pixel so a tiny image combined with a
    # small scale never asks cv2.resize for a zero-sized output.
    h, w = bgr.shape[:2]
    new_size = (max(1, int(w * scale)), max(1, int(h * scale)))
    bgr = cv2.resize(bgr, new_size, interpolation=cv2.INTER_CUBIC)

    # Contrast enhancement on lightness only; the a/b channels pass through.
    lab = cv2.cvtColor(bgr, cv2.COLOR_BGR2LAB)
    lightness, a, b = cv2.split(lab)
    clahe = cv2.createCLAHE(clipLimit=clip_limit, tileGridSize=tile_grid_size)
    lab_enhanced = cv2.merge([clahe.apply(lightness), a, b])

    return cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
|
|
|
|
|
def crop_bottom_text_region(bgr):
    """Return the bottom ~35% of the image, where kids' books often put text.

    The crop starts at ``int(height * 0.65)`` and keeps the full width.
    """
    height = bgr.shape[0]
    width = bgr.shape[1]
    top_row = int(height * 0.65)
    return bgr[top_row:height, 0:width]
|
|
|
|
|
def cloud_ocr_stub(_pil_img):
    """Stand-in for a GPU/VLM OCR backend (olmOCR-2 / Nanonets OCR2).

    The image argument is currently ignored; a fixed placeholder message is
    returned. Later this is meant to call an API endpoint instead.
    """
    placeholder = "[Cloud OCR placeholder] PaddleOCR confidence was low. Next: call olmOCR-2 / OCR2 via API."
    return placeholder
|
|
|
|
|
def run_ocr(img):
    """Run PaddleOCR on the bottom text region of a page photo.

    Args:
        img: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A 3-tuple ``(text, average_confidence, cloud_needed)`` where
        ``cloud_needed`` is the string ``"Yes"`` or ``"No"``. The fallback is
        flagged when the mean confidence is below 0.45 or fewer than 15
        characters were extracted.
    """
    if img is None:
        return "(No image)", 0.0, "No"

    bgr = preprocess_for_ocr(img)
    bgr_crop = crop_bottom_text_region(bgr)
    result = get_ocr().ocr(bgr_crop)

    lines, confs = _parse_ocr_result(result)
    extracted = "\n".join(lines).strip()
    avg_conf = sum(confs) / len(confs) if confs else 0.0

    needs_cloud = avg_conf < 0.45 or len(extracted) < 15
    if not needs_cloud:
        return extracted, avg_conf, "No"

    # Low-confidence text is still shown as-is; the placeholder plus the cloud
    # stub message is substituted only when nothing at all was extracted.
    if not extracted:
        extracted = "(PaddleOCR found no text)\n\n" + cloud_ocr_stub(img)
    return extracted, avg_conf, "Yes"


def _as_text_conf(entry):
    """Return ``(text, confidence)`` if ``entry`` looks like ``[box, (text, conf)]``, else ``None``."""
    if not isinstance(entry, (list, tuple)) or len(entry) < 2:
        return None
    payload = entry[1]
    if not isinstance(payload, (list, tuple)) or len(payload) != 2:
        return None
    text, conf = payload
    # Requiring a string here is what separates a recognition pair from a
    # box corner such as [x, y], which is also a two-element sequence.
    if not isinstance(text, str):
        return None
    try:
        return text, float(conf)
    except (TypeError, ValueError):
        return None


def _parse_ocr_result(result):
    """Collect recognised lines and their confidences from an OCR result.

    Returns two parallel lists ``(lines, confs)``; an entry is added to both
    or to neither, so the lists always have equal length.
    """
    lines = []
    confs = []

    if isinstance(result, dict):
        pages = [result]
    elif isinstance(result, (list, tuple)):
        pages = result
    else:
        # None (nothing detected) or an unrecognised type: no text.
        return lines, confs

    for page in pages:
        if page is None:
            # A page with no detections.
            continue

        if isinstance(page, dict):
            # NOTE(review): newer PaddleOCR releases are believed to return
            # dict-like results with parallel "rec_texts" / "rec_scores"
            # sequences - confirm against the installed version.
            texts = page.get("rec_texts")
            scores = page.get("rec_scores")
            if texts is None or scores is None:
                continue
            for text, score in zip(texts, scores):
                try:
                    conf = float(score)
                except (TypeError, ValueError):
                    continue
                lines.append(str(text))
                confs.append(conf)
            continue

        # Entries are recognised by structure rather than by checking whether
        # index 0 is a list: a flat list of entries and a nested list of
        # pages both have a list at index 0, so that check cannot tell them
        # apart and would read box coordinates as text for the flat shape.
        direct = _as_text_conf(page)
        if direct is not None:
            lines.append(direct[0])
            confs.append(direct[1])
            continue

        if isinstance(page, (list, tuple)):
            for entry in page:
                parsed = _as_text_conf(entry)
                if parsed is not None:
                    lines.append(parsed[0])
                    confs.append(parsed[1])

    return lines, confs
|
|
|
|
|
|
|
|
# Gradio UI: a single image upload feeding run_ocr, whose 3-tuple result maps
# onto the three output components below in order.
page_input = gr.Image(type="pil", label="Upload a page photo")
result_outputs = [
    gr.Textbox(label="Extracted text", lines=12),
    gr.Number(label="Average confidence (0–1)"),
    gr.Textbox(label="Cloud fallback needed?", interactive=False),
]

demo = gr.Interface(
    fn=run_ocr,
    inputs=page_input,
    outputs=result_outputs,
    title="BookReader × Reachy Mini",
    description="CPU PaddleOCR + smart fallback (VLM OCR stub). Crops bottom text region for kid books.",
)

# Start the app as soon as the module is executed.
demo.launch(ssr_mode=False)
|
|
|