Spaces:

Digitaljoint
/

ProofCheck

Sleeping

App Files Files Community

Yaz Hobooti committed on Sep 27, 2025

Commit

828bfe1

1 Parent(s): e4d5933

Replace barcode reader with robust ZXing-CPP implementation

Browse files

Files changed (4) hide show

app.py +4 -4
barcode_reader.py +317 -0
barcode_utils.py +0 -169
requirements.txt +3 -2

app.py CHANGED Viewed

@@ -52,7 +52,7 @@ except Exception:
     HAS_REGEX = False
 try:
-    from barcode_utils import read_barcodes_from_path
     HAS_BARCODE = True
 except Exception:
     read_barcodes_from_path = None
@@ -1117,10 +1117,10 @@ def compare_pdfs(file_a, file_b):
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
         if HAS_BARCODE:
-            # Use new barcode detection from barcode_utils
             try:
-                codes_a = read_barcodes_from_path(file_a.name, max_pages=5, raster_dpi=900)
-                codes_b = read_barcodes_from_path(file_b.name, max_pages=5, raster_dpi=900)
                 # Convert to old format for compatibility
                 bar_a, info_a = [], []

     HAS_REGEX = False
 try:
+    from barcode_reader import read_barcodes_from_path
     HAS_BARCODE = True
 except Exception:
     read_barcodes_from_path = None
         print(f"Spell check results - A: {len(misspell_a)} boxes, B: {len(misspell_b)} boxes")
         if HAS_BARCODE:
+            # Use new barcode detection from barcode_reader
             try:
+                codes_a = read_barcodes_from_path(file_a.name, max_pages=8, raster_dpis=(400, 600, 900))
+                codes_b = read_barcodes_from_path(file_b.name, max_pages=8, raster_dpis=(400, 600, 900))
                 # Convert to old format for compatibility
                 bar_a, info_a = [], []

barcode_reader.py ADDED Viewed

	@@ -0,0 +1,317 @@

+"""
+Robust barcode reader for images and PDFs.
+Strategy (in order):
+1) PDF -> extract embedded image XObjects at native resolution (no raster loss) and decode.
+2) If nothing found, rasterize PDF page(s) at high DPI (400/600/900) and decode.
+3) For plain images, decode directly.
+Engines:
+- Primary: ZXing-CPP (zxingcpp)  -> no system packages required
+- Fallback: OpenCV contrib barcode (if available)
+Outputs are normalized dicts:
+{ 'engine', 'source', 'page', 'type', 'text', 'polygon': [[x,y] * 4] }
+"""
+from __future__ import annotations
+import io
+import os
+from typing import Any, Dict, List, Tuple, Optional
+import numpy as np
+from PIL import Image
+import cv2
+# ---------- Engines ----------
+HAS_ZXING = False
+try:
+    import zxingcpp  # pip install zxing-cpp
+    HAS_ZXING = True
+except Exception:
+    zxingcpp = None
+    HAS_ZXING = False
+HAS_OCV_BARCODE = hasattr(cv2, "barcode") and hasattr(getattr(cv2, "barcode"), "BarcodeDetector")
+# ---------- PDF (PyMuPDF) ----------
+try:
+    import fitz  # PyMuPDF
+    HAS_PYMUPDF = True
+except Exception:
+    fitz = None
+    HAS_PYMUPDF = False
+# =========================
+# Utils
+# =========================
+def _to_bgr(img: Image.Image) -> np.ndarray:
+    arr = np.array(img.convert("RGB"))
+    return cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
+def _as_gray(arr_bgr: np.ndarray) -> np.ndarray:
+    return cv2.cvtColor(arr_bgr, cv2.COLOR_BGR2GRAY)
+def _preprocess_candidates(bgr: np.ndarray) -> List[np.ndarray]:
+    """
+    Generate a small set of preprocess variants to improve 1D and 2D decoding.
+    Keep this list short—HF Spaces need to stay responsive.
+    """
+    out = [bgr]
+    h, w = bgr.shape[:2]
+    # Slight sharpening helps thin 1D bars
+    k = np.array([[0, -1, 0],
+                  [-1, 5, -1],
+                  [0, -1, 0]], dtype=np.float32)
+    sharp = cv2.filter2D(bgr, -1, k)
+    out.append(sharp)
+    # CLAHE on gray
+    g = _as_gray(bgr)
+    clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8)).apply(g)
+    out.append(cv2.cvtColor(clahe, cv2.COLOR_GRAY2BGR))
+    # Slight upscale for tiny barcodes
+    if max(h, w) < 1600:
+        up = cv2.resize(bgr, (0, 0), fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
+        out.append(up)
+    return out
+def _norm_polygon(pts: Any, w: int, h: int) -> List[List[float]]:
+    """
+    Normalize whatever the engine returns into 4 point polygon [[x,y],...].
+    If fewer than 4 points are given, approximate with a bounding box.
+    """
+    try:
+        p = np.array(pts, dtype=np.float32).reshape(-1, 2)
+        if p.shape[0] >= 4:
+            p = p[:4]
+        else:
+            # make a box
+            x1, y1 = p.min(axis=0)
+            x2, y2 = p.max(axis=0)
+            p = np.array([[x1, y1], [x2, y1], [x2, y2], [x1, y2]], dtype=np.float32)
+    except Exception:
+        p = np.array([[0, 0], [w, 0], [w, h], [0, h]], dtype=np.float32)
+    return p.astype(float).tolist()
+def _dedupe(results: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """
+    Deduplicate by (text, type) and polygon IoU.
+    """
+    keep: List[Dict[str, Any]] = []
+    def iou(a, b):
+        ax = np.array(a["polygon"], dtype=np.float32)
+        bx = np.array(b["polygon"], dtype=np.float32)
+        a_min = ax.min(axis=0); a_max = ax.max(axis=0)
+        b_min = bx.min(axis=0); b_max = bx.max(axis=0)
+        inter_min = np.maximum(a_min, b_min)
+        inter_max = np.minimum(a_max, b_max)
+        wh = np.maximum(inter_max - inter_min, 0)
+        inter = wh[0] * wh[1]
+        a_area = (a_max - a_min).prod()
+        b_area = (b_max - b_min).prod()
+        union = max(a_area + b_area - inter, 1e-6)
+        return float(inter / union)
+    for r in results:
+        dup = False
+        for k in keep:
+            if r["text"] == k["text"] and r["type"] == k["type"] and iou(r, k) > 0.7:
+                dup = True
+                break
+        if not dup:
+            keep.append(r)
+    return keep
+# =========================
+# Decoders
+# =========================
+def _decode_zxing(bgr: np.ndarray) -> List[Dict[str, Any]]:
+    if not HAS_ZXING:
+        return []
+    hits: List[Dict[str, Any]] = []
+    # ZXing works on gray or color; we'll try a couple of variants
+    for candidate in _preprocess_candidates(bgr):
+        try:
+            res = zxingcpp.read_barcodes(candidate)  # returns list
+        except Exception:
+            continue
+        for r in res or []:
+            try:
+                fmt = getattr(r.format, "name", str(r.format))
+            except Exception:
+                fmt = str(r.format)
+            poly = []
+            try:
+                pos = r.position  # list of points with .x/.y
+                poly = [[float(pt.x), float(pt.y)] for pt in pos]
+            except Exception:
+                h, w = candidate.shape[:2]
+                poly = _norm_polygon([], w, h)
+            hits.append({
+                "engine": "zxingcpp",
+                "type": fmt,
+                "text": r.text or "",
+                "polygon": poly,
+            })
+        if hits:
+            break  # good enough
+    return hits
+def _decode_opencv(bgr: np.ndarray) -> List[Dict[str, Any]]:
+    if not HAS_OCV_BARCODE:
+        return []
+    det = cv2.barcode.BarcodeDetector()
+    hits: List[Dict[str, Any]] = []
+    for candidate in _preprocess_candidates(bgr):
+        gray = _as_gray(candidate)
+        ok, infos, types, corners = det.detectAndDecode(gray)
+        if not ok:
+            continue
+        for txt, typ, pts in zip(infos, types, corners):
+            if not txt:
+                continue
+            h, w = candidate.shape[:2]
+            poly = _norm_polygon(pts, w, h)
+            hits.append({
+                "engine": "opencv_barcode",
+                "type": typ,
+                "text": txt,
+                "polygon": poly,
+            })
+        if hits:
+            break
+    return hits
+def _decode_any(bgr: np.ndarray) -> List[Dict[str, Any]]:
+    # Prefer ZXing; it's generally stronger across symbologies
+    res = _decode_zxing(bgr)
+    if res:
+        return res
+    return _decode_opencv(bgr)
+# =========================
+# Image & PDF readers
+# =========================
+def _pdf_extract_xobject_images(path: str, page_index: Optional[int] = None) -> List[Tuple[int, np.ndarray]]:
+    """
+    Return (page, image_bgr) tuples for image XObjects extracted at native resolution.
+    """
+    if not HAS_PYMUPDF:
+        return []
+    out: List[Tuple[int, np.ndarray]] = []
+    doc = fitz.open(path)
+    pages = range(len(doc)) if page_index is None else [page_index]
+    for pno in pages:
+        page = doc[pno]
+        for info in page.get_images(full=True):
+            xref = info[0]
+            pix = fitz.Pixmap(doc, xref)
+            # Convert to RGB if not already
+            if pix.n >= 4:  # includes alpha or CMYK+alpha
+                pix = fitz.Pixmap(fitz.csRGB, pix)
+            pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
+            out.append((pno, _to_bgr(pil)))
+    doc.close()
+    return out
+def _pdf_render_page(path: str, page: int, dpi: int) -> np.ndarray:
+    """
+    Rasterize one page at the given DPI (for vector codes).
+    """
+    if not HAS_PYMUPDF:
+        raise RuntimeError("PyMuPDF not available; cannot rasterize PDF.")
+    doc = fitz.open(path)
+    if page >= len(doc):
+        doc.close()
+        raise ValueError(f"Page {page} out of range; PDF has {len(doc)} pages.")
+    pg = doc[page]
+    scale = dpi / 72.0
+    mat = fitz.Matrix(scale, scale)
+    pix = pg.get_pixmap(matrix=mat, alpha=False)
+    pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
+    doc.close()
+    return _to_bgr(pil)
+def _decode_image_path(path: str) -> List[Dict[str, Any]]:
+    pil = Image.open(path).convert("RGB")
+    bgr = _to_bgr(pil)
+    hits = _decode_any(bgr)
+    for h in hits:
+        h.update({"source": "image", "page": 0})
+    return _dedupe(hits)
+def _decode_pdf_path(path: str, max_pages: int = 8, raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
+    results: List[Dict[str, Any]] = []
+    # 1) Try original embedded images first
+    for pno, img_bgr in _pdf_extract_xobject_images(path):
+        hits = _decode_any(img_bgr)
+        for h in hits:
+            h.update({"source": "pdf_xobject_image", "page": pno})
+        results.extend(hits)
+    if results:
+        return _dedupe(results)
+    # 2) Fallback: rasterize pages at increasing DPIs
+    if not HAS_PYMUPDF:
+        # No way to rasterize; return empty
+        return []
+    doc = fitz.open(path)
+    n = min(len(doc), max_pages)
+    doc.close()
+    for dpi in raster_dpis:
+        for pno in range(n):
+            img_bgr = _pdf_render_page(path, pno, dpi=dpi)
+            hits = _decode_any(img_bgr)
+            for h in hits:
+                h.update({"source": f"pdf_raster_{dpi}dpi", "page": pno})
+            results.extend(hits)
+        if results:
+            break
+    return _dedupe(results)
+# =========================
+# Public API
+# =========================
+def read_barcodes_from_path(path: str,
+                            max_pages: int = 8,
+                            raster_dpis: Tuple[int, ...] = (400, 600, 900)) -> List[Dict[str, Any]]:
+    """
+    Auto-detect by extension, decode barcodes, and return a list of dicts:
+    {engine, source, page, type, text, polygon}
+    """
+    ext = os.path.splitext(path.lower())[1]
+    if ext == ".pdf":
+        return _decode_pdf_path(path, max_pages=max_pages, raster_dpis=raster_dpis)
+    else:
+        return _decode_image_path(path)
+# =========================
+# Optional: drawing helper
+# =========================
+def draw_barcodes(bgr: np.ndarray, detections: List[Dict[str, Any]]) -> np.ndarray:
+    out = bgr.copy()
+    for d in detections:
+        poly = np.array(d["polygon"], dtype=np.int32).reshape(-1, 1, 2)
+        cv2.polylines(out, [poly], True, (0, 255, 0), 2)
+        txt = f'{d["type"]}: {d["text"]}'
+        x, y = poly[0, 0, 0], poly[0, 0, 1]
+        cv2.putText(out, txt[:48], (x, max(15, y - 6)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 50, 255), 1, cv2.LINE_AA)
+    return out

barcode_utils.py DELETED Viewed

@@ -1,169 +0,0 @@
-import io
-import os
-from typing import List, Dict, Any, Tuple, Optional
-import cv2
-import numpy as np
-from PIL import Image
-# PDF support via PyMuPDF (preferred for extracting original image XObjects)
-try:
-    import fitz  # PyMuPDF
-    HAS_PYMUPDF = True
-except Exception:
-    fitz = None
-    HAS_PYMUPDF = False
-def _ensure_contrib():
-    if not hasattr(cv2, "barcode") or not hasattr(cv2.barcode, "BarcodeDetector"):
-        raise RuntimeError(
-            "OpenCV was built without the 'barcode' module. "
-            "Install 'opencv-contrib-python-headless' (not 'opencv-python-headless')."
-        )
-def _pil_to_bgr(pil: Image.Image) -> np.ndarray:
-    arr = np.array(pil.convert("RGB"))
-    return cv2.cvtColor(arr, cv2.COLOR_RGB2BGR)
-def _decode_with_opencv(img_bgr: np.ndarray) -> List[Dict[str, Any]]:
-    _ensure_contrib()
-    det = cv2.barcode.BarcodeDetector()
-    # Try 4 orientations
-    results: List[Dict[str, Any]] = []
-    for k, rot in enumerate([0, 1, 2, 3]):  # 0, 90, 180, 270
-        if rot > 0:
-            img = np.ascontiguousarray(np.rot90(img_bgr, k=rot))
-        else:
-            img = img_bgr
-        # Optional light preproc to help 1D codes
-        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-        gray = cv2.bilateralFilter(gray, d=5, sigmaColor=50, sigmaSpace=50)
-        ok, decoded_info, decoded_type, corners = det.detectAndDecode(gray)
-        if not ok:
-            continue
-        # corners: list of Nx4x2
-        for txt, typ, pts in zip(decoded_info, decoded_type, corners):
-            if not txt:
-                continue
-            pts = np.asarray(pts, dtype=np.float32)
-            # rotate points back to original orientation
-            if rot > 0:
-                h, w = img_bgr.shape[:2]
-                if rot == 1:   # 90
-                    pts = np.stack([h - pts[:,1], pts[:,0]], axis=1)
-                elif rot == 2: # 180
-                    pts = np.stack([w - pts[:,0], h - pts[:,1]], axis=1)
-                elif rot == 3: # 270
-                    pts = np.stack([pts[:,1], w - pts[:,0]], axis=1)
-            results.append({
-                "text": txt,
-                "type": typ,
-                "polygon": pts.tolist(),  # four points
-                "rotation_quarters": rot
-            })
-    return results
-def _extract_pdf_images_bgr(path: str, page_index: Optional[int] = None) -> List[Tuple[int, np.ndarray]]:
-    """
-    Returns list of (page_idx, img_bgr) extracted at native resolution from image XObjects.
-    """
-    if not HAS_PYMUPDF:
-        return []
-    out: List[Tuple[int, np.ndarray]] = []
-    doc = fitz.open(path)
-    pages = range(len(doc)) if page_index is None else [page_index]
-    for pno in pages:
-        page = doc[pno]
-        for imginfo in page.get_images(full=True):
-            xref = imginfo[0]
-            pix = fitz.Pixmap(doc, xref)
-            # Convert to RGB if needed
-            if pix.n >= 4:  # RGBA or CMYK+alpha
-                pix = fitz.Pixmap(fitz.csRGB, pix)
-            pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
-            out.append((pno, _pil_to_bgr(pil)))
-            pix = None
-    doc.close()
-    return out
-def _render_pdf_page_bgr(path: str, pno: int, dpi: int = 600) -> np.ndarray:
-    if not HAS_PYMUPDF:
-        raise RuntimeError("PyMuPDF not available to render PDF pages.")
-    doc = fitz.open(path)
-    if pno >= len(doc):
-        doc.close()
-        raise ValueError(f"Page {pno} out of range (PDF has {len(doc)} pages).")
-    page = doc[pno]
-    scale = dpi / 72.0
-    mat = fitz.Matrix(scale, scale)
-    pix = page.get_pixmap(matrix=mat, alpha=False)
-    pil = Image.open(io.BytesIO(pix.tobytes("png"))).convert("RGB")
-    doc.close()
-    return _pil_to_bgr(pil)
-def read_barcodes_from_path(path: str, max_pages: int = 5, raster_dpi: int = 900) -> List[Dict[str, Any]]:
-    """
-    Unified entry point:
-    - For images: decode directly with OpenCV.
-    - For PDFs: try original image XObjects first (raw), then rasterize pages at high DPI as fallback.
-    Returns a list of dicts: {source, page, type, text, polygon}
-    """
-    ext = os.path.splitext(path.lower())[1]
-    results: List[Dict[str, Any]] = []
-    if ext == ".pdf":
-        # 1) Try native images embedded in the PDF
-        for pno, img in _extract_pdf_images_bgr(path):
-            hits = _decode_with_opencv(img)
-            for h in hits:
-                results.append({
-                    "source": "pdf_xobject_image",
-                    "page": pno,
-                    **h
-                })
-        if results:
-            return results
-        # 2) Fallback: rasterize a few pages crisply and decode
-        if not HAS_PYMUPDF:
-            raise RuntimeError("No PyMuPDF; cannot rasterize PDF pages. Add 'pymupdf' to requirements.")
-        doc = fitz.open(path)
-        for pno in range(min(len(doc), max_pages)):
-            page_img = _render_pdf_page_bgr(path, pno, dpi=raster_dpi)
-            hits = _decode_with_opencv(page_img)
-            for h in hits:
-                results.append({
-                    "source": "pdf_rasterized",
-                    "page": pno,
-                    **h
-                })
-        doc.close()
-        return results
-    else:
-        # Image path
-        pil = Image.open(path).convert("RGB")
-        img = _pil_to_bgr(pil)
-        hits = _decode_with_opencv(img)
-        for h in hits:
-            results.append({
-                "source": "image",
-                "page": 0,
-                **h
-            })
-        return results
-def draw_polys(bgr: np.ndarray, polys: list) -> np.ndarray:
-    """Draw polygons on the image for visualization"""
-    out = bgr.copy()
-    for p in polys:
-        if "polygon" in p:
-            pts = np.array(p["polygon"], dtype=np.int32).reshape(-1,1,2)
-            cv2.polylines(out, [pts], True, (0, 255, 0), 2)
-    return out

requirements.txt CHANGED Viewed

@@ -1,9 +1,10 @@
-opencv-contrib-python-headless==4.10.0.84
 numpy
 pillow
 pdf2image
 gradio
-PyMuPDF>=1.24
 pytesseract
 pyspellchecker
 regex

 numpy
 pillow
+pymupdf
+opencv-contrib-python-headless==4.10.0.84
+zxing-cpp>=2.2.0
 pdf2image
 gradio
 pytesseract
 pyspellchecker
 regex