Spaces:
Runtime error
Runtime error
| import os | |
| import io | |
| import easyocr | |
| import numpy as np | |
| from fastapi import FastAPI, UploadFile, File | |
| from fastapi.responses import JSONResponse | |
| from PIL import Image | |
| from pdf2image import convert_from_bytes | |
| from concurrent.futures import ThreadPoolExecutor | |
# ---------------------------------------------------------------------------
# EasyOCR setup
# ---------------------------------------------------------------------------
# Model files are kept under MODEL_DIR; user-supplied networks go in its
# "user_network" sub-directory.
MODEL_DIR = "/app/.EasyOCR"
USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")

# Create both directories up front (a no-op when they already exist).
os.makedirs(MODEL_DIR, exist_ok=True)
os.makedirs(USER_NET_DIR, exist_ok=True)

# Build one shared reader at import time. Runtime downloads are switched off,
# so the English and Hindi models are expected to be cached in MODEL_DIR
# already (drop "hi" if only English is needed).
reader = easyocr.Reader(
    lang_list=["en", "hi"],
    download_enabled=False,
    user_network_directory=USER_NET_DIR,
    model_storage_directory=MODEL_DIR,
)

# ---------------------------------------------------------------------------
# FastAPI application
# ---------------------------------------------------------------------------
app = FastAPI()
# NOTE(review): no route decorator was present, so this handler was never
# attached to `app`. GET "/" is assumed from the function name -- confirm.
@app.get("/")
async def root():
    """Return a static JSON message indicating that the API is running."""
    return {"message": "OCR API is running on Hugging Face π"}
def run_ocr_on_image(image: Image.Image):
    """Run the shared EasyOCR reader on a PIL image.

    Args:
        image: The PIL image to read text from.

    Returns:
        A list with one dict per detection, each holding ``"bbox"`` (a list
        of ``[x, y]`` float pairs), ``"text"`` (str) and ``"confidence"``
        (float). All values are built-in Python types so the list can be
        JSON-encoded directly.
    """
    # Grayscale, RGB and RGBA images are passed through unchanged. Any other
    # PIL mode (palette "P", bilevel "1", "LA", "CMYK", ...) would turn into
    # an index/bool/2-channel array or be misread as RGBA, so normalize those
    # to RGB before handing the pixels to the reader.
    if image.mode not in ("L", "RGB", "RGBA"):
        image = image.convert("RGB")

    detections = reader.readtext(np.array(image))

    # Cast NumPy scalars to plain floats/strings for JSON serialization.
    return [
        {
            "bbox": [[float(x), float(y)] for x, y in bbox],
            "text": str(text),
            "confidence": float(prob),
        }
        for bbox, text, prob in detections
    ]
# NOTE(review): no route decorator was present, so this handler was never
# attached to `app`. POST "/ocr" is assumed from the function name -- confirm.
@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """Run OCR on an uploaded image or PDF.

    Args:
        file: The uploaded file. It is treated as a PDF when its name ends in
            ``.pdf`` or its content starts with the ``%PDF-`` signature;
            anything else is opened as an image.

    Returns:
        A JSON response. For PDFs: ``{"pdf_results": [{"page": n,
        "results": [...]}, ...]}`` with pages numbered from 1. For images:
        ``{"results": [...]}``. If anything fails, ``{"error": "<message>"}``
        with status code 500.
    """
    try:
        contents = await file.read()

        # The filename can be missing on some uploads, so guard against None
        # and fall back to the PDF magic bytes for detection.
        filename = (file.filename or "").lower()
        is_pdf = filename.endswith(".pdf") or contents.startswith(b"%PDF-")

        if is_pdf:
            # Rasterize every page, then OCR the pages in worker threads.
            # NOTE(review): executor.map is consumed synchronously here, so
            # this coroutine does not yield until all pages are processed.
            pages = convert_from_bytes(contents)
            with ThreadPoolExecutor() as executor:
                per_page = list(executor.map(run_ocr_on_image, pages))

            pdf_results = [
                {"page": number, "results": page_results}
                for number, page_results in enumerate(per_page, start=1)
            ]
            return JSONResponse(content={"pdf_results": pdf_results})

        # Plain image upload: close the image once OCR has finished.
        with Image.open(io.BytesIO(contents)) as image:
            text_results = run_ocr_on_image(image)
        return JSONResponse(content={"results": text_results})
    except Exception as e:
        # Top-level boundary: report any failure to the client as a 500.
        return JSONResponse(content={"error": str(e)}, status_code=500)