# OCR / app.py
# Rivalcoder
# Add files
# 4b32c6f
# raw
# history blame
# 2.54 kB
import os
import io
import easyocr
import numpy as np
from fastapi import FastAPI, UploadFile, File
from fastapi.responses import JSONResponse
from PIL import Image
from pdf2image import convert_from_bytes
from concurrent.futures import ThreadPoolExecutor
# =========================
# EasyOCR config
# =========================
MODEL_DIR = "/app/.EasyOCR"
USER_NET_DIR = os.path.join(MODEL_DIR, "user_network")

# Create both directories up front; this is a no-op when they already exist.
for _directory in (MODEL_DIR, USER_NET_DIR):
    os.makedirs(_directory, exist_ok=True)

# Build one shared reader at import time from the models already stored
# under MODEL_DIR.
reader = easyocr.Reader(
    ["en", "hi"],  # languages (reduce if only English is needed)
    model_storage_directory=MODEL_DIR,
    user_network_directory=USER_NET_DIR,
    download_enabled=False,  # block model downloads at runtime
)

# =========================
# FastAPI app
# =========================
app = FastAPI()
@app.get("/")
async def root():
    """Return a fixed status message showing the API is up."""
    status = {"message": "OCR API is running on Hugging Face πŸš€"}
    return status
def run_ocr_on_image(image: Image.Image):
    """Run OCR on a PIL image and return JSON-serializable detections.

    Images in RGB, RGBA or L (grayscale) mode are passed to the reader as
    they are. Any other mode is converted to RGB first, because the numpy
    form of those modes is not a plain intensity array: a palette ("P")
    image yields palette indices, a bilevel ("1") image yields booleans,
    and modes such as CMYK or LA have channel layouts the reader would not
    interpret as RGB.

    Args:
        image: The PIL image to recognize text in.

    Returns:
        A list with one dict per detection, holding ``"bbox"`` (a list of
        ``[x, y]`` float pairs), ``"text"`` (str) and ``"confidence"``
        (float).
    """
    if image.mode not in ("RGB", "RGBA", "L"):
        image = image.convert("RGB")

    detections = reader.readtext(np.array(image))

    # Coerce every value to a builtin float/str so the payload can be
    # serialized as JSON.
    return [
        {
            "bbox": [[float(x), float(y)] for x, y in bbox],
            "text": str(text),
            "confidence": float(prob),
        }
        for bbox, text, prob in detections
    ]
def _looks_like_pdf(filename, contents: bytes) -> bool:
    """Return True if the upload has a .pdf name or starts with the PDF magic bytes."""
    # The filename may be missing on an upload, so never call .lower() on it
    # directly.
    has_pdf_name = (filename or "").lower().endswith(".pdf")
    return has_pdf_name or contents.startswith(b"%PDF-")


def _ocr_pdf(contents: bytes):
    """Convert a PDF to page images and OCR each one, returning 1-based page entries."""
    pages = convert_from_bytes(contents)
    # Pages are OCR'd in parallel; executor.map yields results in page order.
    with ThreadPoolExecutor() as executor:
        per_page = list(executor.map(run_ocr_on_image, pages))
    return [
        {"page": number, "results": page_results}
        for number, page_results in enumerate(per_page, start=1)
    ]


def _ocr_image(contents: bytes):
    """Decode the bytes as a single image and OCR it."""
    # The context manager releases the decoder once OCR has consumed the pixels.
    with Image.open(io.BytesIO(contents)) as image:
        return run_ocr_on_image(image)


@app.post("/ocr")
async def ocr(file: UploadFile = File(...)):
    """OCR an uploaded PDF or image.

    An upload is treated as a PDF when its filename ends in ``.pdf``
    (case-insensitive) or its content starts with ``%PDF-``; everything else
    is opened as an image.

    Args:
        file: The uploaded file.

    Returns:
        A JSON response: ``{"pdf_results": [{"page": n, "results": [...]}]}``
        for a PDF, ``{"results": [...]}`` for an image, or
        ``{"error": "<message>"}`` with status 500 if anything fails.
    """
    # Imported here so this block does not depend on the file's import section.
    from fastapi.concurrency import run_in_threadpool

    try:
        contents = await file.read()

        # PDF conversion and OCR are blocking calls; running them in a worker
        # thread keeps the event loop free to serve other requests.
        if _looks_like_pdf(file.filename, contents):
            pdf_results = await run_in_threadpool(_ocr_pdf, contents)
            return JSONResponse(content={"pdf_results": pdf_results})

        results = await run_in_threadpool(_ocr_image, contents)
        return JSONResponse(content={"results": results})
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)