Spaces:
Sleeping
Sleeping
| import uvicorn | |
| from fastapi import FastAPI, UploadFile, File, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from typing import Optional | |
| import numpy as np | |
| from PIL import Image | |
| from paddleocr import PaddleOCR | |
| from doctr.io import DocumentFile | |
| from doctr.models import ocr_predictor | |
| import io | |
| import logging | |
class OCRAPIApp:
    """FastAPI application exposing a single ``POST /ocr`` endpoint.

    Uploads whose filename ends in ``.pdf`` are recognised with docTR; every
    other upload is opened as an image and recognised with PaddleOCR. Both
    models are instantiated once, in ``__init__``.
    """

    def __init__(self):
        """Create the FastAPI app, register the route and load both OCR models."""
        self.app = FastAPI(
            docs_url="/",
            title="OCR API",
            version="1.0",
        )
        self.setup_routes()
        self.paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)
        self.doctr_model = ocr_predictor(pretrained=True)

    def ocr_with_paddle(self, img):
        """Run PaddleOCR on ``img`` and return the recognised text.

        Args:
            img: Image in a form accepted by ``PaddleOCR.ocr`` (the endpoint
                passes a NumPy array).

        Returns:
            The recognised lines joined by single spaces, or ``""`` when no
            text was detected.

        Raises:
            HTTPException: status 500 when PaddleOCR or result parsing fails.
        """
        try:
            logging.info("Processing image with PaddleOCR...")
            result = self.paddle_ocr.ocr(img)
            # PaddleOCR reports "no text found" as None / [None] rather than
            # an empty list, so guard before iterating.
            detected = result[0] if result else None
            if not detected:
                return ''
            return ' '.join(entry[1][0] for entry in detected)
        except Exception as e:
            logging.exception("Error with PaddleOCR: %s", e)
            raise HTTPException(status_code=500, detail="Error processing image") from e

    def ocr_with_doctr(self, file):
        """Run docTR on a PDF and return the recognised text.

        Args:
            file: The PDF as raw bytes, a path, or a binary file-like object.

        Returns:
            One text line per recognised line, each terminated by ``"\\n"``,
            with words separated by single spaces.

        Raises:
            HTTPException: status 500 when the PDF cannot be read or processed.
        """
        try:
            logging.info("Processing PDF with Doctr...")
            # DocumentFile.from_pdf is documented for a path or raw bytes, so
            # unwrap file-like objects instead of relying on the PDF backend
            # happening to accept streams.
            pdf_source = file.read() if hasattr(file, "read") else file
            doc = DocumentFile.from_pdf(pdf_source)
            result = self.doctr_model(doc)
            text_lines = []
            for page in result.pages:
                for block in page.blocks:
                    for line in block.lines:
                        text_lines.append(' '.join(word.value for word in line.words) + "\n")
            return ''.join(text_lines)
        except Exception as e:
            logging.exception("Error with Doctr: %s", e)
            raise HTTPException(status_code=500, detail="Error processing PDF") from e

    async def ocr_endpoint(self, file: UploadFile = File(...)):
        """Handle ``POST /ocr``: recognise text in an uploaded PDF or image.

        Args:
            file: The uploaded file. A filename ending in ``.pdf`` (any case)
                selects docTR; anything else is decoded as an image.

        Returns:
            ``{"ocr_text": <recognised text>}``.

        Raises:
            HTTPException: status 500 when reading or recognising the upload fails.
        """
        try:
            file_bytes = await file.read()
            # The filename may be missing, and extensions are not reliably
            # lower-case ("SCAN.PDF").
            filename = (file.filename or "").lower()
            if filename.endswith(".pdf"):
                text_output = self.ocr_with_doctr(file_bytes)
            else:
                with Image.open(io.BytesIO(file_bytes)) as image:
                    # Palette, bilevel, CMYK, ... would become index/bool/4-channel
                    # arrays that do not represent pixel colours; normalise those
                    # to RGB. L / RGB / RGBA are passed through unchanged.
                    if image.mode not in ("L", "RGB", "RGBA"):
                        image = image.convert("RGB")
                    img = np.array(image)
                text_output = self.ocr_with_paddle(img)
            return {"ocr_text": text_output}
        except HTTPException:
            # Already logged and shaped by the helper; do not wrap it again.
            raise
        except Exception as e:
            logging.exception("Error processing file: %s", e)
            raise HTTPException(status_code=500, detail="Error processing file") from e

    def setup_routes(self):
        """Register ``ocr_endpoint`` as the handler for ``POST /ocr``."""
        self.app.post("/ocr")(self.ocr_endpoint)
# Initialize the app
app = OCRAPIApp().app

# Add CORS middleware for cross-origin requests.
# Credentials stay disabled: a wildcard origin must not be combined with
# credentialed requests (the CORS spec forbids it, and the middleware would
# otherwise echo back any requesting origin). The /ocr endpoint only reads the
# uploaded file, so no cookies or auth headers are needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)