Spaces:

Sugamdeol
/

ocr-api

Running

App Files Files Community

ocr-api / app.py

Sugamdeol

Update app.py

a73345a verified 3 days ago

raw

history blame contribute delete

2.57 kB

	# app.py

	import io
	import os
	from typing import List

	import uvicorn
	from fastapi import FastAPI, File, UploadFile, HTTPException
	from fastapi.concurrency import run_in_threadpool
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import StreamingResponse
	from PIL import Image
	from pydantic import BaseModel

	from image_processor import enhance_image_fast, extract_text_from_image_fast, process_pdf_in_parallel

	# Application object; the middleware and route decorators in this module
	# are all registered against it.
	app = FastAPI(
	    title="High-Speed OCR API",
	    description="An API to extract text from images and PDFs, optimized for speed.",
	    version="5.0.0-hf-final",
	)

	# CORS middleware: allow requests from any origin, with any method and headers.
	#
	# Credentials are disabled on purpose: a wildcard origin must not be combined
	# with allow_credentials=True (the CORS spec forbids "*" on credentialed
	# responses, and the FastAPI docs require explicit origins in that case).
	# To support cookies / Authorization headers from browsers, replace the
	# wildcard with an explicit list of trusted origins and re-enable it.
	origins = ["*"]
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=origins,
	    allow_credentials=False,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# Pydantic Models for structured responses
	class ImageOCRResponse(BaseModel):
	    """Response body of the /ocr-image endpoint."""

	    # Name of the uploaded file, echoed back from the request.
	    filename: str
	    # Text extracted from the uploaded image.
	    text: str

	class PageResult(BaseModel):
	    """OCR output for one page of a PDF."""

	    # Page index as produced by the PDF processor
	    # (presumably 1-based -- confirm against image_processor).
	    page_number: int
	    # Text extracted from that page.
	    text: str

	class PDFOCRResponse(BaseModel):
	    """Response body of the /ocr-pdf endpoint."""

	    # Name of the uploaded file, echoed back from the request.
	    filename: str
	    # Number of entries in `results`.
	    total_pages: int
	    # Per-page OCR output.
	    results: List[PageResult]

	# API Endpoints
	@app.get("/", tags=["General"])
	def read_root():
	    """Return a welcome message that points clients at /docs."""
	    welcome = "Welcome to the High-Speed OCR API. See /docs for documentation."
	    return {"message": welcome}

	def _ocr_image_bytes(contents: bytes):
	    """Decode raw image bytes, enhance the image and return the OCR result."""
	    image = Image.open(io.BytesIO(contents))
	    enhanced_image = enhance_image_fast(image)
	    return extract_text_from_image_fast(enhanced_image)


	@app.post("/ocr-image", response_model=ImageOCRResponse, tags=["OCR"])
	async def ocr_image_endpoint(file: UploadFile = File(...)):
	    """Run OCR on an uploaded image and return the extracted text.

	    Args:
	        file: Multipart upload; its content type must start with "image/".

	    Returns:
	        A dict with the upload's ``filename`` and the extracted ``text``.

	    Raises:
	        HTTPException: 400 when the upload is not declared as an image
	            (including a missing content type); 500 when reading,
	            decoding or OCR fails.
	    """
	    # content_type is None when the client sends no Content-Type for the
	    # part; treat that as "not an image" instead of failing on
	    # None.startswith with an unhandled error.
	    if not (file.content_type or "").startswith("image/"):
	        raise HTTPException(status_code=400, detail="File must be an image.")
	    try:
	        contents = await file.read()
	        # Decoding and OCR are blocking calls; run them in a worker thread
	        # so the event loop can keep serving other requests meanwhile.
	        text = await run_in_threadpool(_ocr_image_bytes, contents)
	        return {"filename": file.filename, "text": text}
	    except Exception as e:
	        # Boundary handler: report any processing failure as a 500 while
	        # keeping the original exception chained for server-side tracebacks.
	        raise HTTPException(
	            status_code=500, detail=f"Error processing image OCR: {e}"
	        ) from e

	@app.post("/ocr-pdf", response_model=PDFOCRResponse, tags=["OCR"])
	async def ocr_pdf_endpoint(file: UploadFile = File(...)):
	    """Run OCR on every page of an uploaded PDF.

	    Args:
	        file: Multipart upload; its content type must be exactly
	            "application/pdf".

	    Returns:
	        A dict with the upload's ``filename``, ``total_pages`` (the number
	        of entries in ``results``) and the ``results`` produced by
	        ``process_pdf_in_parallel``.

	    Raises:
	        HTTPException: 400 when the upload is not declared as a PDF;
	            500 when reading or processing the PDF fails.
	    """
	    if file.content_type != "application/pdf":
	        raise HTTPException(status_code=400, detail="File must be a PDF.")
	    try:
	        contents = await file.read()
	        # process_pdf_in_parallel is a blocking call; run it in a worker
	        # thread so the event loop is not stalled while the PDF is processed.
	        results = await run_in_threadpool(process_pdf_in_parallel, contents)
	        return {
	            "filename": file.filename,
	            "total_pages": len(results),
	            "results": results,
	        }
	    except Exception as e:
	        # Boundary handler: report any processing failure as a 500 while
	        # keeping the original exception chained for server-side tracebacks.
	        raise HTTPException(
	            status_code=500, detail=f"Error processing PDF: {e}"
	        ) from e

	# The `if __name__ == "__main__":` block has been completely removed.
	# The platform will import the `app` object and run it.