import io
from contextlib import asynccontextmanager

import torch
from fastapi import FastAPI, HTTPException, UploadFile
from PIL import Image
from transformers import (
    AutoTokenizer,
    TrOCRProcessor,
    VisionEncoderDecoderModel,
    ViTImageProcessor,
)

from config import Settings

config = {}


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Load settings and models once at startup and share them through the config dict.
    settings = Settings()
    config['settings'] = settings
    config['device'] = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    tokenizer = AutoTokenizer.from_pretrained(settings.TOKENIZER)
    feature_extractor = ViTImageProcessor.from_pretrained(settings.FEATURE_EXTRACTOR)
    config['processor'] = TrOCRProcessor(image_processor=feature_extractor, tokenizer=tokenizer)
    config['ocr_model'] = VisionEncoderDecoderModel.from_pretrained(settings.OCR_MODEL).to(config['device'])

    yield

    # Clean up and release the resources
    config.clear()


app = FastAPI(lifespan=lifespan)


@app.get("/")
def api_home():
    return {'detail': 'Welcome to Sinhala OCR Space'}


@app.post("/apply-trocr")
async def apply_ocr(file: UploadFile):
    try:
        # Read the uploaded image and normalise it to RGB
        contents = await file.read()
        image = Image.open(io.BytesIO(contents)).convert("RGB")

        # Run TrOCR: encode the image, generate token ids, and decode them to text
        pixel_values = config['processor'](image, return_tensors="pt").pixel_values.to(config['device'])
        generated_ids = config['ocr_model'].generate(pixel_values)
        generated_text = config['processor'].batch_decode(generated_ids, skip_special_tokens=True)[0]

        # Return the extracted text as the response
        return {"ocr_result": generated_text}
    except Exception as e:
        # Surface failures (unreadable image, OCR errors) as an HTTP 500 response
        raise HTTPException(status_code=500, detail=str(e))
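
# Minimal sketch for running the app locally; assumes uvicorn is installed
# and port 8000 is free. The lifespan hook above loads the models on startup.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request against the running server (assuming a local sample.png):
#   curl -X POST -F "file=@sample.png" http://localhost:8000/apply-trocr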