Unique00225 committed
Commit 71938a5 · verified · 1 Parent(s): bdecb08

Update app.py

Files changed (1)
  1. app.py +124 -17
app.py CHANGED
@@ -1,24 +1,131 @@
- # trocr_infer.py -- paste into your app.py and call ocr_with_trocr(pil_image)
- from transformers import TrOCRProcessor, VisionEncoderDecoderModel
  import torch

- device = "cuda" if torch.cuda.is_available() else "cpu"

- # Model choices: "microsoft/trocr-base-printed" or "microsoft/trocr-base-handwritten"
- MODEL_NAME = "microsoft/trocr-base-printed"

- processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
- model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME).to(device)
- model.eval()

- def ocr_with_trocr(pil_image):
      """
-     Input: PIL.Image (RGB)
-     Returns: recognized text string
      """
-     # Preprocess
-     pixel_values = processor(images=pil_image, return_tensors="pt").pixel_values.to(device)
-     # Generate (greedy; tune generation params if desired)
-     generated_ids = model.generate(pixel_values, max_length=128, num_beams=1)
-     text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-     return text.strip()
+ from transformers import AutoProcessor, AutoModelForVision2Seq
+ from PIL import Image
  import torch
+ import io
+ import base64
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from fastapi.responses import JSONResponse
+ import uvicorn

+ # Initialize FastAPI app
+ app = FastAPI(title="OLM OCR API", description="OCR using allenai/olmOCR-2-7B-1025-FP8")

+ # Global variables for model and processor
+ processor = None
+ model = None
+ device = None

+ def load_model():
+     """Load the model and processor"""
+     global processor, model, device
+
+     print("Loading processor...")
+     processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8")
+
+     print("Loading model...")
+     model = AutoModelForVision2Seq.from_pretrained(
+         "allenai/olmOCR-2-7B-1025-FP8",
+         torch_dtype=torch.float16,
+         device_map="auto"
+     )
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     print(f"Model loaded on device: {device}")

+ @app.on_event("startup")
+ async def startup_event():
+     """Load model on startup"""
+     load_model()
+
+ @app.get("/")
+ async def root():
+     return {"message": "OLM OCR API is running!", "model": "allenai/olmOCR-2-7B-1025-FP8"}
+
+ @app.get("/health")
+ async def health_check():
+     return {"status": "healthy", "model_loaded": model is not None}
+
+ @app.post("/ocr")
+ async def extract_text_from_image(file: UploadFile = File(...)):
      """
+     Extract text from uploaded image
      """
+     try:
+         # Check if file is an image
+         if not file.content_type.startswith('image/'):
+             raise HTTPException(status_code=400, detail="File must be an image")
+
+         # Read image file
+         contents = await file.read()
+         image = Image.open(io.BytesIO(contents)).convert('RGB')
+
+         # Process image and generate text
+         inputs = processor(images=image, return_tensors="pt").to(device)
+
+         with torch.no_grad():
+             generated_ids = model.generate(
+                 **inputs,
+                 max_new_tokens=1024,
+                 do_sample=False,
+             )
+
+         # Decode the generated text
+         generated_text = processor.batch_decode(
+             generated_ids,
+             skip_special_tokens=True
+         )[0]
+
+         return JSONResponse({
+             "success": True,
+             "extracted_text": generated_text,
+             "filename": file.filename,
+             "file_size": len(contents)
+         })
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
+
+ @app.post("/ocr/base64")
+ async def extract_text_from_base64(data: dict):
+     """
+     Extract text from base64 encoded image
+     """
+     try:
+         if 'image' not in data:
+             raise HTTPException(status_code=400, detail="Missing 'image' field in request")
+
+         # Decode base64 image
+         image_data = base64.b64decode(data['image'])
+         image = Image.open(io.BytesIO(image_data)).convert('RGB')
+
+         # Process image and generate text
+         inputs = processor(images=image, return_tensors="pt").to(device)
+
+         with torch.no_grad():
+             generated_ids = model.generate(
+                 **inputs,
+                 max_new_tokens=1024,
+                 do_sample=False,
+             )
+
+         # Decode the generated text
+         generated_text = processor.batch_decode(
+             generated_ids,
+             skip_special_tokens=True
+         )[0]
+
+         return JSONResponse({
+             "success": True,
+             "extracted_text": generated_text
+         })
+
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
+
+ if __name__ == "__main__":
+     uvicorn.run(
+         "app:app",
+         host="0.0.0.0",
+         port=8000,
+         reload=True
+     )
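
A note on the new inference path: olmOCR-2 is described on its model card as a Qwen2.5-VL fine-tune, and Qwen-VL-style processors generally expect a text prompt alongside the image, so the bare processor(images=image, return_tensors="pt") call in the committed code may reject the input or generate without instructions. Below is a minimal, hedged sketch of prompt-based generation. The message structure and prompt wording are assumptions, not the model's documented interface; consult the allenai/olmOCR-2-7B-1025-FP8 model card for the prompt the model was actually trained with.

# Sketch only: prompt-based generation for a Qwen2.5-VL-style checkpoint.
# The prompt text below is an assumption, not the official olmOCR prompt.
from transformers import AutoProcessor, AutoModelForVision2Seq
from PIL import Image
import torch

processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8")
model = AutoModelForVision2Seq.from_pretrained(
    "allenai/olmOCR-2-7B-1025-FP8",
    torch_dtype=torch.float16,
    device_map="auto",
)

image = Image.open("page.png").convert("RGB")  # hypothetical input file
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Extract all text from this page."},
    ],
}]
prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = processor(images=[image], text=[prompt], return_tensors="pt").to(model.device)

with torch.no_grad():
    generated_ids = model.generate(**inputs, max_new_tokens=1024, do_sample=False)

# Decode only the newly generated tokens, not the echoed prompt
new_tokens = generated_ids[:, inputs["input_ids"].shape[1]:]
print(processor.batch_decode(new_tokens, skip_special_tokens=True)[0])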
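
For quick manual testing once the server is up, here is a minimal client sketch for both endpoints. The host, port, and file name (localhost:8000, page.png) are assumptions matching the uvicorn.run settings above.

# Sketch only: exercise the /ocr and /ocr/base64 endpoints with requests.
import base64
import requests

BASE = "http://localhost:8000"  # assumes the __main__ block above

# Multipart upload to /ocr; the form field name must be "file"
with open("page.png", "rb") as f:
    resp = requests.post(BASE + "/ocr", files={"file": ("page.png", f, "image/png")})
print(resp.json())

# JSON body with a base64-encoded image for /ocr/base64
with open("page.png", "rb") as f:
    payload = {"image": base64.b64encode(f.read()).decode("ascii")}
resp = requests.post(BASE + "/ocr/base64", json=payload)
print(resp.json())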