Unique00225 committed
Commit a2654bf · verified · 1 Parent(s): 71938a5

Update app.py

Files changed (1)
  1. app.py +33 -94
app.py CHANGED
@@ -1,66 +1,33 @@
+import gradio as gr
 from transformers import AutoProcessor, AutoModelForVision2Seq
-from PIL import Image
 import torch
-import io
-import base64
-from fastapi import FastAPI, UploadFile, File, HTTPException
-from fastapi.responses import JSONResponse
-import uvicorn
-
-# Initialize FastAPI app
-app = FastAPI(title="OLM OCR API", description="OCR using allenai/olmOCR-2-7B-1025-FP8")
-
-# Global variables for model and processor
-processor = None
-model = None
-device = None
+from PIL import Image
+import os

+# Load model directly
 def load_model():
-    """Load the model and processor"""
-    global processor, model, device
-
-    print("Loading processor...")
     processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8")
-
-    print("Loading model...")
     model = AutoModelForVision2Seq.from_pretrained(
         "allenai/olmOCR-2-7B-1025-FP8",
         torch_dtype=torch.float16,
         device_map="auto"
     )
-
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Model loaded on device: {device}")
+    return processor, model

-@app.on_event("startup")
-async def startup_event():
-    """Load model on startup"""
-    load_model()
+# Load model once at startup
+processor, model = load_model()

-@app.get("/")
-async def root():
-    return {"message": "OLM OCR API is running!", "model": "allenai/olmOCR-2-7B-1025-FP8"}
-
-@app.get("/health")
-async def health_check():
-    return {"status": "healthy", "model_loaded": model is not None}
-
-@app.post("/ocr")
-async def extract_text_from_image(file: UploadFile = File(...)):
+def extract_text_from_image(image):
     """
-    Extract text from uploaded image
+    Extract text from image using OLM OCR model
     """
     try:
-        # Check if file is an image
-        if not file.content_type.startswith('image/'):
-            raise HTTPException(status_code=400, detail="File must be an image")
-
-        # Read image file
-        contents = await file.read()
-        image = Image.open(io.BytesIO(contents)).convert('RGB')
+        # Convert to RGB if needed
+        if image.mode != 'RGB':
+            image = image.convert('RGB')

         # Process image and generate text
-        inputs = processor(images=image, return_tensors="pt").to(device)
+        inputs = processor(images=image, return_tensors="pt")

         with torch.no_grad():
             generated_ids = model.generate(
@@ -75,57 +42,29 @@ async def extract_text_from_image(file: UploadFile = File(...)):
             skip_special_tokens=True
         )[0]

-        return JSONResponse({
-            "success": True,
-            "extracted_text": generated_text,
-            "filename": file.filename,
-            "file_size": len(contents)
-        })
+        return generated_text

     except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
+        return f"Error processing image: {str(e)}"

-@app.post("/ocr/base64")
-async def extract_text_from_base64(data: dict):
-    """
-    Extract text from base64 encoded image
-    """
-    try:
-        if 'image' not in data:
-            raise HTTPException(status_code=400, detail="Missing 'image' field in request")
-
-        # Decode base64 image
-        image_data = base64.b64decode(data['image'])
-        image = Image.open(io.BytesIO(image_data)).convert('RGB')
-
-        # Process image and generate text
-        inputs = processor(images=image, return_tensors="pt").to(device)
-
-        with torch.no_grad():
-            generated_ids = model.generate(
-                **inputs,
-                max_new_tokens=1024,
-                do_sample=False,
-            )
-
-        # Decode the generated text
-        generated_text = processor.batch_decode(
-            generated_ids,
-            skip_special_tokens=True
-        )[0]
-
-        return JSONResponse({
-            "success": True,
-            "extracted_text": generated_text
-        })
-
-    except Exception as e:
-        raise HTTPException(status_code=500, detail=f"Error processing image: {str(e)}")
+# Create Gradio interface
+demo = gr.Interface(
+    fn=extract_text_from_image,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=gr.Textbox(label="Extracted Text", lines=10),
+    title="OLM OCR Text Extraction",
+    description="Extract text from images using allenai/olmOCR-2-7B-1025-FP8 model",
+    examples=[
+        ["example1.jpg"],  # You can add example images
+        ["example2.jpg"],
+    ],
+    allow_flagging="never"
+)

+# For Hugging Face Spaces
 if __name__ == "__main__":
-    uvicorn.run(
-        "app:app",
-        host="0.0.0.0",
-        port=8000,
-        reload=True
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
     )
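The rewrite drops the REST endpoints (/ocr and /ocr/base64), so any existing HTTP callers break; Gradio still exposes the function programmatically through the gradio_client package. A minimal client-side sketch, where the Space id "Unique00225/olm-ocr" is a placeholder assumption, not taken from the commit (substitute the real Space, or http://localhost:7860 when running app.py locally). Note also that recent Gradio releases deprecate allow_flagging in favor of flagging_mode, so the flag set above may warn on current versions.

from gradio_client import Client, handle_file

# "Unique00225/olm-ocr" is a hypothetical Space id, not taken from the commit;
# a local URL such as "http://localhost:7860" also works.
client = Client("Unique00225/olm-ocr")

# gr.Interface registers its endpoint under api_name="/predict" by default.
result = client.predict(
    handle_file("page.png"),  # local path or URL of the image to OCR
    api_name="/predict",
)
print(result)  # the extracted text returned by extract_text_from_image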
 
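One caveat on the committed inference path: olmOCR-2-7B is a prompted vision-language checkpoint (Qwen2.5-VL lineage), and calling processor(images=image, ...) with no text prompt may yield empty or meaningless generations. A hedged sketch of the chat-template call that transformers vision-language processors generally expect; the prompt wording is illustrative only, not the model's official OCR prompt:

# Sketch assuming `processor`, `model`, and a PIL `image` exist as in app.py;
# the instruction text below is an illustrative placeholder, not the official
# olmOCR prompt.
messages = [{
    "role": "user",
    "content": [
        {"type": "image"},
        {"type": "text", "text": "Transcribe all of the text in this image."},
    ],
}]
prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
inputs = processor(text=[prompt], images=[image], return_tensors="pt").to(model.device)

with torch.no_grad():
    generated_ids = model.generate(**inputs, max_new_tokens=1024, do_sample=False)

# Drop the prompt tokens so only the newly generated answer is decoded.
trimmed = generated_ids[:, inputs["input_ids"].shape[1]:]
extracted_text = processor.batch_decode(trimmed, skip_special_tokens=True)[0]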