Spaces:

Unique00225
/

img

Runtime error

App Files Files Community

Unique00225 committed 12 days ago

Commit

f4d5db9

verified ·

1 Parent(s): e49c2db

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -45

app.py CHANGED Viewed

@@ -2,69 +2,70 @@ import gradio as gr
 from transformers import AutoProcessor, AutoModelForVision2Seq
 import torch
 from PIL import Image
-import os
-# Load model directly
 def load_model():
-    processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8")
-    model = AutoModelForVision2Seq.from_pretrained(
-        "allenai/olmOCR-2-7B-1025-FP8",
-        torch_dtype=torch.float16,
-        device_map="auto"
-    )
-    return processor, model
-# Load model once at startup
 processor, model = load_model()
 def extract_text_from_image(image):
-    """
-    Extract text from image using OLM OCR model
-    """
     try:
-        # Convert to RGB if needed
-        if image.mode != 'RGB':
-            image = image.convert('RGB')
-        # Process image and generate text
-        inputs = processor(images=image, return_tensors="pt")
         with torch.no_grad():
-            generated_ids = model.generate(
                 **inputs,
-                max_new_tokens=1024,
                 do_sample=False,
             )
-        # Decode the generated text
-        generated_text = processor.batch_decode(
-            generated_ids,
-            skip_special_tokens=True
-        )[0]
-        return generated_text
     except Exception as e:
-        return f"Error processing image: {str(e)}"
-# Create Gradio interface
 demo = gr.Interface(
-    fn=extract_text_from_image,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=gr.Textbox(label="Extracted Text", lines=10),
-    title="OLM OCR Text Extraction",
-    description="Extract text from images using allenai/olmOCR-2-7B-1025-FP8 model",
-    examples=[
-        ["example1.jpg"],  # You can add example images
-        ["example2.jpg"],
-    ],
-    allow_flagging="never"
 )
-# For Hugging Face Spaces
 if __name__ == "__main__":
-    demo.launch(
-        server_name="0.0.0.0",
-        server_port=7860,
-        share=False
-    )

 from transformers import AutoProcessor, AutoModelForVision2Seq
 import torch
 from PIL import Image
+# Pick the compute device: CUDA when PyTorch reports a GPU, otherwise CPU.
+# (Only GPU availability is checked; available memory is not inspected.)
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Half precision on GPU, full precision on CPU.
+torch_dtype = torch.float16 if device == "cuda" else torch.float32
+# NOTE: Gradio has no `cache_resource` decorator (that name belongs to
+# Streamlit's `st.cache_resource`); evaluating `@gr.cache_resource` raises
+# AttributeError at import time. The model is loaded exactly once by the
+# module-level call to load_model(), so no caching decorator is needed.
 def load_model():
+    """Load the olmOCR processor and model.
+
+    Uses the module-level ``device`` and ``torch_dtype`` to decide placement
+    and precision: ``device_map="auto"`` on CUDA, an explicit ``.to("cpu")``
+    otherwise.
+
+    Returns:
+        A ``(processor, model)`` tuple on success, or ``(None, None)`` when
+        loading fails for any reason. The failure is printed rather than
+        raised so the UI can still start and report the problem.
+    """
+    try:
+        print("Loading OLM OCR model...")
+        # Load with optimizations for limited resources
+        processor = AutoProcessor.from_pretrained("allenai/olmOCR-2-7B-1025-FP8")
+        model = AutoModelForVision2Seq.from_pretrained(
+            "allenai/olmOCR-2-7B-1025-FP8",
+            torch_dtype=torch_dtype,
+            device_map="auto" if device == "cuda" else None,
+            low_cpu_mem_usage=True
+        )
+        # With device_map=None nothing has placed the model yet, so move it
+        # explicitly when running on CPU.
+        if device == "cpu":
+            model = model.to(device)
+        print("Model loaded successfully!")
+        return processor, model
+    except Exception as e:
+        # Deliberate best-effort: callers check for None instead of crashing.
+        print(f"Error loading model: {e}")
+        return None, None
+# Load once at import time; both names are None if load_model() failed.
 processor, model = load_model()
 def extract_text_from_image(image):
+    """Run the OCR model on an uploaded image and return the decoded text.
+
+    Args:
+        image: PIL image supplied by the Gradio ``Image`` input, or ``None``
+            when nothing was uploaded.
+
+    Returns:
+        The decoded model output, or a human-readable message when the model
+        is unavailable, no image was given, or processing raised.
+    """
+    # load_model() returns (None, None) on failure; report it instead of crashing.
+    if processor is None or model is None:
+        return "Model failed to load. The model might be too large for this environment."
     try:
+        if image is None:
+            return "Please upload an image first."
+        # Normalize to RGB, then build model inputs on the selected device.
+        # NOTE(review): only `images` is passed to the processor, with no text
+        # prompt; confirm this model's processor accepts image-only input.
+        image = image.convert('RGB')
+        inputs = processor(images=image, return_tensors="pt").to(device)
+        # Greedy decoding without gradient tracking.
         with torch.no_grad():
+            outputs = model.generate(
                 **inputs,
+                max_new_tokens=256,  # Caps output length to keep latency down
                 do_sample=False,
+                num_beams=1  # No beam search: faster, possibly less accurate
             )
+        # NOTE(review): the full first sequence is decoded; if generate()
+        # echoes prompt tokens they will appear in the result. Verify.
+        text = processor.decode(outputs[0], skip_special_tokens=True)
+        return text
     except Exception as e:
+        # Surface any processing failure as text in the output box.
+        return f"Error: {str(e)}"
+# Minimal Gradio UI: one PIL image input wired to extract_text_from_image,
+# one 5-line text box for the result.
 demo = gr.Interface(
+    extract_text_from_image,
+    gr.Image(type="pil"),
+    gr.Textbox(lines=5),
+    title="OLM OCR"
 )
+# Start the app only when this file is executed as a script.
 if __name__ == "__main__":
+    demo.launch()