Spaces:

Rick7799
/

Ocr1

Running

Rick7799 committed on Sep 28, 2024

Commit

3dad239

verified ·

1 Parent(s): 8c018f3

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,18 +1,35 @@
 import gradio as gr
-from transformers import RAGMultiModalModel  # Importing the ColPali model
-# Initialize the ColPali model
-model = RAGMultiModalModel.from_pretrained("vidore/colpali-v1.2")
 def extract_and_search(image, keyword):
-    # Use the model to extract text from the image
-    inputs = {"images": [image]}
-    extracted_text = model.generate(**inputs)  # Replace with actual prediction method
-    # Perform keyword search
-    matching_lines = [line for line in extracted_text.splitlines() if keyword.lower() in line.lower()]
-    return extracted_text, matching_lines
 # Create Gradio interface
 interface = gr.Interface(

 import gradio as gr
+import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
+from PIL import Image
+# Load the ColPali model and tokenizer from Hugging Face
+model_name = "vidore/colpali-v1.2"  # Use the correct model identifier
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 def extract_and_search(image, keyword):
+    try:
+        # Convert image to RGB if it's not already in that format
+        if image.mode != 'RGB':
+            image = image.convert('RGB')
+        # Preprocess image: convert to tensor format required by the model
+        inputs = tokenizer(images=image, return_tensors="pt")  # Adjust as necessary for your input requirements
+        # Extract text from image using ColPali model
+        with torch.no_grad():  # Disable gradient calculation for inference
+            outputs = model.generate(**inputs)
+        # Decode outputs to text
+        extracted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        # Perform keyword search
+        matching_lines = [line for line in extracted_text.splitlines() if keyword.lower() in line.lower()]
+        return extracted_text, matching_lines
+    except Exception as e:
+        return f"Error during extraction: {str(e)}", []
 # Create Gradio interface
 interface = gr.Interface(