Ocr1 / app.py
Rick7799's picture
Update app.py
3dad239 verified
raw
history blame
1.79 kB
import gradio as gr
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from PIL import Image
# Hugging Face Hub identifier of the checkpoint used by this app.
model_name = "vidore/colpali-v1.2"


def _load_pretrained(checkpoint):
    """Load the tokenizer and the seq2seq model for *checkpoint*.

    The tokenizer is loaded first, then the model, both through the
    ``transformers`` Auto classes imported at the top of the file.
    """
    # NOTE(review): ColPali is published as a document-retrieval model;
    # confirm that AutoTokenizer / AutoModelForSeq2SeqLM can actually load
    # this checkpoint and that it supports text generation.
    loaded_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    loaded_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    return loaded_tokenizer, loaded_model


# Loaded once at import time and shared by every request.
tokenizer, model = _load_pretrained(model_name)
def _find_matching_lines(text, keyword):
    """Return the lines of *text* that contain *keyword*, ignoring case."""
    # An empty needle is a substring of every line, so a missing or empty
    # keyword is treated as "no search" instead of matching everything.
    if not keyword:
        return []
    needle = keyword.lower()  # lowered once, not once per line
    return [line for line in text.splitlines() if needle in line.lower()]


def extract_and_search(image, keyword):
    """Extract text from an image and list the lines containing a keyword.

    Args:
        image: Image object exposing ``mode`` and ``convert`` (the UI
            declares the input with ``type="pil"``), or ``None`` when no
            image was supplied.
        keyword: Text to search for, compared case-insensitively. ``None``
            or an empty string disables the search.

    Returns:
        A ``(extracted_text, matching_lines)`` tuple. When extraction is not
        possible, the first element is a message starting with
        ``"Error during extraction: "`` and the second is an empty list.
    """
    # Presumably what the UI passes when the form is submitted without an
    # upload; report it clearly instead of surfacing an AttributeError.
    if image is None:
        return "Error during extraction: no image provided", []

    try:
        # The model input is expected to be RGB; convert other modes.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # NOTE(review): tokenizers normally consume text; confirm that this
        # tokenizer accepts ``images=`` or whether an image processor is
        # required for this checkpoint.
        inputs = tokenizer(images=image, return_tensors="pt")

        # Inference only, so skip gradient tracking.
        with torch.no_grad():
            outputs = model.generate(**inputs)

        extracted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: turn any extraction failure into a displayable message.
        return f"Error during extraction: {e}", []

    # The search runs outside the try block so that a problem with the
    # keyword can never discard text that was extracted successfully.
    return extracted_text, _find_matching_lines(extracted_text, keyword)
# Input widgets: the uploaded picture (delivered to the callback as a PIL
# image because of type="pil") and the keyword to look for.
image_input = gr.Image(type="pil", label="Upload Image")
keyword_input = gr.Textbox(label="Enter Keyword")

# Output widgets, in the same order as the tuple returned by
# extract_and_search: the full extracted text, then the matching lines.
text_output = gr.Textbox(label="Extracted Text")
matches_output = gr.Textbox(label="Matching Lines")

# Wire the widgets to the extraction callback.
interface = gr.Interface(
    fn=extract_and_search,
    inputs=[image_input, keyword_input],
    outputs=[text_output, matches_output],
    title="ColPali OCR with Keyword Search",
    description="Upload an image and enter a keyword to search within the extracted text.",
)

# Start the app; share=True is forwarded to Gradio's launch(), which
# documents it as requesting a publicly shareable link.
interface.launch(share=True)