import logging

import gradio as gr
import torch
from PIL import Image
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Checkpoint identifier passed to both from_pretrained() calls below.
model_name = "vidore/colpali-v1.2"

# The tokenizer and model are loaded once at import time and shared by every
# request handled by extract_and_search().
# NOTE(review): the checkpoint name suggests a ColPali retrieval model --
# confirm that it can be loaded through AutoModelForSeq2SeqLM and that the
# object returned by AutoTokenizer accepts `images=` input.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
def extract_and_search(image, keyword):
    """Decode text from an image with the model and filter it by keyword.

    Args:
        image: The uploaded picture (the UI supplies a PIL image), or None
            when nothing was uploaded. Non-RGB images are converted to RGB
            before being handed to the tokenizer.
        keyword: Text to look for, compared case-insensitively against each
            line of the decoded output. None or an empty string means
            "no search".

    Returns:
        A ``(extracted_text, matching_lines)`` tuple. ``matching_lines`` is
        the list of decoded lines containing ``keyword``; it is empty when no
        keyword was given. If extraction fails, the first element is a
        message starting with ``"Error during extraction: "`` and the second
        is an empty list.
    """
    # Report a missing upload explicitly instead of leaking an AttributeError
    # message about NoneType to the user.
    if image is None:
        return "Error during extraction: no image provided", []

    try:
        if image.mode != "RGB":
            image = image.convert("RGB")

        inputs = tokenizer(images=image, return_tensors="pt")

        # Inference only: gradients are not needed.
        with torch.no_grad():
            outputs = model.generate(**inputs)

        extracted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # UI boundary: keep the interface responsive by returning the error
        # as text, but record the traceback so the failure is diagnosable.
        logging.getLogger(__name__).exception("Extraction failed")
        return f"Error during extraction: {str(e)}", []

    # The keyword filter lives outside the try block so that a missing
    # keyword can never discard text that was extracted successfully.
    needle = (keyword or "").lower()
    if not needle:
        # "" is a substring of every line; treat it as "no search requested".
        return extracted_text, []

    matching_lines = [
        line for line in extracted_text.splitlines() if needle in line.lower()
    ]
    return extracted_text, matching_lines
# Gradio UI: an image upload plus a keyword box feed extract_and_search(),
# whose two return values fill the two output text boxes in order.
interface = gr.Interface(
    fn=extract_and_search,
    inputs=[
        # type="pil" makes Gradio hand the upload to the callback as a PIL image.
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Keyword"),
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Matching Lines"),
    ],
    title="ColPali OCR with Keyword Search",
    description="Upload an image and enter a keyword to search within the extracted text.",
)

# Start the web server as soon as the script runs. share=True asks Gradio to
# also create a public share link -- NOTE(review): confirm that exposing the
# app publicly is intended.
interface.launch(share=True)