Spaces:

Rick7799
/

Ocr1

Sleeping

App Files Files Community

Ocr1 / app.py

Rick7799

Update app.py

3dad239 verified 8 months ago

raw

history blame

1.79 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
	from PIL import Image

	# Load the tokenizer and model once, at import time; extract_and_search()
	# reads both as module-level globals on every request.
	# NOTE(review): the checkpoint name suggests a ColPali model — confirm that it
	# can actually be loaded through AutoModelForSeq2SeqLM, and that AutoTokenizer
	# (rather than an image-aware processor) is the right preprocessor, since the
	# request handler passes it an image.
	model_name = "vidore/colpali-v1.2" # Hugging Face identifier of the checkpoint to load
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

	def _find_matching_lines(text, keyword):
	    """Return the lines of *text* that contain *keyword*, ignoring case."""
	    # An empty keyword is a substring of every line; treat it (and None) as
	    # "nothing to search for" instead of reporting the whole text as a match.
	    if not keyword:
	        return []
	    needle = keyword.lower()  # lower-case once rather than once per line
	    return [line for line in text.splitlines() if needle in line.lower()]


	def extract_and_search(image, keyword):
	    """Extract text from *image* and list the lines that contain *keyword*.

	    Args:
	        image: Image object exposing ``mode`` and ``convert`` (a PIL image),
	            or ``None`` when nothing was uploaded.
	        keyword: Text to look for, matched case-insensitively. ``None`` or an
	            empty string yields no matching lines.

	    Returns:
	        A ``(extracted_text, matching_lines)`` tuple where ``matching_lines``
	        is a list of strings. On any failure the first item is an
	        ``"Error during extraction: ..."`` message and the second is an empty
	        list; exceptions are reported this way instead of being raised.
	    """
	    # Give a clear message for a missing upload instead of surfacing the
	    # AttributeError that ``image.mode`` would raise on None.
	    if image is None:
	        return "Error during extraction: no image provided", []

	    try:
	        # Normalise to RGB before handing the image to the preprocessor.
	        if image.mode != 'RGB':
	            image = image.convert('RGB')

	        # NOTE(review): ``tokenizer`` is the module-level object built with
	        # AutoTokenizer; confirm it really accepts ``images=`` — if it does
	        # not, every request ends in the error branch below.
	        inputs = tokenizer(images=image, return_tensors="pt")

	        # Inference only: skip gradient bookkeeping.
	        with torch.no_grad():
	            outputs = model.generate(**inputs)

	        extracted_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

	        # Kept inside the try so an unexpected keyword type is still reported
	        # as an error message rather than raised to the UI.
	        return extracted_text, _find_matching_lines(extracted_text, keyword)
	    except Exception as e:
	        # UI boundary: report the failure as text so the app keeps running.
	        return f"Error during extraction: {e}", []

	# Build the Gradio UI: an image upload plus a keyword box feed
	# extract_and_search(), whose two return values fill the two output boxes.
	interface = gr.Interface(
	    fn=extract_and_search,
	    inputs=[
	        gr.Image(type="pil", label="Upload Image"),
	        gr.Textbox(label="Enter Keyword"),
	    ],
	    outputs=[
	        gr.Textbox(label="Extracted Text"),
	        gr.Textbox(label="Matching Lines"),
	    ],
	    title="ColPali OCR with Keyword Search",
	    description="Upload an image and enter a keyword to search within the extracted text.",
	)

	# Start the server only when this file is executed as a script, so importing
	# the module (tests, tooling, reload mode) does not launch the app as a side
	# effect. Running the file directly behaves exactly as before.
	if __name__ == "__main__":
	    interface.launch(share=True)