Spaces:

rumman19
/

ocrr

Sleeping

ocrr / app.py

Create app.py

e94bb18 verified 7 months ago

1.08 kB

	import html
	import re

	import gradio as gr
	import pytesseract
	from PIL import Image

	# Set Tesseract path if needed
	# pytesseract.pytesseract.tesseract_cmd = "/path/to/tesseract" # Update if needed

	def extract_text_from_image(image_path):
	    """Run Tesseract OCR on an image file and return the recognised text.

	    Args:
	        image_path: Path (or file object) accepted by ``PIL.Image.open``.

	    Returns:
	        The text produced by ``pytesseract.image_to_string`` using the
	        combined ``eng+hin`` language setting.
	    """
	    # Open the image in a context manager so the underlying file handle is
	    # released as soon as OCR finishes instead of waiting for garbage
	    # collection. OCR must run inside the block while the image is open.
	    with Image.open(image_path) as img:
	        return pytesseract.image_to_string(img, lang="eng+hin")

	def _highlight_keyword(text, keyword):
	    """Return *text* as HTML with every match of *keyword* in bold red.

	    Matching is case-insensitive and literal (the keyword is regex-escaped).
	    Both the surrounding text and the matched fragments are HTML-escaped so
	    that OCR output containing ``<`` or ``&`` cannot inject markup.

	    Returns:
	        The highlighted HTML string, or ``None`` when there is no match.
	    """
	    pattern = re.compile(re.escape(keyword), re.IGNORECASE)
	    pieces = []
	    cursor = 0
	    for match in pattern.finditer(text):
	        pieces.append(html.escape(text[cursor:match.start()]))
	        # Wrap the text as it appears in the document, not the keyword as
	        # typed, so the original casing is preserved in the output.
	        pieces.append(
	            '<span style="color: red; font-weight: bold;">'
	            f"{html.escape(match.group(0))}</span>"
	        )
	        cursor = match.end()
	    if not pieces:
	        return None
	    pieces.append(html.escape(text[cursor:]))
	    return "".join(pieces)


	def ocr_and_search(image, keyword):
	    """Extract text from an image and highlight a keyword in it.

	    Args:
	        image: Image path passed through to ``extract_text_from_image``.
	        keyword: Text to search for (case-insensitive). An empty or missing
	            keyword is treated as "not found".

	    Returns:
	        A ``(text, highlighted_html)`` tuple: the raw OCR text, and either
	        the HTML-escaped text with matches wrapped in a red bold ``<span>``
	        or the string ``"Keyword not found"``.
	    """
	    text = extract_text_from_image(image)

	    # An empty keyword would match between every character, so skip the
	    # search entirely in that case.
	    highlighted_text = _highlight_keyword(text, keyword) if keyword else None
	    if highlighted_text is None:
	        highlighted_text = "Keyword not found"

	    return text, highlighted_text

	# Build the Gradio UI: an image upload (handed to the callback as a file
	# path) plus a keyword box in, the raw OCR text plus an HTML search result out.
	input_components = [
	    gr.Image(type="filepath"),
	    gr.Textbox(label="Keyword"),
	]
	output_components = [
	    gr.Textbox(label="Extracted Text"),
	    gr.HTML(label="Search Results"),
	]
	app = gr.Interface(
	    fn=ocr_and_search,
	    inputs=input_components,
	    outputs=output_components,
	)

	# Start the web server and request a public share link.
	app.launch(share=True)