Spaces:

gaurav2004
/

gradio-imagetotext-OCR

Running

Update app.py

d5c405e verified about 13 hours ago

No virus

1.57 kB

	import os
	os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
	from PIL import Image
	import pytesseract
	import gradio as gr




	# Function to extract text and search for a keyword
	def ocr_with_search(image, keyword):
	    """Extract Hindi/English text from an image and find lines with a keyword.

	    Args:
	        image: Image to run OCR on, in any form accepted by
	            ``pytesseract.image_to_string``, or ``None`` when no image
	            was supplied.
	        keyword: Text to look for. It is matched case-insensitively as a
	            substring of each extracted line. ``None`` or a blank string
	            means "do not search".

	    Returns:
	        A ``(text, matches)`` tuple of strings: the full OCR output and
	        the matching lines joined with newlines. Both are empty when
	        ``image`` is ``None``; ``matches`` is empty when there is no
	        keyword or nothing matched.
	    """
	    # Nothing to recognise (e.g. the form was submitted without an upload):
	    # return empty results instead of letting the OCR call fail.
	    if image is None:
	        return "", ""

	    # Only point pytesseract at the Windows install location when that file
	    # really exists. Everywhere else keep pytesseract's own setting so the
	    # `tesseract` binary is resolved normally instead of a missing path.
	    windows_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
	    if os.path.isfile(windows_tesseract):
	        pytesseract.pytesseract.tesseract_cmd = windows_tesseract

	    # Extract text using OCR (supports Hindi and English)
	    text = pytesseract.image_to_string(image, lang='hin+eng')

	    # An empty needle is a substring of every line, which would echo the
	    # whole text back as "matches"; treat a blank keyword as no search.
	    needle = (keyword or "").strip().casefold()
	    if not needle:
	        return text, ""

	    matched_lines = [line for line in text.splitlines() if needle in line.casefold()]

	    # Return extracted text and matching lines
	    return text, '\n'.join(matched_lines)

	# Gradio interface to upload image and search for keywords
	def create_interface():
	    """Assemble the OCR-with-keyword-search Gradio app and launch it.

	    The form takes an image and a keyword, passes both to
	    ``ocr_with_search`` and shows its two results in separate text boxes.
	    The interface is launched with ``share=True``. Nothing is returned.
	    """
	    # Input widgets: the picture to read and the keyword to look for.
	    image_input = gr.Image(type="pil", label="Upload Image")
	    keyword_input = gr.Textbox(placeholder="Enter keyword to search", label="Keyword Search")

	    # Output widgets: full OCR text first, matching lines second.
	    extracted_output = gr.Textbox(label="Extracted Text")
	    matches_output = gr.Textbox(label="Search Results")

	    demo = gr.Interface(
	        fn=ocr_with_search,
	        inputs=[image_input, keyword_input],
	        outputs=[extracted_output, matches_output],
	        title="Hindi and English OCR with Keyword Search",
	        description="Upload an image containing Hindi and English text. Extract the text and search for keywords.",
	    )

	    # Start serving the app.
	    demo.launch(share=True)

	# Run the Gradio interface
	if __name__ == "__main__":
	    # Executed as a script (not imported): build and start the app.
	    create_interface()