gaurav2004's picture
Update app.py
d5c405e verified
raw
history blame
No virus
1.57 kB
import os
os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
from PIL import Image
import pytesseract
import gradio as gr
# Function to extract text and search for a keyword
def ocr_with_search(image, keyword):
    """Run Hindi+English OCR on an image and list the lines containing a keyword.

    Args:
        image: Image passed straight to ``pytesseract.image_to_string``.
            ``None`` (nothing uploaded) yields empty results instead of
            an error.
        keyword: Text to look for; matching is case-insensitive. A missing
            or blank keyword produces no search results.

    Returns:
        A ``(text, matches)`` tuple: the full OCR output, and the lines of
        that output containing ``keyword``, joined with newlines.
    """
    # Nothing was uploaded: there is no text to extract or search.
    if image is None:
        return "", ""

    # Only point pytesseract at the Windows install location when that binary
    # actually exists; on other hosts keep pytesseract's configured command
    # rather than overwriting it with a path that cannot be executed.
    windows_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
    if os.path.isfile(windows_tesseract):
        pytesseract.pytesseract.tesseract_cmd = windows_tesseract

    # Extract text using OCR (supports Hindi and English)
    text = pytesseract.image_to_string(image, lang='hin+eng')

    # An empty keyword is a substring of every line, which would just echo
    # the whole text back as "matches"; treat it as "no search requested".
    if not keyword or not keyword.strip():
        return text, ""

    # Lowercase the keyword once instead of once per line.
    needle = keyword.lower()
    matched_lines = [line for line in text.split('\n') if needle in line.lower()]

    # Return extracted text and matching lines
    return text, '\n'.join(matched_lines)
# Gradio interface to upload image and search for keywords
def create_interface():
    """Assemble the OCR / keyword-search Gradio app and launch it.

    Wires ``ocr_with_search`` to an image upload and a keyword textbox as
    inputs and two textboxes (full text, matching lines) as outputs, then
    launches the interface with ``share=True``. Returns nothing.
    """
    # Inputs: the picture to read and the keyword to look for.
    image_input = gr.Image(type="pil", label="Upload Image")
    keyword_input = gr.Textbox(
        placeholder="Enter keyword to search",
        label="Keyword Search",
    )

    # Outputs: everything OCR found, and only the lines that matched.
    full_text_output = gr.Textbox(label="Extracted Text")
    matches_output = gr.Textbox(label="Search Results")

    app = gr.Interface(
        fn=ocr_with_search,
        inputs=[image_input, keyword_input],
        outputs=[full_text_output, matches_output],
        title="Hindi and English OCR with Keyword Search",
        description="Upload an image containing Hindi and English text. Extract the text and search for keywords.",
    )

    # Start serving the app.
    app.launch(share=True)
# Script entry point: build and launch the Gradio app only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    create_interface()