Spaces:

rumman19
/

ocrr

Sleeping

File size: 1,078 Bytes

e94bb18

import html
import re

import gradio as gr
import pytesseract
from PIL import Image

# Set Tesseract path if needed
# pytesseract.pytesseract.tesseract_cmd = "/path/to/tesseract"  # Update if needed

def extract_text_from_image(image_path):
    """Run Tesseract OCR on an image file and return the recognised text.

    Args:
        image_path: Path to the image file, passed straight to
            ``PIL.Image.open``.

    Returns:
        The string produced by ``pytesseract.image_to_string`` using the
        combined English and Hindi language data (``lang="eng+hin"``).
    """
    # Image.open leaves the underlying file open; the context manager closes
    # it as soon as OCR finishes instead of leaving it to garbage collection.
    with Image.open(image_path) as img:
        return pytesseract.image_to_string(img, lang="eng+hin")

def _highlight_keyword(text, keyword):
    """Return *text* as HTML with each occurrence of *keyword* in bold red.

    Matching is case-insensitive and the matched text keeps its original
    casing. Returns ``"Keyword not found"`` when the keyword is empty or does
    not occur in *text*.
    """
    if not keyword:
        # An empty pattern matches between every character, which would wrap
        # the whole text in empty highlight spans.
        return "Keyword not found"

    # The capturing group makes re.split keep the matched substrings: odd
    # indices hold the matches, even indices the text between them.
    parts = re.split(f"({re.escape(keyword)})", text, flags=re.IGNORECASE)
    if len(parts) == 1:
        return "Keyword not found"

    # Escape every piece individually so markup-like characters in the OCR
    # text or the keyword are displayed literally rather than rendered.
    return "".join(
        f'<span style="color: red; font-weight: bold;">{html.escape(part)}</span>'
        if index % 2
        else html.escape(part)
        for index, part in enumerate(parts)
    )


def ocr_and_search(image, keyword):
    """Extract text from an image and highlight a keyword inside it.

    Args:
        image: Path to the uploaded image file, or ``None`` when no image
            was supplied.
        keyword: Text to search for (case-insensitive).

    Returns:
        A ``(text, highlighted_html)`` tuple: the raw OCR text, and either an
        HTML string with every keyword occurrence wrapped in a bold red
        ``<span>`` or the message ``"Keyword not found"``. When ``image`` is
        ``None`` the tuple is ``("", "No image provided")``.
    """
    if image is None:
        # Nothing to OCR; report it instead of failing inside the image loader.
        return "", "No image provided"

    text = extract_text_from_image(image)
    return text, _highlight_keyword(text, keyword)

# Build the Gradio UI: an image upload and a keyword box feed ocr_and_search,
# whose two results are shown as plain extracted text and as HTML.
image_input = gr.Image(type="filepath")
keyword_input = gr.Textbox(label="Keyword")
extracted_text_output = gr.Textbox(label="Extracted Text")
search_results_output = gr.HTML(label="Search Results")

app = gr.Interface(
    fn=ocr_and_search,
    inputs=[image_input, keyword_input],
    outputs=[extracted_text_output, search_results_output],
)

# Start the web server; share=True additionally requests a public share link.
app.launch(share=True)