OCR_Model / app.py
ak-ml18's picture
Update app.py
b268eea verified
# Step 1: Import Required Libraries
import requests
from PIL import Image
from transformers import pipeline
import gradio as gr
import re # Import regular expressions module for case-insensitive keyword matching
# Step 2: Define a Function to Perform OCR
def perform_ocr(image):
ocr_pipeline = pipeline("image-to-text", model="microsoft/trocr-base-handwritten")
extracted_text = ocr_pipeline(image)
return extracted_text[0]['generated_text'] if extracted_text else ""
# Step 3: Define Function to Highlight Keyword
def highlight_keyword(text, keyword):
if not keyword: # Check if the keyword is empty
return "No keyword was entered."
# Ensure case-insensitivity while highlighting (preserving original casing)
keyword_pattern = re.compile(re.escape(keyword), re.IGNORECASE) # Create case-insensitive pattern
# Check if the keyword exists in the text
if keyword_pattern.search(text):
highlighted_text = keyword_pattern.sub(lambda match: f"<mark>{match.group(0)}</mark>", text)
return highlighted_text
else:
return f"Keyword '{keyword}' not found in the extracted text."
# Step 4: Define Gradio Interface Function
def ocr_and_highlight(image, keyword):
extracted_text = perform_ocr(image)
keyword_result = highlight_keyword(extracted_text, keyword)
return extracted_text, keyword_result
# Step 5: Create Gradio Interface
interface = gr.Interface(
fn=ocr_and_highlight,
inputs=[
gr.Image(type="pil", label="Upload Image"),
gr.Textbox(label="Enter Keyword (optional)")
],
outputs=[
gr.Textbox(label="Extracted Text", interactive=False),
gr.HTML(label="Keyword Result") # Changed to HTML to allow highlighting
],
title="OCR Text Extractor with Keyword Highlighting",
description="Upload an image to extract text and highlight a specified keyword. If no keyword is entered, the app will notify you."
)
# Step 6: Launch the Gradio Interface
interface.launch(share=True)