|
|
|
|
|
import requests |
|
from PIL import Image |
|
from transformers import pipeline |
|
import gradio as gr |
|
import re |
|
|
|
|
|
|
|
# Model checkpoint used for text recognition.
_OCR_MODEL_NAME = "microsoft/trocr-base-handwritten"

# Lazily-created pipeline instance shared by every call to perform_ocr().
_ocr_pipeline = None


def _get_ocr_pipeline():
    """Return the shared image-to-text pipeline, building it on first use."""
    global _ocr_pipeline
    if _ocr_pipeline is None:
        # Constructing the pipeline loads the model, so do it only once
        # instead of on every request.
        _ocr_pipeline = pipeline("image-to-text", model=_OCR_MODEL_NAME)
    return _ocr_pipeline


def perform_ocr(image):
    """Extract text from an image with the TrOCR image-to-text pipeline.

    Args:
        image: The image to recognise (the UI passes a PIL image), or
            ``None`` when no image was provided.

    Returns:
        The ``'generated_text'`` of the first pipeline result, or an empty
        string when there is no image or the pipeline returns nothing.
    """
    if image is None:
        # Nothing was uploaded; skip the model entirely.
        return ""

    extracted_text = _get_ocr_pipeline()(image)
    return extracted_text[0]['generated_text'] if extracted_text else ""
|
|
|
|
|
|
|
def highlight_keyword(text, keyword):
    """Build an HTML snippet with every occurrence of *keyword* highlighted.

    Matching is case-insensitive and literal (the keyword is regex-escaped).
    The result is meant to be rendered as HTML, so everything that comes from
    *text* or *keyword* is HTML-escaped; only the ``<mark>`` tags added here
    are emitted as real markup.

    Args:
        text: The text to search.
        keyword: The keyword to highlight. Empty or whitespace-only values
            are treated as "no keyword".

    Returns:
        The escaped text with matches wrapped in ``<mark>...</mark>``, or a
        short message when no keyword was given or it was not found.
    """
    # Local import keeps this function self-contained.
    import html

    if not keyword or not keyword.strip():
        return "No keyword was entered."

    keyword_pattern = re.compile(re.escape(keyword), re.IGNORECASE)

    # Escape matched and unmatched segments separately so that escaping can
    # neither break a match nor let raw markup from the input through.
    pieces = []
    cursor = 0
    for match in keyword_pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group(0))}</mark>")
        cursor = match.end()

    if not pieces:
        return f"Keyword '{html.escape(keyword)}' not found in the extracted text."

    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)
|
|
|
|
|
|
|
def ocr_and_highlight(image, keyword):
    """Run OCR on *image* and highlight *keyword* in the recognised text.

    Returns:
        A ``(text, result)`` pair: the text returned by ``perform_ocr`` and
        the value returned by ``highlight_keyword`` for that text.
    """
    recognized = perform_ocr(image)
    return recognized, highlight_keyword(recognized, keyword)
|
|
|
|
|
|
|
# Input widgets, in the order ocr_and_highlight() receives its arguments.
_app_inputs = [
    gr.Image(type="pil", label="Upload Image"),
    gr.Textbox(label="Enter Keyword (optional)"),
]

# Output widgets, in the order ocr_and_highlight() returns its values.
_app_outputs = [
    gr.Textbox(label="Extracted Text", interactive=False),
    gr.HTML(label="Keyword Result"),
]

# Gradio UI wiring the OCR + highlight function to the widgets above.
interface = gr.Interface(
    fn=ocr_and_highlight,
    inputs=_app_inputs,
    outputs=_app_outputs,
    title="OCR Text Extractor with Keyword Highlighting",
    description="Upload an image to extract text and highlight a specified keyword. If no keyword is entered, the app will notify you.",
)

# Start the app; share=True is passed through to Gradio's launch().
interface.launch(share=True)
|
|