Spaces:

Aumkeshchy2003
/

Gradio-OCR

Sleeping

File size: 1,258 Bytes

d4640a8
ea0a0ae
794e69a
 
 
 
e2eafa6
 
fb7988f
e2eafa6
 
 
 
 
 
 
206b3c8
e2eafa6
 
ea0a0ae
e2eafa6
ea0a0ae
814690d
e2eafa6
1c9cf55
814690d
ea0a0ae
d66c9c9
e2eafa6
 
d66c9c9
206b3c8
814690d
 
 
04c7dbc
 
814690d
 
e2eafa6

import html
from typing import List

import gradio as gr
import pytesseract
from PIL import Image

def tesseract_ocr(filepath: str, keyword: str) -> str:
    """Run Tesseract OCR on an image and return the text as an HTML fragment.

    The recognized text is HTML-escaped so that characters such as ``<``,
    ``>`` and ``&`` found in the scanned document are displayed literally
    instead of being interpreted as markup. Every exact (case-sensitive)
    occurrence of *keyword* is wrapped in a ``<mark>`` element.

    Args:
        filepath: Path of the image file to read.
        keyword: Text to highlight; an empty value disables highlighting.

    Returns:
        The escaped OCR text, with keyword matches wrapped in
        ``<mark>...</mark>``.
    """
    # Image.open keeps the underlying file open; the context manager
    # releases it as soon as OCR has finished.
    with Image.open(filepath) as image:
        extracted_text = pytesseract.image_to_string(image=image)

    if not keyword:
        return html.escape(extracted_text)

    # Split on the raw keyword *before* escaping so a keyword such as "amp"
    # can never match inside an entity like "&amp;" produced by the escape.
    # join/split on a non-empty separator is equivalent to str.replace.
    marked_keyword = f"<mark>{html.escape(keyword)}</mark>"
    return marked_keyword.join(
        html.escape(segment) for segment in extracted_text.split(keyword)
    )

# Static text handed to gr.Interface: the heading, a one-line blurb, and an
# HTML footer linking to the Tesseract documentation and source repository.
title = "Tesseract OCR"
description = (
    "Gradio demo for Tesseract. "
    "Tesseract is an open-source text recognition (OCR) Engine."
)
article = (
    "<p style='text-align: center'>"
    "<a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a>"
    " | "
    "<a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a>"
    "</p>"
)



# Assemble the web UI: an image upload and a keyword box are passed, in that
# order, to tesseract_ocr, and its return value is shown via the "html" output.
demo = gr.Interface(
    title=title,
    description=description,
    article=article,
    fn=tesseract_ocr,
    inputs=[
        # type="filepath" matches the handler's `filepath` parameter.
        gr.Image(label="Upload Image for OCR", type="filepath"),
        # Optional keyword to highlight in the recognized text.
        gr.Textbox(placeholder="Enter a keyword...", label="Keyword to Highlight"),
    ],
    outputs="html",
)

# Start the Gradio app only when this file is executed as a script; the
# message below is printed once demo.launch() has returned.
if __name__ == "__main__":
    demo.launch()
    print("Finished running")