Aumkeshchy2003 committed on
Commit
e2eafa6
1 Parent(s): d66c9c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -2,24 +2,32 @@ from typing import List
2
 
3
  import pytesseract
4
  from PIL import Image
5
-
6
  import gradio as gr
7
 
8
- def tesseract_ocr(filepath: str, languages: List[str]):
 
9
  image = Image.open(filepath)
10
- return pytesseract.image_to_string(image=image, lang=', '.join(languages))
 
 
 
 
 
 
11
 
 
 
12
  title = "Tesseract OCR"
13
- description = "Gradio demo for Tesseract. Tesseract is an open source text recognition (OCR) Engine."
14
  article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
15
 
16
- language_choices = pytesseract.get_languages()
17
 
18
  demo = gr.Interface(
19
  fn=tesseract_ocr,
20
  inputs=[
21
- gr.Image(type="filepath", label="Input"),
22
- gr.CheckboxGroup(language_choices, type="value", value=['eng'], label='language')
23
  ],
24
  outputs='html',
25
  title=title,
@@ -29,4 +37,4 @@ demo = gr.Interface(
29
 
30
  if __name__ == '__main__':
31
  demo.launch()
32
- print("Finished running")
 
2
 
3
  import pytesseract
4
  from PIL import Image
 
5
  import gradio as gr
6
 
7
def tesseract_ocr(filepath: str, keyword: str) -> str:
    """Run Tesseract OCR on an image and highlight a keyword in the result.

    The returned string is an HTML fragment (the interface renders this
    function's output with the ``html`` component), so the recognized text
    and the keyword are HTML-escaped before any markup is added.

    Args:
        filepath: Path of the image file to read.
        keyword: Text to wrap in ``<mark>`` tags wherever it occurs in the
            recognized text. Matching is exact and case-sensitive. An empty
            value disables highlighting.

    Returns:
        The escaped OCR text, with every occurrence of ``keyword`` wrapped
        in ``<mark>...</mark>``.
    """
    import html  # local import keeps this block self-contained

    # Load the image and perform OCR; the context manager closes the file
    # handle once the text has been extracted.
    with Image.open(filepath) as image:
        extracted_text = pytesseract.image_to_string(image=image)

    if not keyword:
        return html.escape(extracted_text)

    # Split on the raw keyword first and escape each piece afterwards, so a
    # keyword such as "amp" can never match inside an entity like "&amp;".
    marker = f"<mark>{html.escape(keyword)}</mark>"
    return marker.join(
        html.escape(part) for part in extracted_text.split(keyword)
    )
19
+
20
  title = "Tesseract OCR"
21
+ description = "Gradio demo for Tesseract. Tesseract is an open-source text recognition (OCR) Engine."
22
  article = "<p style='text-align: center'><a href='https://tesseract-ocr.github.io/' target='_blank'>Tesseract documentation</a> | <a href='https://github.com/tesseract-ocr/tesseract' target='_blank'>Github Repo</a></p>"
23
 
24
+
25
 
26
  demo = gr.Interface(
27
  fn=tesseract_ocr,
28
  inputs=[
29
+ gr.Image(type="filepath", label="Upload Image for OCR"),
30
+ gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...") # Keyword input
31
  ],
32
  outputs='html',
33
  title=title,
 
37
 
38
  if __name__ == '__main__':
39
  demo.launch()
40
+ print("Finished running")