Aumkeshchy2003 committed on
Commit
6916c84
·
verified ·
1 Parent(s): 76a8b7b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -16
app.py CHANGED
@@ -4,10 +4,12 @@ from PIL import Image
4
  import re
5
  import gradio as gr
6
 
7
- def tesseract_ocr(filepath: str, languages: List[str]) -> str:
8
- """Extract text from the image using Tesseract OCR."""
9
  image = Image.open(filepath)
10
- extracted_text = pytesseract.image_to_string(image=image, lang=', '.join(languages))
 
 
11
  return extracted_text
12
 
13
  def search_and_highlight(text: str, keyword: str) -> str:
@@ -17,13 +19,15 @@ def search_and_highlight(text: str, keyword: str) -> str:
17
  return highlighted_text
18
  return text
19
 
20
- def ocr_and_search(filepath: str, keyword: str, languages: List[str]) -> str:
21
  """Perform OCR on the image and highlight the specified keyword."""
22
  if filepath is None:
23
  return "Please upload an image."
24
 
25
- extracted_text = tesseract_ocr(filepath, languages)
 
26
 
 
27
  if keyword:
28
  highlighted_text = search_and_highlight(extracted_text, keyword)
29
  return highlighted_text
@@ -31,26 +35,20 @@ def ocr_and_search(filepath: str, keyword: str, languages: List[str]) -> str:
31
  return extracted_text
32
 
33
  # Gradio Interface
34
- title = "Tesseract OCR"
35
- description = "Gradio demo for Tesseract."
36
- article = "<p>Upload an image and optionally highlight keywords.</p>"
37
-
38
- # Get available languages for Tesseract
39
- language_choices = pytesseract.get_languages()
40
 
41
  demo = gr.Interface(
42
  fn=ocr_and_search,
43
  inputs=[
44
  gr.Image(type="filepath", label="Upload Image for OCR"),
45
- gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword..."),
46
- gr.CheckboxGroup(choices=language_choices, label="Select OCR Language(s)", value=['eng']) # Added language selection
47
  ],
48
  outputs='html', # Changed to 'html' to display highlighted text
49
  title=title,
50
- description=description,
51
- article=article
52
  )
53
 
54
  if __name__ == '__main__':
55
  demo.launch()
56
- print("Finished running")
 
4
  import re
5
  import gradio as gr
6
 
7
+ def tesseract_ocr(filepath: str) -> str:
8
+ """Extract text from the image using Tesseract OCR with both English and Hindi."""
9
  image = Image.open(filepath)
10
+ # Set languages to English and Hindi by default
11
+ combined_languages = 'eng+hin'
12
+ extracted_text = pytesseract.image_to_string(image=image, lang=combined_languages)
13
  return extracted_text
14
 
15
  def search_and_highlight(text: str, keyword: str) -> str:
 
19
  return highlighted_text
20
  return text
21
 
22
+ def ocr_and_search(filepath: str, keyword: str) -> str:
23
  """Perform OCR on the image and highlight the specified keyword."""
24
  if filepath is None:
25
  return "Please upload an image."
26
 
27
+ # Perform OCR (with default English and Hindi languages)
28
+ extracted_text = tesseract_ocr(filepath)
29
 
30
+ # Highlight the keyword if provided
31
  if keyword:
32
  highlighted_text = search_and_highlight(extracted_text, keyword)
33
  return highlighted_text
 
35
  return extracted_text
36
 
37
  # Gradio Interface
38
+ title = "Tesseract OCR (English + Hindi)"
39
+ description = "Gradio demo for Tesseract with multi-language support (English and Hindi)."
 
 
 
 
40
 
41
  demo = gr.Interface(
42
  fn=ocr_and_search,
43
  inputs=[
44
  gr.Image(type="filepath", label="Upload Image for OCR"),
45
+ gr.Textbox(label="Keyword to Highlight", placeholder="Enter a keyword...")
 
46
  ],
47
  outputs='html', # Changed to 'html' to display highlighted text
48
  title=title,
49
+ description=description
 
50
  )
51
 
52
  if __name__ == '__main__':
53
  demo.launch()
54
+ print("Finished running")