Charlie Li committed on
Commit
8011742
•
1 Parent(s): a689166
Files changed (3) hide show
  1. README copy.md +12 -0
  2. app.py +27 -0
  3. requirements.txt +3 -0
README copy.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Pytesseract Test
3
+ emoji: 👁
4
+ colorFrom: gray
5
+ colorTo: gray
6
+ sdk: gradio
7
+ sdk_version: 4.16.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pytesseract
3
+ from PIL import Image, ImageDraw
4
+
5
def extract_text_and_boxes(image):
    """Run Tesseract OCR on an image and outline every recognised word.

    Args:
        image: A PIL image to analyse, or ``None`` when no image was supplied.

    Returns:
        A ``(annotated_image, boxes_and_words)`` tuple. ``annotated_image`` is
        a copy of ``image`` with a red rectangle drawn around each word, and
        ``boxes_and_words`` is a list of ``{'box': (x, y, w, h), 'word': str}``
        dicts in the order they appear in the OCR output. When ``image`` is
        ``None`` the result is ``(None, [])``.
    """
    # An empty submission has nothing to OCR; return an empty result instead
    # of letting the OCR call fail on a missing image.
    if image is None:
        return None, []

    data = pytesseract.image_to_data(image, output_type=pytesseract.Output.DICT)

    # Draw on a copy so the caller's image object is left untouched.
    annotated = image.copy()
    draw = ImageDraw.Draw(annotated)

    boxes_and_words = []
    columns = zip(
        data['text'], data['left'], data['top'], data['width'], data['height']
    )
    for word, x, y, w, h in columns:
        # Skip entries whose text is empty or whitespace-only.
        if not word.strip():
            continue
        boxes_and_words.append({'box': (x, y, w, h), 'word': word})
        draw.rectangle([x, y, x + w, y + h], outline='red')

    return annotated, boxes_and_words
19
+
20
# Build the web UI: one uploaded image in, the annotated image plus the
# list of recognised words and their boxes (as JSON) out.
iface = gr.Interface(
    fn=extract_text_and_boxes,
    inputs=gr.Image(type='pil'),
    outputs=[
        gr.Image(type='pil', label="Image with Bounding Boxes"),
        gr.JSON(label="Extracted Words and Boxes"),
    ],
    title="Test Tesseract",
    description="Test PyTesseract.",
)

# Start serving the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ pytesseract
3
+ Pillow