sotirios-slv committed on
Commit
8b6c55a
1 Parent(s): ed6a232

Basic implementation of Tesseract copied from HF example

Browse files
Files changed (3) hide show
  1. app.py +52 -0
  2. packages.txt +1 -0
  3. requirements.txt +2 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ import pytesseract
3
+ import gradio as gr
4
+ import os
5
+
6
+ langs = []
7
+
8
+ choices = os.popen("tesseract --list-langs").read().split("\n")[1:-1]
9
+
10
+ blocks = gr.Blocks()
11
+
12
+
13
+ # If you don't have tesseract executable in your PATH, include the following:
14
+ # pytesseract.pytesseract.tesseract_cmd = r'<full_path_to_your_tesseract_executable>'
15
+ # Example tesseract_cmd = r'C:\Program Files (x86)\Tesseract-OCR\tesseract'
16
+
17
+ # Simple image to string
18
+ # print(pytesseract.image_to_string(Image.open('eurotext.png')))
19
+
20
+ # # French text image to string
21
+ # print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))
22
+
23
+ # # Get bounding box estimates
24
+ # print(pytesseract.image_to_boxes(Image.open('test.png')))
25
+
26
+ # # Get verbose data including boxes, confidences, line and page numbers
27
+ # print(pytesseract.image_to_data(Image.open('test.png')))
28
+
29
+ # # Get information about orientation and script detection
30
+ # print(pytesseract.image_to_osd(Image.open('test.png')))
31
+
32
+
33
+ def run(image, lang=None):
34
+ result = pytesseract.image_to_string(image, lang=None if lang == [] else lang)
35
+ return result
36
+
37
+
38
+ with gr.Blocks() as demo:
39
+ gr.Markdown("## Theatre Programmer")
40
+ with gr.Row():
41
+ with gr.Column():
42
+ image_in = gr.Image(type="pil")
43
+ lang = gr.Dropdown(choices)
44
+ btn = gr.Button("Run")
45
+ with gr.Column():
46
+ text_out = gr.TextArea()
47
+
48
+ # examples = gr.Examples([["./eurotext.png", None]], fn=run, inputs=[
49
+ # image_in, lang], outputs=[text_out], cache_examples=False)
50
+ btn.click(fn=run, inputs=[image_in, lang], outputs=[text_out])
51
+
52
+ demo.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr-all
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio
2
+ pytesseract