Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
@@ -5,9 +5,18 @@ from PIL import ImageDraw
|
|
5 |
import gradio as gr
|
6 |
import torch
|
7 |
import easyocr
|
|
|
8 |
|
9 |
-
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def draw_boxes(image, bounds, color='yellow', width=2):
|
13 |
draw = ImageDraw.Draw(image)
|
@@ -16,18 +25,24 @@ def draw_boxes(image, bounds, color='yellow', width=2):
|
|
16 |
draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
|
17 |
return image
|
18 |
|
19 |
-
def inference(
|
20 |
reader = easyocr.Reader(lang)
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
title = 'EasyOCR'
|
28 |
description = 'Realtime EasyOCR.'
|
29 |
article = "<p style='text-align: center'><a href='https://www.jaided.ai/easyocr/'>OCR for written scripts.</a> | <a href='https://github.com/JaidedAI/EasyOCR'>Github Repo</a></p>"
|
30 |
-
examples = [['
|
31 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
32 |
choices = [
|
33 |
"en",
|
@@ -35,12 +50,12 @@ choices = [
|
|
35 |
]
|
36 |
gr.Interface(
|
37 |
inference,
|
38 |
-
[gr.inputs.
|
39 |
-
[gr.outputs.
|
40 |
title=title,
|
41 |
description=description,
|
42 |
article=article,
|
43 |
examples=examples,
|
44 |
css=css,
|
45 |
enable_queue=True
|
46 |
-
).launch(debug=True)
|
|
|
5 |
import gradio as gr
|
6 |
import torch
|
7 |
import easyocr
|
8 |
+
import fitz # PyMuPDF
|
9 |
|
10 |
+
# Function to extract images from PDF
|
11 |
+
def pdf_to_images(pdf_path):
    """Render every page of a PDF into a PIL image.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        A list holding one RGB ``Image`` per page, in page order.
    """
    images = []
    # Open the document in a ``with`` block so it is closed even when a page
    # fails to render; previously the handle was never released.
    with fitz.open(pdf_path) as doc:
        for page in doc:
            # alpha=False keeps the sample buffer at three bytes per pixel,
            # which is what the "RGB" mode below expects.
            pix = page.get_pixmap(alpha=False)
            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            images.append(img)
    return images
|
20 |
|
21 |
def draw_boxes(image, bounds, color='yellow', width=2):
|
22 |
draw = ImageDraw.Draw(image)
|
|
|
25 |
draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
|
26 |
return image
|
27 |
|
28 |
+
def inference(pdf, lang):
    """Run EasyOCR on every page of an uploaded PDF.

    Args:
        pdf: Uploaded file object; only its ``name`` attribute (a path on
            disk) is read.
        lang: Language codes handed to ``easyocr.Reader``.

    Returns:
        A list with one ``(result_image_path, dataframe)`` tuple per page.
        ``result_image_path`` points at a JPEG of the page with the detected
        boxes drawn on it; ``dataframe`` holds the OCR rows with their first
        column dropped.
    """
    import os
    import tempfile

    reader = easyocr.Reader(lang)
    images = pdf_to_images(pdf.name)

    # Each call gets its own scratch directory. Fixed file names in the
    # working directory let concurrent requests overwrite each other's pages.
    work_dir = tempfile.mkdtemp(prefix='easyocr_')

    results = []
    for page_no, img in enumerate(images, start=1):
        img_path = os.path.join(work_dir, f'page_{page_no}.jpg')
        img.save(img_path)
        try:
            bounds = reader.readtext(img_path)
        finally:
            # The unannotated page is only needed as OCR input.
            os.remove(img_path)

        # draw_boxes paints onto ``img`` in place.
        draw_boxes(img, bounds)
        result_img_path = os.path.join(work_dir, f'result_{page_no}.jpg')
        img.save(result_img_path)

        # Drop the first column so only the remaining OCR fields are shown.
        results.append((result_img_path, pd.DataFrame(bounds).iloc[:, 1:]))

    # NOTE(review): the interface declares two output components, while this
    # returns a list of per-page tuples -- confirm the mapping works for
    # PDFs whose page count is not exactly what the UI expects.
    return results
|
41 |
|
42 |
title = 'EasyOCR'
|
43 |
description = 'Realtime EasyOCR.'
|
44 |
article = "<p style='text-align: center'><a href='https://www.jaided.ai/easyocr/'>OCR for written scripts.</a> | <a href='https://github.com/JaidedAI/EasyOCR'>Github Repo</a></p>"
|
45 |
+
examples = [['example.pdf',['en']]]
|
46 |
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
|
47 |
choices = [
|
48 |
"en",
|
|
|
50 |
]
|
51 |
# Build the Gradio UI around ``inference`` and start it: a PDF upload and a
# language checkbox group go in, a file and a text/confidence table come out.
gr.Interface(
    fn=inference,
    inputs=[
        gr.inputs.File(type='file', label='Input PDF'),
        gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='language'),
    ],
    outputs=[
        gr.outputs.File(type='file', label='Output Images'),
        gr.outputs.Dataframe(headers=['text', 'confidence']),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
    css=css,
    enable_queue=True,
).launch(debug=True)
|