File size: 3,210 Bytes
87c3140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93fd830
 
87c3140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9f4039
 
 
 
 
 
 
 
87c3140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os, io, sys, inspect
from google.cloud import vision, storage
from PIL import Image, ImageDraw

# Locate the directory holding this module (via the file of the currently
# executing frame) and put its parent directory on sys.path so imports can
# also be resolved from there.
currentdir = os.path.split(
    os.path.abspath(inspect.getfile(inspect.currentframe()))
)[0]
parentdir = os.path.split(currentdir)[0]
sys.path.append(parentdir)

def draw_boxes(image, bounds, color):
    """Outline every bounding polygon in ``bounds`` on ``image``.

    The image is drawn on in place and the same object is returned.

    Args:
        image: Image to draw on; its ``size`` is read and it is handed to
            ``ImageDraw.Draw``.
        bounds: Iterable of dicts shaped like
            ``{"vertices": [{"x": ..., "y": ...}, ...]}``. A falsy value
            (``None`` or an empty list) means there is nothing to draw.
        color: Outline color forwarded to ``ImageDraw.polygon``.

    Returns:
        The ``image`` object that was passed in.
    """
    if not bounds:
        return image

    draw = ImageDraw.Draw(image)
    width, height = image.size
    # Outline thickness is 0.1% of the average image dimension. It is clamped
    # to at least 1 px because the truncation yields 0 for images whose
    # average side is under 1000 px, and a zero width draws no outline at all.
    line_width = max(1, int((width + height) / 2 * 0.001))

    for bound in bounds:
        # Use every vertex the polygon carries rather than assuming exactly
        # four, so shorter or longer polygons no longer fail or get cut off.
        points = [(vertex["x"], vertex["y"]) for vertex in bound["vertices"]]
        if len(points) < 2:
            # Fewer than two points cannot form an outline; nothing to draw.
            continue
        draw.polygon(points, outline=color, width=line_width)
    return image

def detect_text(path, client):
    """Run document text detection on a local image file.

    Args:
        path: Filesystem path of the image; it is read as raw bytes.
        client: Object exposing ``document_text_detection(image=...)``
            (presumably a ``vision.ImageAnnotatorClient`` -- confirm with
            the caller).

    Returns:
        A ``(text, bounds, text_to_box_mapping)`` tuple:
          * ``text`` is the description of the first annotation, which
            represents the entire detected text.
          * ``bounds`` is a list of ``{"vertices": [{"x", "y"}, ...]}``
            dicts, one per remaining annotation.
          * ``text_to_box_mapping`` maps ``str(box_dict)`` to that
            annotation's description.
        When nothing was detected the result is ``('', None, None)``.

    Raises:
        Exception: If the response carries a non-empty error message.
    """
    with io.open(path, 'rb') as handle:
        payload = handle.read()

    request_image = vision.Image(content=payload)
    response = client.document_text_detection(image=request_image)
    annotations = response.text_annotations

    if response.error.message:
        raise Exception(
            f'{response.error.message}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors')

    # No annotations at all: report empty text and no boxes.
    if not annotations:
        return '', None, None

    bounds = []
    text_to_box_mapping = {}
    # The first annotation covers the whole text, so per-box data starts at
    # the second entry.
    for annotation in annotations[1:]:
        # Plain-dict copy of the bounding polygon's vertices.
        box = {
            "vertices": [
                {"x": corner.x, "y": corner.y}
                for corner in annotation.bounding_poly.vertices
            ]
        }
        bounds.append(box)
        text_to_box_mapping[str(box)] = annotation.description

    return annotations[0].description, bounds, text_to_box_mapping
    
def overlay_boxes_on_image(path, bounds, do_create_OCR_helper_image):
    """Open the image at ``path``, optionally outlining ``bounds`` in green.

    Args:
        path: Path handed to ``Image.open``.
        bounds: Bounding polygons forwarded to ``draw_boxes``.
        do_create_OCR_helper_image: When truthy the boxes are drawn onto the
            image; otherwise the image is returned as opened.

    Returns:
        The opened image, with boxes drawn on it if requested.
    """
    image = Image.open(path)
    if do_create_OCR_helper_image:
        draw_boxes(image, bounds, "green")
    return image





















# ''' Google Vision'''
# def detect_text(path):
#     """Detects text in the file located in the local filesystem."""
#     client = vision.ImageAnnotatorClient()

#     with io.open(path, 'rb') as image_file:
#         content = image_file.read()

#     image = vision.Image(content=content)

#     response = client.document_text_detection(image=image)
#     texts = response.text_annotations

#     if response.error.message:
#         raise Exception(
#             '{}\nFor more info on error messages, check: '
#             'https://cloud.google.com/apis/design/errors'.format(
#                 response.error.message))

#     return texts[0].description if texts else ''