import os, io, sys, inspect

from google.cloud import vision, storage
from PIL import Image, ImageDraw

# Add this file's parent directory to the import path so sibling modules resolve.
currentdir = os.path.dirname(os.path.abspath(
    inspect.getfile(inspect.currentframe())))
parentdir = os.path.dirname(currentdir)
sys.path.append(parentdir)
def draw_boxes(image, bounds, color):
    """Draw a polygon outline around each detected text region on the image."""
    if bounds:
        draw = ImageDraw.Draw(image)
        width, height = image.size
        # Scale the line width with image size: 0.1% of the average dimension,
        # but at least 1 px so the outline remains visible on small images.
        line_width = max(1, int((width + height) / 2 * 0.001))
        for bound in bounds:
            draw.polygon(
                [
                    bound["vertices"][0]["x"], bound["vertices"][0]["y"],
                    bound["vertices"][1]["x"], bound["vertices"][1]["y"],
                    bound["vertices"][2]["x"], bound["vertices"][2]["y"],
                    bound["vertices"][3]["x"], bound["vertices"][3]["y"],
                ],
                outline=color,
                width=line_width,
            )
    return image
def detect_text(path, client):
    """Run Google Cloud Vision document text detection on a local image file.

    Returns the full detected text, a list of per-word bounding boxes, and a
    mapping from each bounding box (stringified) to the word it contains.
    """
    with io.open(path, 'rb') as image_file:
        content = image_file.read()

    image = vision.Image(content=content)
    response = client.document_text_detection(image=image)
    texts = response.text_annotations

    if response.error.message:
        raise Exception(
            '{}\nFor more info on error messages, check: '
            'https://cloud.google.com/apis/design/errors'.format(
                response.error.message))

    # Extract a bounding box for each detected word.
    bounds = []
    text_to_box_mapping = {}
    for text in texts[1:]:  # Skip the first entry: it spans the entire detected text.
        # Convert the BoundingPoly protobuf into a plain dictionary.
        bound_dict = {
            "vertices": [
                {"x": vertex.x, "y": vertex.y} for vertex in text.bounding_poly.vertices
            ]
        }
        bounds.append(bound_dict)
        text_to_box_mapping[str(bound_dict)] = text.description

    if texts:
        cleaned_text = texts[0].description
        return cleaned_text, bounds, text_to_box_mapping
    else:
        return '', None, None
def overlay_boxes_on_image(path, bounds, do_create_OCR_helper_image):
    """Open the image at `path`, optionally drawing the OCR bounding boxes on it."""
    image = Image.open(path)
    if do_create_OCR_helper_image:
        draw_boxes(image, bounds, "green")
    return image
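
# Minimal usage sketch (illustrative, not part of the original module): it assumes
# Google Cloud credentials are configured (e.g. via GOOGLE_APPLICATION_CREDENTIALS)
# and that "sample_label.jpg" is a placeholder path to a local image.
if __name__ == "__main__":
    client = vision.ImageAnnotatorClient()
    text, bounds, text_to_box = detect_text("sample_label.jpg", client)
    print(text)
    annotated = overlay_boxes_on_image(
        "sample_label.jpg", bounds, do_create_OCR_helper_image=True)
    annotated.save("sample_label_annotated.jpg")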