Charles Kabui committed on
Commit
68847fc
1 Parent(s): 1d814bd

rotating bboxes

Browse files
analysis.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
main.py CHANGED
@@ -32,7 +32,7 @@ visualize_bboxes_on_image_kwargs = {
32
  'label_rectangle_top_margin': 0
33
  }
34
  vectors_types = ['vectors', 'weighted_vectors',
35
- 'reduced_vectors', 'weighted_reduced_vectors']
36
 
37
 
38
  def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
 
32
  'label_rectangle_top_margin': 0
33
  }
34
  vectors_types = ['vectors', 'weighted_vectors',
35
+ 'reduced_vectors', 'reduced_weighted_vectors']
36
 
37
 
38
  def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):
utils/flatten.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Iterable, Literal
2
+ import sys
3
+
4
+
5
+ def flatten(iterable: Iterable, depth = sys.maxsize, return_type: Literal['list', 'generator'] = 'list') -> list | Iterable:
6
+ """
7
+ Flatten a nested iterable up to a specified depth.
8
+
9
+ Args:
10
+ iterable (iterable): The iterable to be expanded.
11
+ depth (int, optional): The depth to which the iterable should be expanded.
12
+ Defaults to 1.
13
+ return_type (Literal['list', 'generator'], optional): The type of the return value.
14
+ Defaults to 'list'.
15
+ Yields:
16
+ The expanded elements.
17
+ """
18
+
19
+ def expand(item, current_depth=0):
20
+ if current_depth == depth:
21
+ yield item
22
+ elif isinstance(item, (list, tuple, set)):
23
+ for sub_item in item:
24
+ yield from expand(sub_item, current_depth + 1)
25
+ else:
26
+ yield item
27
+
28
+ def generator():
29
+ for item in iterable:
30
+ yield from expand(item)
31
+
32
+ if return_type == 'list':
33
+ return list(generator())
34
+ return generator()
utils/get_features.py CHANGED
@@ -102,7 +102,7 @@ def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_name
102
  weighted_jaccard_index = False,
103
  **reduced_predictions)
104
 
105
- weighted_reduced_vectors = get_vectors(
106
  sub_images_bboxes = sub_images_bboxes,
107
  label_names = label_names,
108
  weighted_jaccard_index = True,
@@ -119,5 +119,5 @@ def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_name
119
  'reduced_predicted_scores': reduced_predictions['predicted_scores'],
120
  'reduced_predicted_labels': reduced_predictions['predicted_labels'],
121
  'reduced_vectors': list(reduced_vectors),
122
- 'weighted_reduced_vectors': list(weighted_reduced_vectors),
123
  }
 
102
  weighted_jaccard_index = False,
103
  **reduced_predictions)
104
 
105
+ reduced_weighted_vectors = get_vectors(
106
  sub_images_bboxes = sub_images_bboxes,
107
  label_names = label_names,
108
  weighted_jaccard_index = True,
 
119
  'reduced_predicted_scores': reduced_predictions['predicted_scores'],
120
  'reduced_predicted_labels': reduced_predictions['predicted_labels'],
121
  'reduced_vectors': list(reduced_vectors),
122
+ 'reduced_weighted_vectors': list(reduced_weighted_vectors),
123
  }
utils/visualize_bboxes_on_image.py CHANGED
@@ -3,7 +3,7 @@ from urllib.parse import urlparse
3
  from PIL import Image, ImageDraw, ImageFont
4
  import numpy as np
5
  import requests
6
- from typing import List
7
  from functools import cache
8
  import matplotlib.colors as colors
9
 
@@ -33,7 +33,7 @@ def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/
33
 
34
  named_colors_mapping = colors.get_named_colors_mapping()
35
  @cache
36
- def get_color(color: str | tuple) -> tuple | str:
37
  if isinstance(color, tuple):
38
  if len(color) == 2:
39
  real_color, alpha = (color[0], int(color[1]))
@@ -44,6 +44,31 @@ def get_color(color: str | tuple) -> tuple | str:
44
  return tuple(real_color_alpha)
45
  return color
46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def visualize_bboxes_on_image(
48
  image: Image.Image,
49
  bboxes: List[List[int]],
@@ -58,7 +83,8 @@ def visualize_bboxes_on_image(
58
  label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
59
  label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
60
  label_text_size=DEFAULTS["label_text_size"],
61
- convert_to_x0y0x1y1=None) -> Image.Image:
 
62
  '''
63
  Visualize bounding boxes on an image
64
  Args:
@@ -75,11 +101,11 @@ def visualize_bboxes_on_image(
75
  label_rectangle_top_margin: Top padding of the label rectangle
76
  label_text_size: Font size of the label text
77
  convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
 
78
  Returns:
79
  Image: Image annotated with bounding boxes
80
  '''
81
  image = image.copy().convert("RGB")
82
- draw = ImageDraw.Draw(image)
83
  font = get_font(size=label_text_size)
84
  labels = (labels or []) + np.full(len(bboxes) -
85
  len(labels or []), None).tolist()
@@ -91,34 +117,31 @@ def visualize_bboxes_on_image(
91
  for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
92
  x0, y0, x1, y1 = convert_to_x0y0x1y1(
93
  bbox) if convert_to_x0y0x1y1 is not None else bbox
94
- _bbox_fill_color = get_color(_bbox_fill_color)
95
- _bbox_outline_color = get_color(_bbox_outline_color)
96
- rectangle_image = Image.new('RGBA', image.size)
97
- rectangle_image_draw = ImageDraw.Draw(rectangle_image)
98
- rectangle_image_draw.rectangle(
99
- xy=[x0, y0, x1, y1],
100
- fill=_bbox_fill_color,
101
- outline=_bbox_outline_color,
102
- width=bbox_outline_width)
103
- image.paste(im=rectangle_image, mask=rectangle_image)
104
 
105
  if label is not None:
106
- draw_text_on_image(
107
- draw,
108
- [x0, y0],
109
- label,
110
- label_text_color,
111
- label_fill_color,
112
- label_text_padding,
113
- label_rectangle_left_margin,
114
- label_rectangle_top_margin,
115
- label_text_size,
116
- font)
 
117
  return image
118
 
119
-
120
  def draw_text_on_image(
121
- image_or_draw: Image.Image | ImageDraw.ImageDraw,
122
  text_position_xy: List[int],
123
  label: str,
124
  label_text_color=DEFAULTS["label_text_color"],
@@ -127,22 +150,28 @@ def draw_text_on_image(
127
  label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
128
  label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
129
  label_text_size=DEFAULTS["label_text_size"],
130
- font: ImageFont.FreeTypeFont = None) -> Image.Image:
131
- is_image = isinstance(image_or_draw, Image.Image)
132
- image = image_or_draw.copy().convert("RGB") if is_image else None
133
  font = font or get_font(size=label_text_size)
134
  x0, y0 = text_position_xy
135
- text_position = (x0 - label_rectangle_left_margin + label_text_padding,
136
- y0 - label_rectangle_top_margin + label_text_padding)
137
- draw = ImageDraw.Draw(image) if is_image else image_or_draw
138
- _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(
139
- text_position, label, font=font)
140
  xy = [
141
  text_position[0] - label_text_padding,
142
  text_position[1] - label_text_padding,
143
  text_bbox_right + label_text_padding + label_text_padding,
144
  text_bbox_bottom + label_text_padding + label_text_padding
145
  ]
146
- draw.rectangle(xy, fill=label_fill_color)
147
- draw.text(text_position, label, font=font, fill=label_text_color)
 
 
 
 
 
 
148
  return image
 
3
  from PIL import Image, ImageDraw, ImageFont
4
  import numpy as np
5
  import requests
6
+ from typing import List, Callable
7
  from functools import cache
8
  import matplotlib.colors as colors
9
 
 
33
 
34
  named_colors_mapping = colors.get_named_colors_mapping()
35
  @cache
36
+ def parse_color(color: str | tuple) -> tuple | str:
37
  if isinstance(color, tuple):
38
  if len(color) == 2:
39
  real_color, alpha = (color[0], int(color[1]))
 
44
  return tuple(real_color_alpha)
45
  return color
46
 
47
def draw_bounding_box(
        image: Image.Image,
        bbox_outline_width: int,
        bbox_fill_color: str | tuple | None,
        bbox_outline_color: str | tuple | None,
        bbox: List[int],
        label_rotate_angle: int = 0,
        mask_callback: Callable[[ImageDraw.ImageDraw], None] | None = None) -> Image.Image:
    '''
    Draw a single rectangle onto an image through an RGBA overlay

    The rectangle is drawn on a fresh RGBA image of the same size as
    ``image``; the overlay is then rotated and pasted onto ``image`` using
    itself as the mask.

    Args:
        image: Image to draw on. It is modified in place by the paste and
            also returned.
        bbox_outline_width: Width of the rectangle outline
        bbox_fill_color: Fill color, passed through ``parse_color``. A
            falsy value omits the fill.
        bbox_outline_color: Outline color, passed through ``parse_color``.
            A falsy value omits the outline.
        bbox: Rectangle coordinates, forwarded as the ``xy`` argument of
            ``ImageDraw.rectangle`` (callers pass ``[x0, y0, x1, y1]``)
        label_rotate_angle: Angle handed to ``Image.rotate`` for the whole
            overlay (rectangle included), with ``expand=1``
        mask_callback: Optional callable receiving the overlay's
            ``ImageDraw`` so extra content (e.g. text) can be drawn on it
            before rotation
    Returns:
        Image: The same ``image`` object with the overlay pasted on it
    '''
    options = {
        'xy': bbox,
        'fill': parse_color(bbox_fill_color) if bbox_fill_color else None,
        'outline': parse_color(bbox_outline_color) if bbox_outline_color else None,
        'width': bbox_outline_width
    }
    # Drop unset options so ImageDraw falls back to its own defaults.
    options = {k: v for k, v in options.items() if v is not None}
    rectangle_image = Image.new('RGBA', image.size)
    rectangle_image_draw = ImageDraw.Draw(rectangle_image)
    rectangle_image_draw.rectangle(**options)
    if mask_callback:
        mask_callback(rectangle_image_draw)
    # NOTE(review): with expand=1 a non-zero angle can enlarge the overlay
    # beyond image.size; it is still pasted at the top-left corner - confirm
    # this placement is intended.
    rectangle_image = rectangle_image.rotate(label_rotate_angle, expand=1)
    # The overlay doubles as its own mask, so only drawn pixels are pasted.
    image.paste(im=rectangle_image, mask=rectangle_image)
    return image
71
+
72
  def visualize_bboxes_on_image(
73
  image: Image.Image,
74
  bboxes: List[List[int]],
 
83
  label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
84
  label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
85
  label_text_size=DEFAULTS["label_text_size"],
86
+ convert_to_x0y0x1y1=None,
87
+ label_rotate_angle: int = 0) -> Image.Image:
88
  '''
89
  Visualize bounding boxes on an image
90
  Args:
 
101
  label_rectangle_top_margin: Top padding of the label rectangle
102
  label_text_size: Font size of the label text
103
  convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
104
+ label_rotate_angle: Angle to rotate the label text
105
  Returns:
106
  Image: Image annotated with bounding boxes
107
  '''
108
  image = image.copy().convert("RGB")
 
109
  font = get_font(size=label_text_size)
110
  labels = (labels or []) + np.full(len(bboxes) -
111
  len(labels or []), None).tolist()
 
117
  for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
118
  x0, y0, x1, y1 = convert_to_x0y0x1y1(
119
  bbox) if convert_to_x0y0x1y1 is not None else bbox
120
+
121
+ image = draw_bounding_box(
122
+ image = image,
123
+ bbox_outline_width = bbox_outline_width,
124
+ bbox_fill_color = _bbox_fill_color,
125
+ bbox_outline_color = _bbox_outline_color,
126
+ bbox = [x0, y0, x1, y1])
 
 
 
127
 
128
  if label is not None:
129
+ image = draw_text_on_image(
130
+ image = image,
131
+ text_position_xy = [x0, y0],
132
+ label = label,
133
+ label_text_color = label_text_color,
134
+ label_fill_color = label_fill_color,
135
+ label_text_padding = label_text_padding,
136
+ label_rectangle_left_margin = label_rectangle_left_margin,
137
+ label_rectangle_top_margin = label_rectangle_top_margin,
138
+ label_text_size = label_text_size,
139
+ font = font,
140
+ label_rotate_angle = label_rotate_angle)
141
  return image
142
 
 
143
  def draw_text_on_image(
144
+ image: Image.Image,
145
  text_position_xy: List[int],
146
  label: str,
147
  label_text_color=DEFAULTS["label_text_color"],
 
150
  label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
151
  label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
152
  label_text_size=DEFAULTS["label_text_size"],
153
+ font: ImageFont.FreeTypeFont = None,
154
+ label_rotate_angle: int = 0) -> Image.Image:
155
+ image = image.copy().convert("RGB")
156
  font = font or get_font(size=label_text_size)
157
  x0, y0 = text_position_xy
158
+ text_position = (
159
+ x0 - label_rectangle_left_margin + label_text_padding,
160
+ y0 - label_rectangle_top_margin + label_text_padding)
161
+ draw = ImageDraw.Draw(image)
162
+ _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, label, font=font)
163
  xy = [
164
  text_position[0] - label_text_padding,
165
  text_position[1] - label_text_padding,
166
  text_bbox_right + label_text_padding + label_text_padding,
167
  text_bbox_bottom + label_text_padding + label_text_padding
168
  ]
169
+ image = draw_bounding_box(
170
+ image = image,
171
+ bbox_outline_width = 0,
172
+ bbox_fill_color = label_fill_color,
173
+ bbox_outline_color = None,
174
+ bbox = xy,
175
+ label_rotate_angle = label_rotate_angle,
176
+ mask_callback = lambda mask_draw: mask_draw.text(text_position, label, font=font, fill=label_text_color))
177
  return image