Spaces:

mckabue
/

document-similarity-matching-using-visual-layout-features-archive

Build error

App Files Files Community

Charles Kabui committed on Mar 17

Commit

68847fc

•

1 Parent(s): 1d814bd

rotating bboxes

Browse files

Files changed (5) hide show

analysis.ipynb +0 -0
main.py +1 -1
utils/flatten.py +34 -0
utils/get_features.py +2 -2
utils/visualize_bboxes_on_image.py +66 -37

analysis.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

main.py CHANGED Viewed

@@ -32,7 +32,7 @@ visualize_bboxes_on_image_kwargs = {
     'label_rectangle_top_margin': 0
 }
 vectors_types = ['vectors', 'weighted_vectors',
-                 'reduced_vectors', 'weighted_reduced_vectors']
 def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):

     'label_rectangle_top_margin': 0
 }
 vectors_types = ['vectors', 'weighted_vectors',
+                 'reduced_vectors', 'reduced_weighted_vectors']
 def similarity_fn(model: lp.Detectron2LayoutModel, document_image_1: Image.Image, document_image_2: Image.Image, vectors_type: str):

utils/flatten.py ADDED Viewed

	@@ -0,0 +1,34 @@

+from typing import Iterable, Literal
+import sys
+def flatten(iterable: Iterable, depth = sys.maxsize, return_type: Literal['list', 'generator'] = 'list') -> list | Iterable:
+    """
+    Flatten a nested iterable up to a specified depth.
+
+    Only list, tuple and set items are expanded; every other item (strings,
+    dicts, scalars, ...) is emitted unchanged.
+
+    Args:
+        iterable (iterable): The iterable to be expanded.
+        depth (int, optional): Number of nesting levels to expand below the
+                               top-level iterable. 0 emits the top-level items
+                               unchanged. Defaults to sys.maxsize, i.e.
+                               effectively unlimited.
+        return_type (Literal['list', 'generator'], optional): 'list' returns a list;
+                               any other value returns a lazy generator.
+                               Defaults to 'list'.
+    Returns:
+        A list of the expanded elements when return_type is 'list', otherwise
+        a generator that yields them.
+    """
+    def expand(item, current_depth=0):
+        # Depth budget used up: emit the item as-is, even if it is a container.
+        if current_depth == depth:
+            yield item
+        # Only these three container types are descended into.
+        elif isinstance(item, (list, tuple, set)):
+            for sub_item in item:
+                yield from expand(sub_item, current_depth + 1)
+        else:
+            # Leaf value (or a container type that is not expanded).
+            yield item
+    def generator():
+        # Each top-level item starts at depth 0.
+        for item in iterable:
+            yield from expand(item)
+    if return_type == 'list':
+        return list(generator())
+    # Any value other than 'list' falls through to the lazy generator.
+    return generator()

utils/get_features.py CHANGED Viewed

@@ -102,7 +102,7 @@ def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_name
     weighted_jaccard_index = False,
     **reduced_predictions)
-  weighted_reduced_vectors = get_vectors(
     sub_images_bboxes = sub_images_bboxes,
     label_names = label_names,
     weighted_jaccard_index = True,
@@ -119,5 +119,5 @@ def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_name
       'reduced_predicted_scores': reduced_predictions['predicted_scores'],
       'reduced_predicted_labels': reduced_predictions['predicted_labels'],
       'reduced_vectors': list(reduced_vectors),
-      'weighted_reduced_vectors': list(weighted_reduced_vectors),
   }

     weighted_jaccard_index = False,
     **reduced_predictions)
+  reduced_weighted_vectors = get_vectors(
     sub_images_bboxes = sub_images_bboxes,
     label_names = label_names,
     weighted_jaccard_index = True,
       'reduced_predicted_scores': reduced_predictions['predicted_scores'],
       'reduced_predicted_labels': reduced_predictions['predicted_labels'],
       'reduced_vectors': list(reduced_vectors),
+      'reduced_weighted_vectors': list(reduced_weighted_vectors),
   }

utils/visualize_bboxes_on_image.py CHANGED Viewed

@@ -3,7 +3,7 @@ from urllib.parse import urlparse
 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import requests
-from typing import List
 from functools import cache
 import matplotlib.colors as colors
@@ -33,7 +33,7 @@ def get_font(path_or_url: str = 'https://github.com/googlefonts/roboto/raw/main/
 named_colors_mapping = colors.get_named_colors_mapping()
 @cache
-def get_color(color: str | tuple) -> tuple | str:
     if isinstance(color, tuple):
         if len(color) == 2:
             real_color, alpha = (color[0], int(color[1]))
@@ -44,6 +44,31 @@ def get_color(color: str | tuple) -> tuple | str:
                     return tuple(real_color_alpha)
     return color
 def visualize_bboxes_on_image(
         image: Image.Image,
         bboxes: List[List[int]],
@@ -58,7 +83,8 @@ def visualize_bboxes_on_image(
         label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
         label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
         label_text_size=DEFAULTS["label_text_size"],
-        convert_to_x0y0x1y1=None) -> Image.Image:
     '''
     Visualize bounding boxes on an image
     Args:
@@ -75,11 +101,11 @@ def visualize_bboxes_on_image(
       label_rectangle_top_margin: Top padding of the label rectangle
       label_text_size: Font size of the label text
       convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
     Returns:
       Image: Image annotated with bounding boxes
     '''
     image = image.copy().convert("RGB")
-    draw = ImageDraw.Draw(image)
     font = get_font(size=label_text_size)
     labels = (labels or []) + np.full(len(bboxes) -
                                       len(labels or []), None).tolist()
@@ -91,34 +117,31 @@ def visualize_bboxes_on_image(
     for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
         x0, y0, x1, y1 = convert_to_x0y0x1y1(
             bbox) if convert_to_x0y0x1y1 is not None else bbox
-        _bbox_fill_color = get_color(_bbox_fill_color)
-        _bbox_outline_color = get_color(_bbox_outline_color)
-        rectangle_image = Image.new('RGBA', image.size)
-        rectangle_image_draw = ImageDraw.Draw(rectangle_image)
-        rectangle_image_draw.rectangle(
-            xy=[x0, y0, x1, y1],
-            fill=_bbox_fill_color,
-            outline=_bbox_outline_color,
-            width=bbox_outline_width)
-        image.paste(im=rectangle_image, mask=rectangle_image)
         if label is not None:
-            draw_text_on_image(
-                draw,
-                [x0, y0],
-                label,
-                label_text_color,
-                label_fill_color,
-                label_text_padding,
-                label_rectangle_left_margin,
-                label_rectangle_top_margin,
-                label_text_size,
-                font)
     return image
 def draw_text_on_image(
-        image_or_draw: Image.Image | ImageDraw.ImageDraw,
         text_position_xy: List[int],
         label: str,
         label_text_color=DEFAULTS["label_text_color"],
@@ -127,22 +150,28 @@ def draw_text_on_image(
         label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
         label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
         label_text_size=DEFAULTS["label_text_size"],
-        font: ImageFont.FreeTypeFont = None) -> Image.Image:
-    is_image = isinstance(image_or_draw, Image.Image)
-    image = image_or_draw.copy().convert("RGB") if is_image else None
     font = font or get_font(size=label_text_size)
     x0, y0 = text_position_xy
-    text_position = (x0 - label_rectangle_left_margin + label_text_padding,
-                     y0 - label_rectangle_top_margin + label_text_padding)
-    draw = ImageDraw.Draw(image) if is_image else image_or_draw
-    _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(
-        text_position, label, font=font)
     xy = [
         text_position[0] - label_text_padding,
         text_position[1] - label_text_padding,
         text_bbox_right + label_text_padding + label_text_padding,
         text_bbox_bottom + label_text_padding + label_text_padding
     ]
-    draw.rectangle(xy, fill=label_fill_color)
-    draw.text(text_position, label, font=font, fill=label_text_color)
     return image

 from PIL import Image, ImageDraw, ImageFont
 import numpy as np
 import requests
+from typing import List, Callable
 from functools import cache
 import matplotlib.colors as colors
 named_colors_mapping = colors.get_named_colors_mapping()
 @cache
+def parse_color(color: str | tuple) -> tuple | str:
     if isinstance(color, tuple):
         if len(color) == 2:
             real_color, alpha = (color[0], int(color[1]))
                     return tuple(real_color_alpha)
     return color
+def draw_bounding_box(
+        image: Image.Image,
+        bbox_outline_width: int,
+        bbox_fill_color: str | list[tuple | str],
+        bbox_outline_color: str | list[tuple | str],
+        bbox: List[List[int]],
+        label_rotate_angle: int = 0,
+        mask_callback: Callable[[ImageDraw.ImageDraw], None] = None) -> Image.Image:
+    '''
+    Draw one rectangle on a separate RGBA layer and paste that layer onto image
+    Args:
+      image: Image to draw on; it is modified in place by the paste and also returned
+      bbox_outline_width: Width passed to the rectangle outline
+      bbox_fill_color: Fill color, run through parse_color; a falsy value means no fill option is passed
+      bbox_outline_color: Outline color, run through parse_color; a falsy value means no outline option is passed
+      bbox: Rectangle coordinates, passed unchanged as the `xy` argument of ImageDraw.rectangle
+      label_rotate_angle: Angle passed to Image.rotate for the whole layer before pasting
+      mask_callback: Optional function called with the layer's ImageDraw after the rectangle is drawn, to draw extra content on the same layer
+    Returns:
+      Image: The same image object that was passed in
+    '''
+    # NOTE(review): bbox is annotated List[List[int]], but the callers visible in
+    # this file pass a flat [x0, y0, x1, y1] list - confirm the intended type.
+    options = {
+        'xy': bbox,
+        'fill': parse_color(bbox_fill_color) if bbox_fill_color else None,
+        'outline': parse_color(bbox_outline_color) if bbox_outline_color else None,
+        'width': bbox_outline_width
+    }
+    # Drop options that are None so ImageDraw.rectangle falls back to its own defaults.
+    options = {k: v for k, v in options.items() if v is not None}
+    # The layer has the same size as the target image.
+    rectangle_image = Image.new('RGBA', image.size)
+    rectangle_image_draw = ImageDraw.Draw(rectangle_image)
+    rectangle_image_draw.rectangle(**options)
+    if mask_callback:
+        mask_callback(rectangle_image_draw)
+    # NOTE(review): the whole layer is rotated, and expand=1 can make it larger
+    # than `image`; the paste below passes no position - confirm the rotated
+    # content lands where intended for non-zero angles.
+    rectangle_image = rectangle_image.rotate(label_rotate_angle,  expand=1)
+    # The layer is passed as its own mask for the paste.
+    image.paste(im=rectangle_image, mask=rectangle_image)
+    # draw.bitmap((100, 100), rectangle_image)
+    return image
 def visualize_bboxes_on_image(
         image: Image.Image,
         bboxes: List[List[int]],
         label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
         label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
         label_text_size=DEFAULTS["label_text_size"],
+        convert_to_x0y0x1y1=None,
+        label_rotate_angle: int = 0) -> Image.Image:
     '''
     Visualize bounding boxes on an image
     Args:
       label_rectangle_top_margin: Top padding of the label rectangle
       label_text_size: Font size of the label text
       convert_to_x0y0x1y1: Function to convert bounding box to x0y0x1y1 format
+      label_rotate_angle: Angle to rotate the label text
     Returns:
       Image: Image annotated with bounding boxes
     '''
     image = image.copy().convert("RGB")
     font = get_font(size=label_text_size)
     labels = (labels or []) + np.full(len(bboxes) -
                                       len(labels or []), None).tolist()
     for bbox, label, _bbox_fill_color, _bbox_outline_color in zip(bboxes, labels, bbox_fill_colors, bbox_outline_colors):
         x0, y0, x1, y1 = convert_to_x0y0x1y1(
             bbox) if convert_to_x0y0x1y1 is not None else bbox
+        image = draw_bounding_box(
+            image = image,
+            bbox_outline_width = bbox_outline_width,
+            bbox_fill_color = _bbox_fill_color,
+            bbox_outline_color = _bbox_outline_color,
+            bbox = [x0, y0, x1, y1])
         if label is not None:
+            image = draw_text_on_image(
+                image = image,
+                text_position_xy = [x0, y0],
+                label = label,
+                label_text_color = label_text_color,
+                label_fill_color = label_fill_color,
+                label_text_padding = label_text_padding,
+                label_rectangle_left_margin = label_rectangle_left_margin,
+                label_rectangle_top_margin = label_rectangle_top_margin,
+                label_text_size = label_text_size,
+                font = font,
+                label_rotate_angle = label_rotate_angle)
     return image
 def draw_text_on_image(
+        image: Image.Image,
         text_position_xy: List[int],
         label: str,
         label_text_color=DEFAULTS["label_text_color"],
         label_rectangle_left_margin=DEFAULTS["label_rectangle_left_margin"],
         label_rectangle_top_margin=DEFAULTS['label_rectangle_top_margin'],
         label_text_size=DEFAULTS["label_text_size"],
+        font: ImageFont.FreeTypeFont = None,
+        label_rotate_angle: int = 0) -> Image.Image:
+    image = image.copy().convert("RGB")
     font = font or get_font(size=label_text_size)
     x0, y0 = text_position_xy
+    text_position = (
+        x0 - label_rectangle_left_margin + label_text_padding,
+        y0 - label_rectangle_top_margin + label_text_padding)
+    draw = ImageDraw.Draw(image)
+    _, _, text_bbox_right, text_bbox_bottom = draw.textbbox(text_position, label, font=font)
     xy = [
         text_position[0] - label_text_padding,
         text_position[1] - label_text_padding,
         text_bbox_right + label_text_padding + label_text_padding,
         text_bbox_bottom + label_text_padding + label_text_padding
     ]
+    image = draw_bounding_box(
+        image = image,
+        bbox_outline_width = 0,
+        bbox_fill_color = label_fill_color,
+        bbox_outline_color = None,
+        bbox = xy,
+        label_rotate_angle = label_rotate_angle,
+        mask_callback = lambda mask_draw: mask_draw.text(text_position, label, font=font, fill=label_text_color))
     return image