alps / utils /sorting.py
yumikimi381's picture
Upload folder using huggingface_hub
daf0288 verified
from typing import Any, List, Literal, Mapping, Optional, Tuple, Union, Dict, Type, Sequence
@staticmethod
#Based on deepdoctection
def group_words_into_lines(
    word_boxes: List[List[int]], image_id: Optional[str] = None
) -> List[List[int]]:
    """
    Arrange word boxes into horizontal text lines and return them in reading order.

    Words are assigned to rows greedily in input order, rows are then ordered
    top-to-bottom by their upper bound, and the words inside each row are
    ordered left-to-right by ``xmin``.

    Args:
        word_boxes: Sequence of boxes, each indexable as
            ``(xmin, ymin, xmax, ymax, ...)``. Only indices 0, 1 and 3 are read,
            so trailing entries (e.g. a score) are carried through untouched.
            NOTE(review): the original docstring describes an ``(n, 5)`` numpy
            array with a trailing score -- confirm against callers.
        image_id: Unused; kept for backward compatibility with existing callers.

    Returns:
        The same box objects that were passed in, reordered into reading order.
    """
    # (row index, box) pairs, in the order the boxes were given.
    assignments = []
    # Each row is described by the vertical extent of the first word put in it.
    rows: List[Dict[str, float]] = []

    for bbox in word_boxes:
        ymin = bbox[1]
        ymax = bbox[3]
        bbox_cy = (ymin + ymax) / 2

        for row_idx, row in enumerate(rows):
            row_cy = (row["upper"] + row["lower"]) / 2
            # A word belongs to a row if its vertical center lies strictly inside
            # the row bounds, or if the row's center lies strictly inside the word.
            if (row["upper"] < bbox_cy < row["lower"]) or (ymin < row_cy < ymax):
                # Row bounds are intentionally NOT updated here: a row is defined
                # by the first word that opened it.
                assignments.append((row_idx, bbox))
                break
        else:
            # No existing row matched: this word opens a new row.
            rows.append({"upper": ymin, "lower": ymax})
            assignments.append((len(rows) - 1, bbox))

    # Map every row index to its top-to-bottom rank. The mapping must go
    # row index -> rank (not rank -> row index), because it is looked up with
    # the row index stored in `assignments`.
    top_to_bottom = sorted(range(len(rows)), key=lambda idx: rows[idx]["upper"])
    row_rank = {row_idx: rank for rank, row_idx in enumerate(top_to_bottom)}

    # Reading order: by row rank first, then left-to-right by xmin.
    assignments.sort(key=lambda item: (row_rank[item[0]], item[1][0]))
    return [bbox for _, bbox in assignments]