Spaces:
Build error
Build error
Minor changes
Browse files
- app.py +7 -0
- postprocess.py +20 -7
app.py
CHANGED
@@ -211,6 +211,13 @@ def convert_stucture(page_tokens, pil_img, structure_result):
|
|
211 |
# print('table_class_objects:', table_class_objects)
|
212 |
# print('table_bbox:', table_bbox)
|
213 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
214 |
tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.001]
|
215 |
# print('tokens_in_table:', tokens_in_table)
|
216 |
|
|
|
211 |
# print('table_class_objects:', table_class_objects)
|
212 |
# print('table_bbox:', table_bbox)
|
213 |
|
214 |
+
tmp = Rect(table_bbox)
|
215 |
+
for obj in table_objects:
|
216 |
+
if structure_class_names[obj['label']] in ('table column', 'table row'):
|
217 |
+
if postprocess.iob(obj['bbox'], table_bbox) >= 0.001:
|
218 |
+
tmp.include_rect(obj['bbox'])
|
219 |
+
table_bbox = (tmp[0], tmp[1], tmp[2], tmp[3])
|
220 |
+
|
221 |
tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.001]
|
222 |
# print('tokens_in_table:', tokens_in_table)
|
223 |
|
postprocess.py
CHANGED
@@ -151,7 +151,7 @@ def refine_rows(rows, tokens, score_threshold):
|
|
151 |
"""
|
152 |
|
153 |
if len(tokens) > 0:
|
154 |
-
rows = nms_by_containment(rows, tokens, overlap_threshold=0.5)
|
155 |
# remove_objects_without_content(tokens, rows) # TODO
|
156 |
else:
|
157 |
rows = nms(rows, match_criteria="object2_overlap",
|
@@ -169,7 +169,7 @@ def refine_columns(columns, tokens, score_threshold):
|
|
169 |
"""
|
170 |
|
171 |
if len(tokens) > 0:
|
172 |
-
columns = nms_by_containment(columns, tokens, overlap_threshold=0.5)
|
173 |
# remove_objects_without_content(tokens, columns) # TODO
|
174 |
else:
|
175 |
columns = nms(columns, match_criteria="object2_overlap",
|
@@ -180,7 +180,7 @@ def refine_columns(columns, tokens, score_threshold):
|
|
180 |
return columns
|
181 |
|
182 |
|
183 |
-
def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5):
|
184 |
"""
|
185 |
Non-maxima suppression (NMS) of objects based on shared containment of other objects.
|
186 |
"""
|
@@ -198,10 +198,23 @@ def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5
|
|
198 |
for object1_num in range(object2_num):
|
199 |
if not suppression[object1_num]:
|
200 |
object1_packages = set(packages_by_container[object1_num])
|
201 |
-
if len(object2_packages.intersection(object1_packages)) > 0
|
202 |
-
|
203 |
-
|
204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
205 |
|
206 |
final_objects = [obj for idx, obj in enumerate(container_objects) if not suppression[idx]]
|
207 |
return final_objects
|
|
|
151 |
"""
|
152 |
|
153 |
if len(tokens) > 0:
|
154 |
+
rows = nms_by_containment(rows, tokens, overlap_threshold=0.5, target='row')
|
155 |
# remove_objects_without_content(tokens, rows) # TODO
|
156 |
else:
|
157 |
rows = nms(rows, match_criteria="object2_overlap",
|
|
|
169 |
"""
|
170 |
|
171 |
if len(tokens) > 0:
|
172 |
+
columns = nms_by_containment(columns, tokens, overlap_threshold=0.5, target='column')
|
173 |
# remove_objects_without_content(tokens, columns) # TODO
|
174 |
else:
|
175 |
columns = nms(columns, match_criteria="object2_overlap",
|
|
|
180 |
return columns
|
181 |
|
182 |
|
183 |
+
def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5, target='row'):
|
184 |
"""
|
185 |
Non-maxima suppression (NMS) of objects based on shared containment of other objects.
|
186 |
"""
|
|
|
198 |
for object1_num in range(object2_num):
|
199 |
if not suppression[object1_num]:
|
200 |
object1_packages = set(packages_by_container[object1_num])
|
201 |
+
if len(object2_packages.intersection(object1_packages)) > 0:
|
202 |
+
if target == 'row':
|
203 |
+
row1_height = container_objects[object1_num]['bbox'][3] - container_objects[object1_num]['bbox'][1]
|
204 |
+
row2_height = container_objects[object2_num]['bbox'][3] - container_objects[object2_num]['bbox'][1]
|
205 |
+
min_row_overlap = max(container_objects[object1_num]['bbox'][1], container_objects[object2_num]['bbox'][1])
|
206 |
+
max_row_overlap = min(container_objects[object1_num]['bbox'][3], container_objects[object2_num]['bbox'][3])
|
207 |
+
overlap_height = max_row_overlap - min_row_overlap
|
208 |
+
overlap_fraction = max(overlap_height/row1_height, overlap_height/row2_height)
|
209 |
+
elif target == 'column':
|
210 |
+
col1_height = container_objects[object1_num]['bbox'][2] - container_objects[object1_num]['bbox'][0]
|
211 |
+
col2_height = container_objects[object2_num]['bbox'][2] - container_objects[object2_num]['bbox'][0]
|
212 |
+
min_col_overlap = max(container_objects[object1_num]['bbox'][0], container_objects[object2_num]['bbox'][0])
|
213 |
+
max_col_overlap = min(container_objects[object1_num]['bbox'][2], container_objects[object2_num]['bbox'][2])
|
214 |
+
overlap_width = max_col_overlap - min_col_overlap
|
215 |
+
overlap_fraction = max(overlap_width/col1_height, overlap_width/col2_height)
|
216 |
+
if overlap_fraction >= 0.5:
|
217 |
+
suppression[object2_num] = True
|
218 |
|
219 |
final_objects = [obj for idx, obj in enumerate(container_objects) if not suppression[idx]]
|
220 |
return final_objects
|