bachpc committed on
Commit
da7228c
·
1 Parent(s): 6b58b11

Minor changes

Browse files
Files changed (2) hide show
  1. app.py +7 -0
  2. postprocess.py +20 -7
app.py CHANGED
@@ -211,6 +211,13 @@ def convert_stucture(page_tokens, pil_img, structure_result):
211
  # print('table_class_objects:', table_class_objects)
212
  # print('table_bbox:', table_bbox)
213
 
 
 
 
 
 
 
 
214
  tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.001]
215
  # print('tokens_in_table:', tokens_in_table)
216
 
 
211
  # print('table_class_objects:', table_class_objects)
212
  # print('table_bbox:', table_bbox)
213
 
214
+ tmp = Rect(table_bbox)
215
+ for obj in table_objects:
216
+ if structure_class_names[obj['label']] in ('table column', 'table row'):
217
+ if postprocess.iob(obj['bbox'], table_bbox) >= 0.001:
218
+ tmp.include_rect(obj['bbox'])
219
+ table_bbox = (tmp[0], tmp[1], tmp[2], tmp[3])
220
+
221
  tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.001]
222
  # print('tokens_in_table:', tokens_in_table)
223
 
postprocess.py CHANGED
@@ -151,7 +151,7 @@ def refine_rows(rows, tokens, score_threshold):
151
  """
152
 
153
  if len(tokens) > 0:
154
- rows = nms_by_containment(rows, tokens, overlap_threshold=0.5)
155
  # remove_objects_without_content(tokens, rows) # TODO
156
  else:
157
  rows = nms(rows, match_criteria="object2_overlap",
@@ -169,7 +169,7 @@ def refine_columns(columns, tokens, score_threshold):
169
  """
170
 
171
  if len(tokens) > 0:
172
- columns = nms_by_containment(columns, tokens, overlap_threshold=0.5)
173
  # remove_objects_without_content(tokens, columns) # TODO
174
  else:
175
  columns = nms(columns, match_criteria="object2_overlap",
@@ -180,7 +180,7 @@ def refine_columns(columns, tokens, score_threshold):
180
  return columns
181
 
182
 
183
- def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5):
184
  """
185
  Non-maxima suppression (NMS) of objects based on shared containment of other objects.
186
  """
@@ -198,10 +198,23 @@ def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5
198
  for object1_num in range(object2_num):
199
  if not suppression[object1_num]:
200
  object1_packages = set(packages_by_container[object1_num])
201
- if len(object2_packages.intersection(object1_packages)) > 0 \
202
- and (iob(container_objects[object2_num]['bbox'], container_objects[object1_num]['bbox']) >= 0.5 \
203
- or iob(container_objects[object1_num]['bbox'], container_objects[object2_num]['bbox']) >= 0.5):
204
- suppression[object2_num] = True
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  final_objects = [obj for idx, obj in enumerate(container_objects) if not suppression[idx]]
207
  return final_objects
 
151
  """
152
 
153
  if len(tokens) > 0:
154
+ rows = nms_by_containment(rows, tokens, overlap_threshold=0.5, target='row')
155
  # remove_objects_without_content(tokens, rows) # TODO
156
  else:
157
  rows = nms(rows, match_criteria="object2_overlap",
 
169
  """
170
 
171
  if len(tokens) > 0:
172
+ columns = nms_by_containment(columns, tokens, overlap_threshold=0.5, target='column')
173
  # remove_objects_without_content(tokens, columns) # TODO
174
  else:
175
  columns = nms(columns, match_criteria="object2_overlap",
 
180
  return columns
181
 
182
 
183
+ def nms_by_containment(container_objects, package_objects, overlap_threshold=0.5, target='row'):
184
  """
185
  Non-maxima suppression (NMS) of objects based on shared containment of other objects.
186
  """
 
198
  for object1_num in range(object2_num):
199
  if not suppression[object1_num]:
200
  object1_packages = set(packages_by_container[object1_num])
201
+ if len(object2_packages.intersection(object1_packages)) > 0:
202
+ if target == 'row':
203
+ row1_height = container_objects[object1_num]['bbox'][3] - container_objects[object1_num]['bbox'][1]
204
+ row2_height = container_objects[object2_num]['bbox'][3] - container_objects[object2_num]['bbox'][1]
205
+ min_row_overlap = max(container_objects[object1_num]['bbox'][1], container_objects[object2_num]['bbox'][1])
206
+ max_row_overlap = min(container_objects[object1_num]['bbox'][3], container_objects[object2_num]['bbox'][3])
207
+ overlap_height = max_row_overlap - min_row_overlap
208
+ overlap_fraction = max(overlap_height/row1_height, overlap_height/row2_height)
209
+ elif target == 'column':
210
+ col1_height = container_objects[object1_num]['bbox'][2] - container_objects[object1_num]['bbox'][0]
211
+ col2_height = container_objects[object2_num]['bbox'][2] - container_objects[object2_num]['bbox'][0]
212
+ min_col_overlap = max(container_objects[object1_num]['bbox'][0], container_objects[object2_num]['bbox'][0])
213
+ max_col_overlap = min(container_objects[object1_num]['bbox'][2], container_objects[object2_num]['bbox'][2])
214
+ overlap_width = max_col_overlap - min_col_overlap
215
+ overlap_fraction = max(overlap_width/col1_height, overlap_width/col2_height)
216
+ if overlap_fraction >= 0.5:
217
+ suppression[object2_num] = True
218
 
219
  final_objects = [obj for idx, obj in enumerate(container_objects) if not suppression[idx]]
220
  return final_objects