Spaces:

bachpc
/

table-structure-recognition

Build error

App Files Files Community

bachpc committed on Apr 3, 2023

Commit

ba538d2

1 Parent(s): 76f6ada

Add detection

Browse files

Files changed (4) hide show

app.py +261 -102
postprocess.py +35 -37
requirements.txt +77 -18
weights/detection_wts.pt +3 -0

app.py CHANGED Viewed

@@ -1,30 +1,28 @@
 import streamlit as st
 import PIL
 import numpy as np
 import torch
-from collections import defaultdict
-import cv2
-from doctr.io import DocumentFile
-from doctr.models import ocr_predictor
-from doctr.utils.visualization import visualize_page
 import pytesseract
 from pytesseract import Output
-from bs4 import BeautifulSoup as bs
-from html import escape
-import sys, json
 import postprocess
-ocr_predictor = ocr_predictor('db_resnet50', 'crnn_vgg16_bn', pretrained=True)
 structure_model = torch.hub.load('ultralytics/yolov5', 'custom', 'weights/structure_wts.pt', force_reload=True)
 imgsz = 640
 structure_class_names = [
     'table', 'table column', 'table row', 'table column header',
     'table projected row header', 'table spanning cell', 'no object'
@@ -42,15 +40,22 @@ structure_class_thresholds = {
 def PIL_to_cv(pil_img):
-    return cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
 def cv_to_PIL(cv_img):
     return PIL.Image.fromarray(cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB))
-def table_structure(filename):
-    pil_img = PIL.Image.open(filename)
     image = PIL_to_cv(pil_img)
     pred = structure_model(image, size=imgsz)
     pred = pred.xywhn[0]
@@ -58,32 +63,59 @@ def table_structure(filename):
     return result
-def ocr(filename):
-    doc = DocumentFile.from_images(filename.read())
-    result = ocr_predictor(doc).export()
-    result = result['pages'][0]
-    H, W = result['dimensions']
     ocr_res = []
-    for block in result['blocks']:
-        for line in block['lines']:
-            for word in line['words']:
-                bbox = word['geometry']
-                word_info = {
-                    'bbox': [int(bbox[0][0] * W), int(bbox[0][1] * H), int(bbox[1][0] * W), int(bbox[1][1] * H)],
-                    'text': word['value']
-                }
-                ocr_res.append(word_info)
     return ocr_res
-def convert_stucture(page_tokens, filename, structure_result):
-    pil_img = PIL.Image.open(filename)
     image = PIL_to_cv(pil_img)
     width = image.shape[1]
     height = image.shape[0]
     # print(width, height)
     bboxes = []
     scores = []
     labels = []
@@ -94,11 +126,11 @@ def convert_stucture(page_tokens, filename, structure_result):
         min_y = result[1]
         w = result[2]
         h = result[3]
-        x1 = int((min_x-w/2)*width)
-        y1 = int((min_y-h/2)*height)
-        x2 = int((min_x+w/2)*width)
-        y2 = int((min_y+h/2)*height)
         # print(x1, y1, x2, y2)
         bboxes.append([x1, y1, x2, y2])
@@ -109,9 +141,9 @@ def convert_stucture(page_tokens, filename, structure_result):
     for bbox, score, label in zip(bboxes, scores, labels):
         table_objects.append({'bbox': bbox, 'score': score, 'label': label})
     # print('table_objects:', table_objects)
     table = {'objects': table_objects, 'page_num': 0}
     table_class_objects = [obj for obj in table_objects if obj['label'] == structure_class_map['table']]
     if len(table_class_objects) > 1:
         table_class_objects = sorted(table_class_objects, key=lambda x: x['score'], reverse=True)
@@ -121,17 +153,54 @@ def convert_stucture(page_tokens, filename, structure_result):
         table_bbox = (0,0,1000,1000)
     # print('table_class_objects:', table_class_objects)
     # print('table_bbox:', table_bbox)
     tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.5]
     # print('tokens_in_table:', tokens_in_table)
     table_structures, cells, confidence_score = postprocess.objects_to_cells(table, table_objects, tokens_in_table, structure_class_names, structure_class_thresholds)
     return table_structures, cells, confidence_score
-def visualize_cells(filename, cells, ax):
-    pil_img = PIL.Image.open(filename)
     image = PIL_to_cv(pil_img)
     for i, cell in enumerate(cells):
         bbox = cell['bbox']
@@ -140,7 +209,7 @@ def visualize_cells(filename, cells, ax):
         x2 = int(bbox[2])
         y2 = int(bbox[3])
         cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
-    ax.image(cv_to_PIL(image))
 def pytess(cell_pil_img):
@@ -175,55 +244,125 @@ def remove_noise_and_smooth(pil_img):
     return pil_img
-def extract_text_from_cells(filename, cells):
-    pil_img = PIL.Image.open(filename)
-    pil_img, factor = resize(pil_img)
-    #pil_img = remove_noise_and_smooth(pil_img)
-    #display(pil_img)
     for cell in cells:
-        bbox = [x * factor for x in cell['bbox']]
-        cell_pil_img = pil_img.crop(bbox)
-        #cell_pil_img = remove_noise_and_smooth(cell_pil_img)
-        #cell_pil_img = tess_prep(cell_pil_img)
-        cell['text'] = pytess(cell_pil_img)
     return cells
 def cells_to_html(cells):
     for cell in cells:
-        cell['column_nums'].sort()
-        cell['row_nums'].sort()
-    n_cols = max(cell['column_nums'][-1] for cell in cells) + 1
-    n_rows = max(cell['row_nums'][-1] for cell in cells) + 1
-    html_code = ''
-    for r in range(n_rows):
-        r_cells = [cell for cell in cells if cell['row_nums'][0] == r]
-        r_cells.sort(key=lambda x: x['column_nums'][0])
-        r_html = ''
-        for cell in r_cells:
-            rowspan = cell['row_nums'][-1] - cell['row_nums'][0] + 1
-            colspan = cell['column_nums'][-1] - cell['column_nums'][0] + 1
-            r_html += f'<td rowspan="{rowspan}" colspan="{colspan}">{escape(cell["text"])}</td>'
-        html_code += f'<tr>{r_html}</tr>'
-    html_code = '''<html>
-                   <head>
-                   <meta charset="UTF-8">
-                   <style>
-                   table, th, td {
-                     border: 1px solid black;
-                     font-size: 10px;
-                   }
-                   </style>
-                   </head>
-                   <body>
-                   <table frame="hsides" rules="groups" width="100%%">
-                     %s
-                   </table>
-                   </body>
-                   </html>''' % html_code
-    soup = bs(html_code)
-    html_code = soup.prettify()
-    return html_code
 def main():
@@ -234,7 +373,7 @@ def main():
     cols = st.columns((1, 1))
     cols[0].subheader("Input page")
-    cols[1].subheader("Structure output")
     st.sidebar.title("Image upload")
     st.set_option('deprecation.showfileUploaderEncoding', False)
@@ -247,19 +386,39 @@ def main():
         else:
             print(filename)
-            cols[0].image(filename)
-            ocr_res = ocr(filename)
-            structure_result = table_structure(filename)
-            table_structures, cells, confidence_score = convert_stucture(ocr_res, filename, structure_result)
-            visualize_cells(filename, cells, cols[1])
-            cells = extract_text_from_cells(filename, cells)
-            html_code = cells_to_html(cells)
-            st.markdown("\nHTML output:")
-            st.markdown(html_code, unsafe_allow_html=True)
 if __name__ == '__main__':

 import streamlit as st
 import PIL
+import cv2
 import numpy as np
+import pandas as pd
 import torch
+# import sys
+# import json
+from collections import OrderedDict, defaultdict
+import xml.etree.ElementTree as ET
+from paddleocr import PaddleOCR
 import pytesseract
 from pytesseract import Output
 import postprocess
+ocr_instance = PaddleOCR(use_angle_cls=False, lang='en', use_gpu=True)
+detection_model = torch.hub.load('ultralytics/yolov5', 'custom', 'weights/detection_wts.pt', force_reload=True)
 structure_model = torch.hub.load('ultralytics/yolov5', 'custom', 'weights/structure_wts.pt', force_reload=True)
 imgsz = 640
+detection_class_names = ['table', 'table rotated']
 structure_class_names = [
     'table', 'table column', 'table row', 'table column header',
     'table projected row header', 'table spanning cell', 'no object'
 def PIL_to_cv(pil_img):
     """Turn an RGB image into a numpy array with BGR channel order."""
     rgb_pixels = np.array(pil_img)
     return cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
 def cv_to_PIL(cv_img):
     """Turn a BGR array into a ``PIL.Image`` with RGB channel order."""
     rgb_pixels = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)
     return PIL.Image.fromarray(rgb_pixels)
def table_detection(pil_img):
    """Run ``detection_model`` on an image and return its raw predictions.

    The image is converted with ``PIL_to_cv`` and passed to the model at
    ``imgsz``. The first entry of the prediction's ``xywhn`` attribute is
    moved to the CPU and returned as a numpy array.
    """
    bgr_page = PIL_to_cv(pil_img)
    first_image_boxes = detection_model(bgr_page, size=imgsz).xywhn[0]
    return first_image_boxes.cpu().numpy()
+def table_structure(pil_img):
     image = PIL_to_cv(pil_img)
     pred = structure_model(image, size=imgsz)
     pred = pred.xywhn[0]
     return result
def crop_image(pil_img, detection_result, padding=0.02):
    """Cut every detected region out of the page and draw all detections.

    Args:
        pil_img: page image; converted to a BGR array with ``PIL_to_cv``.
        detection_result: iterable of rows whose first four values are the
            normalised box centre x, centre y, width and height.
        padding: extra margin added on every side of a box, as a fraction
            of the page width/height. Defaults to the previously
            hard-coded 0.02.

    Returns:
        ``(crop_images, annotated)``: a list with one PIL crop per
        detection, and a PIL copy of the page with every (padded) box
        outlined in green.
    """
    page = PIL_to_cv(pil_img)
    height, width = page.shape[:2]
    # Draw on a separate copy: crops are sliced from ``page``, so outlines
    # drawn for earlier detections must not leak into later, overlapping crops.
    annotated = page.copy()
    crop_images = []
    for result in detection_result:
        center_x, center_y, w, h = result[0], result[1], result[2], result[3]
        # Clamp the padded box to the page so the slice below is never empty
        # because of a negative start index.
        x1 = max(0, int((center_x - w / 2 - padding) * width))
        y1 = max(0, int((center_y - h / 2 - padding) * height))
        x2 = min(width, int((center_x + w / 2 + padding) * width))
        y2 = min(height, int((center_y + h / 2 + padding) * height))
        crop_images.append(cv_to_PIL(page[y1:y2, x1:x2, :]))
        cv2.rectangle(annotated, (x1, y1), (x2, y2), color=(0, 255, 0))
    return crop_images, cv_to_PIL(annotated)
def ocr(pil_img):
    """Run ``ocr_instance`` on an image and return axis-aligned text boxes.

    Each recognised line comes back as a polygon plus ``(text, score)``.
    The polygon is reduced to its bounding rectangle.

    Returns:
        A list of ``{'bbox': [x1, y1, x2, y2], 'text': str}`` dicts; empty
        when nothing was recognised.
    """
    image = PIL_to_cv(pil_img)
    result = ocr_instance.ocr(image)
    # The per-image entry can be None or empty when no text is found;
    # iterating it directly would raise a TypeError.
    lines = result[0] if result else None
    if not lines:
        return []
    ocr_res = []
    for points, (text, _score) in lines:
        xs = [p[0] for p in points]
        ys = [p[1] for p in points]
        ocr_res.append({
            'bbox': [min(xs), min(ys), max(xs), max(ys)],
            'text': text
        })
    return ocr_res
+def convert_stucture(page_tokens, pil_img, structure_result):
     image = PIL_to_cv(pil_img)
     width = image.shape[1]
     height = image.shape[0]
     # print(width, height)
     bboxes = []
     scores = []
     labels = []
         min_y = result[1]
         w = result[2]
         h = result[3]
+        x1 = int((min_x - w / 2) * width)
+        y1 = int((min_y - h / 2) * height)
+        x2 = int((min_x + w / 2) * width)
+        y2 = int((min_y + h / 2) * height)
         # print(x1, y1, x2, y2)
         bboxes.append([x1, y1, x2, y2])
     for bbox, score, label in zip(bboxes, scores, labels):
         table_objects.append({'bbox': bbox, 'score': score, 'label': label})
     # print('table_objects:', table_objects)
     table = {'objects': table_objects, 'page_num': 0}
     table_class_objects = [obj for obj in table_objects if obj['label'] == structure_class_map['table']]
     if len(table_class_objects) > 1:
         table_class_objects = sorted(table_class_objects, key=lambda x: x['score'], reverse=True)
         table_bbox = (0,0,1000,1000)
     # print('table_class_objects:', table_class_objects)
     # print('table_bbox:', table_bbox)
     tokens_in_table = [token for token in page_tokens if postprocess.iob(token['bbox'], table_bbox) >= 0.5]
     # print('tokens_in_table:', tokens_in_table)
     table_structures, cells, confidence_score = postprocess.objects_to_cells(table, table_objects, tokens_in_table, structure_class_names, structure_class_thresholds)
     return table_structures, cells, confidence_score
def visualize_ocr(pil_img, ocr_result):
    """Return a copy of the image with every OCR box outlined in green.

    ``ocr_result`` is an iterable of dicts whose ``'bbox'`` entry holds
    ``[x1, y1, x2, y2]``; the coordinates are truncated to ints before drawing.
    """
    canvas = PIL_to_cv(pil_img)
    for entry in ocr_result:
        box = entry['bbox']
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        cv2.rectangle(canvas, top_left, bottom_right, color=(0, 255, 0))
    return cv_to_PIL(canvas)
def visualize_structure(pil_img, structure_result):
    """Outline the structure detections that pass their class threshold.

    Args:
        pil_img: table image; converted to a BGR array with ``PIL_to_cv``.
        structure_result: iterable of rows laid out as normalised
            ``[center_x, center_y, width, height, score, class_id]``.

    Returns:
        A PIL image with every sufficiently confident detection outlined.
    """
    image = PIL_to_cv(pil_img)
    height, width = image.shape[:2]
    for result in structure_result:
        class_id = int(result[5])
        score = float(result[4])
        # Compare against the per-class confidence threshold.
        # ``structure_class_map`` holds class indices, so using it here
        # suppressed every class except the one with index 0.
        if score < structure_class_thresholds[structure_class_names[class_id]]:
            continue
        center_x, center_y, w, h = result[0], result[1], result[2], result[3]
        x1 = int((center_x - w / 2) * width)
        y1 = int((center_y - h / 2) * height)
        x2 = int((center_x + w / 2) * width)
        y2 = int((center_y + h / 2) * height)
        cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 0, 255))
    return cv_to_PIL(image)
+def visualize_cells(pil_img, cells):
     image = PIL_to_cv(pil_img)
     for i, cell in enumerate(cells):
         bbox = cell['bbox']
         x2 = int(bbox[2])
         y2 = int(bbox[3])
         cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
+    return cv_to_PIL(image)
 def pytess(cell_pil_img):
     return pil_img
+# def extract_text_from_cells(pil_img, cells):
+#     pil_img, factor = resize(pil_img)
+#     #pil_img = remove_noise_and_smooth(pil_img)
+#     #display(pil_img)
+#     for cell in cells:
+#         bbox = [x * factor for x in cell['bbox']]
+#         cell_pil_img = pil_img.crop(bbox)
+#         #cell_pil_img = remove_noise_and_smooth(cell_pil_img)
+#         #cell_pil_img = tess_prep(cell_pil_img)
+#         cell['cell text'] = pytess(cell_pil_img)
+#     return cells
def extract_text_from_cells(cells, sep=' '):
    """Fill ``cell['cell_text']`` from the text of the cell's spans.

    Args:
        cells: list of cell dicts, each with a ``'spans'`` list. Spans
            without a ``'text'`` key are ignored.
        sep: string placed between consecutive span texts.

    Returns:
        The same ``cells`` list, modified in place.
    """
    for cell in cells:
        # Join instead of appending ``text + sep`` so the result carries no
        # dangling separator at the end.
        cell['cell_text'] = sep.join(
            span['text'] for span in cell['spans'] if 'text' in span
        )
    return cells
def cells_to_csv(cells):
    """Lay the cells out on a grid and export them as a DataFrame and CSV.

    Every cell is written to each ``(row, column)`` position it spans.
    Header rows are flattened into one label per column by joining the
    distinct header texts of that column with ``' | '``.

    Args:
        cells: list of dicts with ``'row_nums'``, ``'column_nums'``,
            ``'header'`` and ``'cell_text'`` entries.

    Returns:
        ``(df, csv_text)``. For an empty ``cells`` list an empty DataFrame
        and an empty string are returned, so callers can always unpack
        two values.
    """
    if len(cells) == 0:
        return pd.DataFrame(), ''

    num_columns = max(max(cell['column_nums']) for cell in cells) + 1
    num_rows = max(max(cell['row_nums']) for cell in cells) + 1

    # -1 means "no header rows": the header slice below is then empty.
    max_header_row = max(
        (max(cell['row_nums']) for cell in cells if cell['header']),
        default=-1,
    )

    table_array = np.empty([num_rows, num_columns], dtype="object")
    for cell in cells:
        for row_num in cell['row_nums']:
            for column_num in cell['column_nums']:
                table_array[row_num, column_num] = cell["cell_text"]

    header = table_array[:max_header_row + 1, :]
    # Grid slots that no cell covers stay None; skip them so the join
    # cannot fail. OrderedDict keeps first-seen order while dropping repeats.
    flattened_header = [
        ' | '.join(OrderedDict.fromkeys(text for text in col if text is not None))
        for col in header.transpose()
    ]

    df = pd.DataFrame(table_array[max_header_row + 1:, :], index=None, columns=flattened_header)
    return df, df.to_csv(index=None)
 def cells_to_html(cells):
     """Serialise the cells as an HTML ``<table>`` string.

     Cells are emitted row by row, left to right. A cell spanning several
     rows or columns gets ``rowspan``/``colspan`` attributes. A row is
     treated as a header row when its first cell has ``cell['header']``
     set: its cells become ``<th>`` inside ``<thead><tr>``, and
     consecutive header rows share one ``<thead>``. Other rows are plain
     ``<tr>`` elements with ``<td>`` cells.

     Args:
         cells: list of dicts with ``'row_nums'``, ``'column_nums'``,
             ``'header'`` and ``'cell_text'`` entries.

     Returns:
         The table markup as a ``str``; ``'<table></table>'`` for no cells.
     """
     cells = sorted(cells, key=lambda k: (min(k['row_nums']), min(k['column_nums'])))

     table = ET.Element("table")
     current_row = -1
     row = None
     cell_tag = "td"

     for cell in cells:
         this_row = min(cell['row_nums'])

         attrib = {}
         colspan = len(cell['column_nums'])
         if colspan > 1:
             attrib['colspan'] = str(colspan)
         rowspan = len(cell['row_nums'])
         if rowspan > 1:
             attrib['rowspan'] = str(rowspan)

         if row is None or this_row > current_row:
             current_row = this_row
             if cell['header']:
                 cell_tag = "th"
                 # Header cells must sit in a <tr> inside <thead>; reuse the
                 # <thead> when the previous row was a header row too.
                 if len(table) == 0 or table[-1].tag != "thead":
                     ET.SubElement(table, "thead")
                 row = ET.SubElement(table[-1], "tr")
             else:
                 cell_tag = "td"
                 row = ET.SubElement(table, "tr")

         tcell = ET.SubElement(row, cell_tag, attrib=attrib)
         tcell.text = cell['cell_text']

     return ET.tostring(table, encoding="unicode", short_empty_elements=False)
+# def cells_to_html(cells):
+#     for cell in cells:
+#         cell['column_nums'].sort()
+#         cell['row_nums'].sort()
+#     n_cols = max(cell['column_nums'][-1] for cell in cells) + 1
+#     n_rows = max(cell['row_nums'][-1] for cell in cells) + 1
+#     html_code = ''
+#     for r in range(n_rows):
+#         r_cells = [cell for cell in cells if cell['row_nums'][0] == r]
+#         r_cells.sort(key=lambda x: x['column_nums'][0])
+#         r_html = ''
+#         for cell in r_cells:
+#             rowspan = cell['row_nums'][-1] - cell['row_nums'][0] + 1
+#             colspan = cell['column_nums'][-1] - cell['column_nums'][0] + 1
+#             r_html += f'<td rowspan="{rowspan}" colspan="{colspan}">{escape(cell["text"])}</td>'
+#         html_code += f'<tr>{r_html}</tr>'
+#     html_code = '''<html>
+#                    <head>
+#                    <meta charset="UTF-8">
+#                    <style>
+#                    table, th, td {
+#                      border: 1px solid black;
+#                      font-size: 10px;
+#                    }
+#                    </style>
+#                    </head>
+#                    <body>
+#                    <table frame="hsides" rules="groups" width="100%%">
+#                      %s
+#                    </table>
+#                    </body>
+#                    </html>''' % html_code
+#     soup = bs(html_code)
+#     html_code = soup.prettify()
+#     return html_code
 def main():
     cols = st.columns((1, 1))
     cols[0].subheader("Input page")
+    cols[1].subheader("Table(s) detected")
     st.sidebar.title("Image upload")
     st.set_option('deprecation.showfileUploaderEncoding', False)
         else:
             print(filename)
+            pil_img = PIL.Image.open(filename)
+            detection_result = table_detection(pil_img)
+            crop_images, vis_det_img = crop_image(pil_img, detection_result)
+            cols[0].image(vis_det_img)
+            str_cols = st.columns((len(crop_images), ) * 6)
+            str_cols[0].subheader("Table image")
+            str_cols[1].subheader("OCR result")
+            str_cols[2].subheader("Structure result")
+            str_cols[3].subheader("Cells result")
+            str_cols[4].subheader("HTML result")
+            str_cols[5].subheader("CSV result")
+            for img in crop_images:
+                ocr_result = ocr(img)
+                structure_result = table_structure(img)
+                table_structures, cells, confidence_score = convert_stucture(ocr_result, img, structure_result)
+                cells = extract_text_from_cells(cells)
+                html_result = cells_to_html(cells)
+                df, csv_result = cells_to_csv(cells)
+                vis_ocr_img = visualize_ocr(img, ocr_result)
+                vis_str_img = visualize_structure(img, structure_result)
+                vis_cells_img = visualize_cells(img, cells)
+                str_cols[0].image(img)
+                str_cols[1].image(vis_ocr_img)
+                str_cols[2].image(vis_str_img)
+                str_cols[3].image(vis_cells_img)
+                str_cols[4].markdown(html_result, unsafe_allow_html=True)
+                str_cols[5].dataframe(df)
+                str_cols[5].download_button("Download table", csv_result, "file.csv", "text/csv", key='download-csv')
 if __name__ == '__main__':

postprocess.py CHANGED Viewed

@@ -38,9 +38,9 @@ def iou(bbox1, bbox2):
     intersection = Rect(bbox1).intersect(bbox2)
     union = Rect(bbox1).include_rect(bbox2)
-    union_area = union.get_area()  # getArea()
     if union_area > 0:
-        return intersection.get_area() / union.get_area()  # .getArea()
     return 0
@@ -51,9 +51,9 @@ def iob(bbox1, bbox2):
     """
     intersection = Rect(bbox1).intersect(bbox2)
-    bbox1_area = Rect(bbox1).get_area()  # .getArea()
     if bbox1_area > 0:
-        return intersection.get_area() / bbox1_area  # getArea()
     return 0
@@ -144,36 +144,36 @@ def objects_to_table_structures(table_object, objects_in_table, tokens_in_table,
     return table_structures
-def refine_rows(rows, page_spans, score_threshold):
     """
     Apply operations to the detected rows, such as
     thresholding, NMS, and alignment.
     """
-    #MODIFY
-    rows = [obj for obj in rows if obj['score'] >= score_threshold or obj['header']]
-    ###
-    rows = nms_by_containment(rows, page_spans, overlap_threshold=0.5)
-    # remove_objects_without_content(page_spans, rows)  # TODO
     if len(rows) > 1:
         rows = sort_objects_top_to_bottom(rows)
     return rows
-def refine_columns(columns, page_spans, score_threshold):
     """
     Apply operations to the detected columns, such as
     thresholding, NMS, and alignment.
     """
-    #MODIFY
-    columns = [obj for obj in columns if obj['score'] >= score_threshold]
-    ###
-    columns = nms_by_containment(columns, page_spans, overlap_threshold=0.5)
-    # remove_objects_without_content(page_spans, columns)  # TODO
     if len(columns) > 1:
         columns = sort_objects_left_to_right(columns)
@@ -222,10 +222,10 @@ def slot_into_containers(container_objects, package_objects, overlap_threshold=0
     for package_num, package in enumerate(package_objects):
         match_scores = []
         package_rect = Rect(package['bbox'])
-        package_area = package_rect.get_area()  # getArea()
         for container_num, container in enumerate(container_objects):
             container_rect = Rect(container['bbox'])
-            intersect_area = container_rect.intersect(package['bbox']).get_area()  # getArea()
             overlap_fraction = intersect_area / package_area
             match_scores.append({'container': container, 'container_num': container_num, 'score': overlap_fraction})
@@ -298,10 +298,10 @@ def overlaps(bbox1, bbox2, threshold=0.5):
     Test if more than "threshold" fraction of bbox1 overlaps with bbox2.
     """
     rect1 = Rect(list(bbox1))
-    area1 = rect1.get_area()  # .getArea()
     if area1 == 0:
         return False
-    return rect1.intersect(list(bbox2)).get_area()/area1 >= threshold  # getArea()
 def extract_text_from_spans(spans, join_with_space=True, remove_integer_superscripts=True):
@@ -317,6 +317,8 @@ def extract_text_from_spans(spans, join_with_space=True, remove_integer_superscr
     if remove_integer_superscripts:
         for span in spans:
             flags = span['flags']
             if flags & 2**0: # superscript flag
                 if is_int(span['text']):
@@ -438,7 +440,7 @@ def refine_table_structures(table_bbox, table_structures, page_spans, class_thre
     return table_structures
-def nms(objects, match_criteria="object2_overlap", match_threshold=0.05, keep_metric="score", keep_higher=True):
     """
     A customizable version of non-maxima suppression (NMS).
@@ -448,28 +450,24 @@ def nms(objects, match_criteria="object2_overlap", match_threshold=0.05, keep_me
     objects: set of dicts; each object dict must have a 'bbox' and a 'score' field
     match_criteria: how to measure how much two objects "overlap"
     match_threshold: the cutoff for determining that overlap requires suppression of one object
-    keep_metric: which metric to use to determine the object to keep
     keep_higher: if True, keep the object with the higher metric; otherwise, keep the lower
     """
     if len(objects) == 0:
         return []
-    if keep_metric=="score":
-        objects = sort_objects_by_score(objects, reverse=keep_higher)
-    elif keep_metric=="area":
-        objects = sort_objects_by_area(objects, reverse=keep_higher)
     num_objects = len(objects)
     suppression = [False for obj in objects]
     for object2_num in range(1, num_objects):
         object2_rect = Rect(objects[object2_num]['bbox'])
-        object2_area = object2_rect.get_area()  # .getArea()
         for object1_num in range(object2_num):
             if not suppression[object1_num]:
                 object1_rect = Rect(objects[object1_num]['bbox'])
-                object1_area = object1_rect.get_area()  # .getArea()
-                intersect_area = object1_rect.intersect(object2_rect).get_area()  # .getArea()
                 try:
                     if match_criteria=="object1_overlap":
                         metric = intersect_area / object1_area
@@ -719,8 +717,8 @@ def table_structure_to_cells(table_structures, table_spans, table_bbox):
             cell['subcell'] = False
             for supercell in supercells:
                 supercell_rect = Rect(list(supercell['bbox']))
-                if (supercell_rect.intersect(cell_rect).get_area()  # .getArea()
-                        / cell_rect.get_area()) > 0.5:  # getArea()
                     cell['subcell'] = True
                     break
@@ -740,8 +738,8 @@ def table_structure_to_cells(table_structures, table_spans, table_bbox):
         header = True
         for subcell in subcells:
             subcell_rect = Rect(list(subcell['bbox']))
-            subcell_rect_area = subcell_rect.get_area()  # .getArea()
-            if (subcell_rect.intersect(supercell_rect).get_area()  # .getArea()
                     / subcell_rect_area) > 0.5:
                 if cell_rect is None:
                     cell_rect = Rect(list(subcell['bbox']))
@@ -838,7 +836,7 @@ def table_structure_to_cells(table_structures, table_spans, table_bbox):
         for column_num in cell['column_nums']:
             column_rect.include_rect(list(columns[column_num]['bbox']))
         cell_rect = row_rect.intersect(column_rect)
-        if cell_rect.get_area() > 0:  # getArea()
             cell['bbox'] = list(cell_rect)
             pass

     intersection = Rect(bbox1).intersect(bbox2)
     union = Rect(bbox1).include_rect(bbox2)
+    union_area = union.get_area()
     if union_area > 0:
+        return intersection.get_area() / union.get_area()
     return 0
     """
     intersection = Rect(bbox1).intersect(bbox2)
+    bbox1_area = Rect(bbox1).get_area()
     if bbox1_area > 0:
+        return intersection.get_area() / bbox1_area
     return 0
     return table_structures
def refine_rows(rows, tokens, score_threshold):
    """
    Suppress duplicate row detections and order the survivors.

    When tokens are available, duplicates are removed with
    ``nms_by_containment`` (overlap threshold 0.5). Without tokens, plain
    ``nms`` on "object2_overlap" with a 0.5 threshold is used instead.
    If more than one row is left, the rows are passed through
    ``sort_objects_top_to_bottom``.

    ``score_threshold`` is accepted for signature compatibility but is not
    used by this function.
    """
    if len(tokens) == 0:
        rows = nms(rows, match_criteria="object2_overlap",
                   match_threshold=0.5, keep_higher=True)
    else:
        rows = nms_by_containment(rows, tokens, overlap_threshold=0.5)
        # TODO: remove_objects_without_content(tokens, rows)

    if len(rows) > 1:
        rows = sort_objects_top_to_bottom(rows)

    return rows
+def refine_columns(columns, tokens, score_threshold):
     """
     Apply operations to the detected columns, such as
     thresholding, NMS, and alignment.
     """
+    if len(tokens) > 0:
+        columns = nms_by_containment(columns, tokens, overlap_threshold=0.5)
+        # remove_objects_without_content(tokens, columns)  # TODO
+    else:
+        columns = nms(columns, match_criteria="object2_overlap",
+                   match_threshold=0.25, keep_higher=True)
     if len(columns) > 1:
         columns = sort_objects_left_to_right(columns)
     for package_num, package in enumerate(package_objects):
         match_scores = []
         package_rect = Rect(package['bbox'])
+        package_area = package_rect.get_area()
         for container_num, container in enumerate(container_objects):
             container_rect = Rect(container['bbox'])
+            intersect_area = container_rect.intersect(package['bbox']).get_area()
             overlap_fraction = intersect_area / package_area
             match_scores.append({'container': container, 'container_num': container_num, 'score': overlap_fraction})
     Test if more than "threshold" fraction of bbox1 overlaps with bbox2.
     """
     rect1 = Rect(list(bbox1))
+    area1 = rect1.get_area()
     if area1 == 0:
         return False
+    return rect1.intersect(list(bbox2)).get_area()/area1 >= threshold
 def extract_text_from_spans(spans, join_with_space=True, remove_integer_superscripts=True):
     if remove_integer_superscripts:
         for span in spans:
+            if not 'flags' in span:
+                continue
             flags = span['flags']
             if flags & 2**0: # superscript flag
                 if is_int(span['text']):
     return table_structures
+def nms(objects, match_criteria="object2_overlap", match_threshold=0.05, keep_higher=True):
     """
     A customizable version of non-maxima suppression (NMS).
     objects: set of dicts; each object dict must have a 'bbox' and a 'score' field
     match_criteria: how to measure how much two objects "overlap"
     match_threshold: the cutoff for determining that overlap requires suppression of one object
     keep_higher: if True, keep the object with the higher metric; otherwise, keep the lower
     """
     if len(objects) == 0:
         return []
+    objects = sort_objects_by_score(objects, reverse=keep_higher)
     num_objects = len(objects)
     suppression = [False for obj in objects]
     for object2_num in range(1, num_objects):
         object2_rect = Rect(objects[object2_num]['bbox'])
+        object2_area = object2_rect.get_area()
         for object1_num in range(object2_num):
             if not suppression[object1_num]:
                 object1_rect = Rect(objects[object1_num]['bbox'])
+                object1_area = object1_rect.get_area()
+                intersect_area = object1_rect.intersect(object2_rect).get_area()
                 try:
                     if match_criteria=="object1_overlap":
                         metric = intersect_area / object1_area
             cell['subcell'] = False
             for supercell in supercells:
                 supercell_rect = Rect(list(supercell['bbox']))
+                if (supercell_rect.intersect(cell_rect).get_area()
+                        / cell_rect.get_area()) > 0.5:
                     cell['subcell'] = True
                     break
         header = True
         for subcell in subcells:
             subcell_rect = Rect(list(subcell['bbox']))
+            subcell_rect_area = subcell_rect.get_area()
+            if (subcell_rect.intersect(supercell_rect).get_area()
                     / subcell_rect_area) > 0.5:
                 if cell_rect is None:
                     cell_rect = Rect(list(subcell['bbox']))
         for column_num in cell['column_nums']:
             column_rect.include_rect(list(columns[column_num]['bbox']))
         cell_rect = row_rect.intersect(column_rect)
+        if cell_rect.get_area() > 0:
             cell['bbox'] = list(cell_rect)
             pass

requirements.txt CHANGED Viewed

@@ -1,19 +1,78 @@
--e git+https://github.com/mindee/doctr.git#egg=python-doctr[tf]
-streamlit>=0.65.0
-PyMuPDF>=1.16.0,!=1.18.11,!=1.18.12,!=1.19.5
-tf2onnx==1.13.0
-Pillow==9.2.0
-pytesseract==0.3.10
-torch==1.12.0
-torchvision==0.13.0
-beautifulsoup4==4.11.1
-psutil
-numpy>=1.21.6
-scipy>=1.7.3
-thop>=0.1.1
-tqdm>=4.64.1
 gitpython>=3.1.30
-matplotlib>=3.5.3
-pandas>=1.3.5
-seaborn>=0.12.0
-setuptools>=65.5.1

+# PaddleOCR
+shapely
+scikit-image
+imgaug
+pyclipper
+lmdb
+tqdm
+numpy
+visualdl
+rapidfuzz
+opencv-python==4.6.0.66
+opencv-contrib-python==4.6.0.66
+cython
+lxml
+premailer
+openpyxl
+attrdict
+Polygon3
+lanms-neo==1.0.2
+PyMuPDF<1.21.0
+paddleocr
+paddlepaddle
+paddlehub
+# YOLOv5
+# YOLOv5 requirements
+# Usage: pip install -r requirements.txt
+# Base ------------------------------------------------------------------------
 gitpython>=3.1.30
+matplotlib>=3.2.2
+numpy>=1.18.5
+opencv-python>=4.1.1
+Pillow>=7.1.2
+psutil  # system resources
+PyYAML>=5.3.1
+requests>=2.23.0
+scipy>=1.4.1
+thop>=0.1.1  # FLOPs computation
+torch>=1.7.0  # see https://pytorch.org/get-started/locally (recommended)
+torchvision>=0.8.1
+tqdm>=4.64.0
+# protobuf<=3.20.1  # https://github.com/ultralytics/yolov5/issues/8012
+# Logging ---------------------------------------------------------------------
+tensorboard>=2.4.1
+# clearml>=1.2.0
+# comet
+# Plotting --------------------------------------------------------------------
+pandas>=1.1.4
+seaborn>=0.11.0
+# Export ----------------------------------------------------------------------
+# coremltools>=6.0  # CoreML export
+# onnx>=1.12.0  # ONNX export
+# onnx-simplifier>=0.4.1  # ONNX simplifier
+# nvidia-pyindex  # TensorRT export
+# nvidia-tensorrt  # TensorRT export
+# scikit-learn<=1.1.2  # CoreML quantization
+# tensorflow>=2.4.1  # TF exports (-cpu, -aarch64, -macos)
+# tensorflowjs>=3.9.0  # TF.js export
+# openvino-dev  # OpenVINO export
+# Deploy ----------------------------------------------------------------------
+setuptools>=65.5.1 # Snyk vulnerability fix
+# tritonclient[all]~=2.24.0
+# Extras ----------------------------------------------------------------------
+# ipython  # interactive notebook
+# mss  # screenshots
+# albumentations>=1.0.3
+# pycocotools>=2.0.6  # COCO mAP
+# ultralytics  # HUB https://hub.ultralytics.com
+# Other
+pytesseract==0.3.10
+# beautifulsoup4==4.11.1

weights/detection_wts.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:32127c7362c16c5839cb95c942cbc9ad1412fd953eb4b0b93758a49f01e312cb
+size 14397685