Spaces:

bachpc
/

table-structure-recognition

Build error

App Files Files Community

bachpc committed on Apr 7, 2023

Commit

2a27a15

1 Parent(s): 9e9067e

Improve visualization

Browse files

Files changed (1) hide show

app.py +86 -12

app.py CHANGED Viewed

@@ -4,10 +4,14 @@ import cv2
 import numpy as np
 import pandas as pd
 import torch
 # import sys
 # import json
 from collections import OrderedDict, defaultdict
 import xml.etree.ElementTree as ET
 from paddleocr import PaddleOCR
 import pytesseract
 from pytesseract import Output
@@ -80,10 +84,15 @@ def crop_image(pil_img, detection_result, padding=30):
         x2 = min(width, int((min_x + w / 2) * width) + padding)
         y2 = min(height, int((min_y + h / 2) * height) + padding)
         # print(x1, y1, x2, y2)
         crop_image = image[y1:y2, x1:x2, :]
-        crop_images.append(cv_to_PIL(crop_image))
-        cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
     return crop_images, cv_to_PIL(image)
@@ -169,15 +178,39 @@ def visualize_ocr(pil_img, ocr_result):
         x2 = int(bbox[2])
         y2 = int(bbox[3])
         cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
-        cv2.putText(image, res['text'], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.5, color=(0, 0, 255))
     return cv_to_PIL(image)
 def visualize_structure(pil_img, structure_result):
     image = PIL_to_cv(pil_img)
     width = image.shape[1]
     height = image.shape[0]
     # print(width, height)
     for i, result in enumerate(structure_result):
         class_id = int(result[5])
         score = float(result[4])
@@ -191,24 +224,65 @@ def visualize_structure(pil_img, structure_result):
         x2 = int((min_x + w / 2) * width)
         y2 = int((min_y + h / 2) * height)
         # print(x1, y1, x2, y2)
         if score >= structure_class_thresholds[structure_class_names[class_id]]:
-            cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
             #cv2.putText(image, str(i)+'-'+str(class_id), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))
-    return cv_to_PIL(image)
 def visualize_cells(pil_img, cells):
-    image = PIL_to_cv(pil_img)
     for i, cell in enumerate(cells):
         bbox = cell['bbox']
-        x1 = int(bbox[0])
-        y1 = int(bbox[1])
-        x2 = int(bbox[2])
-        y2 = int(bbox[3])
-        cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
-    return cv_to_PIL(image)
 def pytess(cell_pil_img):

 import numpy as np
 import pandas as pd
 import torch
+import io
 # import sys
 # import json
 from collections import OrderedDict, defaultdict
 import xml.etree.ElementTree as ET
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
 from paddleocr import PaddleOCR
 import pytesseract
 from pytesseract import Output
         x2 = min(width, int((min_x + w / 2) * width) + padding)
         y2 = min(height, int((min_y + h / 2) * height) + padding)
         # print(x1, y1, x2, y2)
         crop_image = image[y1:y2, x1:x2, :]
+        crop_image = cv_to_PIL(crop_image)
+        if class_id == 1:  # table rotated
+            crop_image = crop_image.rotate(270, expand=True)
+        crop_images.append(crop_image)
+        cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 0, 255))
     return crop_images, cv_to_PIL(image)
         x2 = int(bbox[2])
         y2 = int(bbox[3])
         cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
+        cv2.putText(image, res['text'], (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.25, color=(255, 0, 0))
     return cv_to_PIL(image)
def get_bbox_decorations(data_type, label):
    """Pick the drawing style used for a bounding box of the given label.

    Args:
        data_type: Only consulted when ``label`` is 0; the value
            ``'detection'`` selects the lightly filled, hatched variant.
        label: Numeric class label of the box.

    Returns:
        A 4-tuple ``(color, alpha, linewidth, hatch)``. ``hatch`` is ``None``
        when no hatch pattern is used. Labels without a dedicated style get
        an invisible gray style (zero alpha and zero line width).
    """
    # Label 0 is the only one whose style depends on the data type.
    if label == 0:
        is_detection = data_type == 'detection'
        return ('brown', 0.05, 3, '//') if is_detection else ('brown', 0, 3, None)

    # Remaining labels map to one fixed style each; checked in order.
    fixed_styles = (
        (1, ('red', 0.15, 2, None)),
        (2, ('blue', 0.15, 2, None)),
        (3, ('magenta', 0.2, 3, '//')),
        (4, ('cyan', 0.2, 4, '//')),
        (5, ('green', 0.2, 4, '\\\\')),
    )
    for known_label, style in fixed_styles:
        if label == known_label:
            return style

    # Fallback for any label not listed above.
    return 'gray', 0, 0, None
 def visualize_structure(pil_img, structure_result):
     image = PIL_to_cv(pil_img)
     width = image.shape[1]
     height = image.shape[0]
     # print(width, height)
+    fig, ax = plt.subplots(1)
+    ax.imshow(pil_img, interpolation='lanczos')
     for i, result in enumerate(structure_result):
         class_id = int(result[5])
         score = float(result[4])
         x2 = int((min_x + w / 2) * width)
         y2 = int((min_y + h / 2) * height)
         # print(x1, y1, x2, y2)
+        bbox = [x1, y1, x2, y2]
         if score >= structure_class_thresholds[structure_class_names[class_id]]:
+            #cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 255, 0))
             #cv2.putText(image, str(i)+'-'+str(class_id), (x1-10, y1), cv2.FONT_HERSHEY_SIMPLEX, fontScale=1, color=(0,0,255))
+            color, alpha, linewidth, hatch = get_bbox_decorations('recognition', class_id)
+            # Fill
+            rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
+                                     linewidth=linewidth, alpha=alpha,
+                                     edgecolor='none',facecolor=color,
+                                     linestyle=None)
+            ax.add_patch(rect)
+            # Hatch
+            rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
+                                     linewidth=1, alpha=0.4,
+                                     edgecolor=color,facecolor='none',
+                                     linestyle='--',hatch=hatch)
+            ax.add_patch(rect)
+            # Edge
+            rect = patches.Rectangle(bbox[:2], bbox[2]-bbox[0], bbox[3]-bbox[1],
+                                     linewidth=linewidth,
+                                     edgecolor=color,facecolor='none',
+                                     linestyle="--")
+            ax.add_patch(rect)
+    plt.axis('off')
+    img_buf = io.BytesIO()
+    plt.savefig(img_buf, bbox_inches='tight', dpi=100)
+    return PIL.Image.open(img_buf)
 def visualize_cells(pil_img, cells):
     """Draw every table cell as a magenta overlay on top of the image.

     Args:
         pil_img: Image handed to ``ax.imshow`` as the background.
         cells: Iterable of mappings. Each one must provide ``'bbox'``
             (indexed here as ``x1, y1, x2, y2``) and ``'header'`` (truthy
             for header cells, which get a stronger fill).

     Returns:
         The rendered figure, re-opened with ``PIL.Image.open`` from an
         in-memory buffer written by ``savefig`` (its default format).
     """
     fig, ax = plt.subplots(1)
     try:
         ax.imshow(pil_img, interpolation='lanczos')
         for cell in cells:
             bbox = cell['bbox']
             origin = bbox[:2]
             box_w = bbox[2] - bbox[0]
             box_h = bbox[3] - bbox[1]
             # Header cells are tinted more strongly so they stand out.
             alpha = 0.3 if cell['header'] else 0.125
             # Fill
             ax.add_patch(patches.Rectangle(origin, box_w, box_h, linewidth=1,
                                            edgecolor='none', facecolor="magenta",
                                            alpha=alpha))
             # Hatch
             ax.add_patch(patches.Rectangle(origin, box_w, box_h, linewidth=1,
                                            edgecolor="magenta", facecolor='none',
                                            linestyle="--", alpha=0.08, hatch='///'))
             # Edge
             ax.add_patch(patches.Rectangle(origin, box_w, box_h, linewidth=1,
                                            edgecolor="magenta", facecolor='none',
                                            linestyle="--"))
         # Operate on this figure/axes explicitly instead of pyplot's
         # implicit "current" ones.
         ax.axis('off')
         img_buf = io.BytesIO()
         fig.savefig(img_buf, bbox_inches='tight', dpi=100)
     finally:
         # pyplot keeps every figure alive until it is closed; without this
         # each call would leak one figure in a long-running process.
         plt.close(fig)
     # Rewind so the image reader starts at the beginning of the data.
     img_buf.seek(0)
     return PIL.Image.open(img_buf)
 def pytess(cell_pil_img):