Spaces:

RapidAI
/

TableStructureRec

Running

App Files Files Community

Joker1212 committed on Sep 30, 2024

Commit

3b5e13f

verified ·

1 Parent(s): 9aa3049

Upload 4 files

Browse files

Files changed (3) hide show

app.py +172 -0
requirements.txt +8 -0
utils.py +226 -0

app.py ADDED Viewed

	@@ -0,0 +1,172 @@

+import time
+import cv2
+import gradio as gr
+from lineless_table_rec import LinelessTableRecognition
+from paddleocr import PPStructure
+from rapid_table import RapidTable
+from rapidocr_onnxruntime import RapidOCR
+from table_cls import TableCls
+from wired_table_rec import WiredTableRecognition
+from utils import plot_rec_box, LoadImage, format_html, box_4_2_poly_to_box_4_1
+img_loader = LoadImage()
+table_rec_path = "models/table_rec/ch_ppstructure_mobile_v2_SLANet.onnx"
+det_model_dir = {
+    "mobile_det": "models/ocr/ch_PP-OCRv4_det_infer.onnx",
+    "server_det": "models/ocr/ch_PP-OCRv4_det_server_infer.onnx"
+}
+rec_model_dir = {
+    "mobile_rec": "models/ocr/ch_PP-OCRv4_rec_infer.onnx",
+    "server_rec": "models/ocr/ch_PP-OCRv4_rec_server_infer.onnx"
+}
+table_engine_list = [
+    "auto",
+    "rapid_table",
+    "wired_table_v2",
+    "pp_table",
+    "wired_table_v1",
+    "lineless_table"
+]
+# 示例图片路径
+example_images = [
+    "images/lineless1.png",
+    "images/wired1.png",
+    "images/lineless2.png",
+    "images/wired2.png",
+    "images/lineless3.jpg",
+    "images/wired3.png",
+]
+rapid_table_engine = RapidTable(model_path=table_rec_path)
+wired_table_engine_v1 = WiredTableRecognition(version="v1")
+wired_table_engine_v2 = WiredTableRecognition(version="v2")
+lineless_table_engine = LinelessTableRecognition()
+table_cls = TableCls()
+ocr_engine_dict = {}
+pp_engine_dict = {}
+for det_model in det_model_dir.keys():
+    for rec_model in rec_model_dir.keys():
+        det_model_path = det_model_dir[det_model]
+        rec_model_path = rec_model_dir[rec_model]
+        key = f"{det_model}_{rec_model}"
+        ocr_engine_dict[key] = RapidOCR(det_model_path=det_model_path, rec_model_path=rec_model_path)
+        pp_engine_dict[key] = PPStructure(
+            layout=False,
+            show_log=False,
+            table=True,
+            use_onnx=True,
+            table_model_dir=table_rec_path,
+            det_model_dir=det_model_path,
+            rec_model_dir=rec_model_path
+        )
+def select_ocr_model(det_model, rec_model):
+    return ocr_engine_dict[f"{det_model}_{rec_model}"]
+def select_table_model(img, table_engine_type, det_model, rec_model):
+    if table_engine_type == "rapid_table":
+        return rapid_table_engine, 0
+    elif table_engine_type == "wired_table_v1":
+        return wired_table_engine_v1, 0
+    elif table_engine_type == "wired_table_v2":
+        print("使用v2 wired table")
+        return wired_table_engine_v2, 0
+    elif table_engine_type == "lineless_table":
+        return lineless_table_engine, 0
+    elif table_engine_type == "pp_table":
+        return pp_engine_dict[f"{det_model}_{rec_model}"], 0
+    elif table_engine_type == "auto":
+        cls, elasp = table_cls(img)
+        if cls == 'wired':
+            table_engine = wired_table_engine_v2
+        else:
+            table_engine = lineless_table_engine
+        return table_engine, elasp
+def process_image(img, table_engine_type, det_model, rec_model):
+    img = img_loader(img)
+    start = time.time()
+    table_engine, select_elapse = select_table_model(img, table_engine_type, det_model, rec_model)
+    ocr_engine = select_ocr_model(det_model, rec_model)
+    if isinstance(table_engine, PPStructure):
+        result = table_engine(img, return_ocr_result_in_table=True)
+        html = result[0]['res']['html']
+        polygons = result[0]['res']['cell_bbox']
+        polygons = [[polygon[0], polygon[1], polygon[4], polygon[5]] for polygon in polygons]
+        ocr_boxes = result[0]['res']['boxes']
+        all_elapse = f"- `table all cost: {time.time() - start:.5f}`"
+    else:
+        ocr_res, ocr_infer_elapse = ocr_engine(img)
+        det_cost, cls_cost, rec_cost = ocr_infer_elapse
+        ocr_boxes = [box_4_2_poly_to_box_4_1(ori_ocr[0]) for ori_ocr in ocr_res]
+        if isinstance(table_engine, RapidTable):
+            html, polygons, table_rec_elapse = table_engine(img, ocr_result=ocr_res)
+        elif isinstance(table_engine, (WiredTableRecognition, LinelessTableRecognition)):
+            html, table_rec_elapse, polygons, _, _ = table_engine(img, ocr_result=ocr_res)
+        sum_elapse = time.time() - start
+        all_elapse = f"- table all cost: {sum_elapse:.5f}\n - table rec cost: {table_rec_elapse:.5f}\n - ocr cost: {det_cost + cls_cost + rec_cost:.5f}"
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    table_boxes_img = plot_rec_box(img.copy(), polygons)
+    ocr_boxes_img = plot_rec_box(img.copy(), ocr_boxes)
+    complete_html = format_html(html)
+    return complete_html, table_boxes_img, ocr_boxes_img, all_elapse
+def main():
+    """Build the two-column Gradio interface and launch it.
+
+    The left column holds the image input, the example picker, the three model
+    dropdowns, the run button and the timing text; the right column shows the
+    rendered HTML and the two annotated images. Clicking the button calls
+    ``process_image`` with the current selections.
+    """
+    det_models_labels = list(det_model_dir.keys())
+    rec_models_labels = list(rec_model_dir.keys())
+    with gr.Blocks() as demo:
+        with gr.Row():  # two-column layout
+            with gr.Column():  # left column
+                img_input = gr.Image(label="Upload or Select Image",  sources="upload")
+                # Example image picker
+                examples = gr.Examples(
+                    examples=example_images,
+                    inputs=img_input,
+                    fn=lambda x: x,  # simply return the image path
+                    outputs=img_input,
+                    cache_examples=True
+                )
+                table_engine_type = gr.Dropdown(table_engine_list, label="Select Table Recognition Engine",
+                                                value=table_engine_list[0])
+                det_model = gr.Dropdown(det_models_labels, label="Select OCR Detection Model",
+                                        value=det_models_labels[0])
+                rec_model = gr.Dropdown(rec_models_labels, label="Select OCR Recognition Model",
+                                        value=rec_models_labels[0])
+                run_button = gr.Button("Run")
+                gr.Markdown("# Elapsed Time")
+                elapse_text = gr.Text(label="")  # use a `gr.Text` component to show the string
+            with gr.Column():  # right column
+                # Markdown headings separate the individual components
+                gr.Markdown("# Html Render")
+                html_output = gr.HTML(label="", elem_classes="scrollable-container")
+                gr.Markdown("# Table Boxes")
+                table_boxes_output = gr.Image(label="")
+                gr.Markdown("# OCR Boxes")
+                ocr_boxes_output = gr.Image(label="")
+        # The order of `outputs` matches the tuple returned by process_image.
+        run_button.click(
+            fn=process_image,
+            inputs=[img_input, table_engine_type, det_model, rec_model],
+            outputs=[html_output, table_boxes_output, ocr_boxes_output, elapse_text]
+        )
+    demo.launch()
+if __name__ == '__main__':
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+wired_table_rec
+lineless_table_rec
+table_cls
+rapid_table
+paddleocr>=2.6.0.3
+paddlepaddle
+rapidocr_onnxruntime
+gradio

utils.py ADDED Viewed

	@@ -0,0 +1,226 @@

+from io import BytesIO
+from pathlib import Path
+from typing import Union, List
+import numpy as np
+import cv2
+from PIL import UnidentifiedImageError, Image
+InputType = Union[str, np.ndarray, bytes, Path, Image.Image]
+class LoadImage:
+    def __init__(
+            self,
+    ):
+        pass
+    def __call__(self, img: InputType) -> np.ndarray:
+        if not isinstance(img, InputType.__args__):
+            raise LoadImageError(
+                f"The img type {type(img)} does not in {InputType.__args__}"
+            )
+        origin_img_type = type(img)
+        img = self.load_img(img)
+        img = self.convert_img(img, origin_img_type)
+        return img
+    def load_img(self, img: InputType) -> np.ndarray:
+        if isinstance(img, (str, Path)):
+            self.verify_exist(img)
+            try:
+                img = np.array(Image.open(img))
+            except UnidentifiedImageError as e:
+                raise LoadImageError(f"cannot identify image file {img}") from e
+            return img
+        if isinstance(img, bytes):
+            img = np.array(Image.open(BytesIO(img)))
+            return img
+        if isinstance(img, BytesIO):
+            img = np.array(Image.open(img))
+            return img
+        if isinstance(img, np.ndarray):
+            return img
+        if isinstance(img, Image.Image):
+            return np.array(img)
+        raise LoadImageError(f"{type(img)} is not supported!")
+    def convert_img(self, img: np.ndarray, origin_img_type):
+        if img.ndim == 2:
+            return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+        if img.ndim == 3:
+            channel = img.shape[2]
+            if channel == 1:
+                return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+            if channel == 2:
+                return self.cvt_two_to_three(img)
+            if channel == 3:
+                if issubclass(origin_img_type, (str, Path, bytes, Image.Image)):
+                    return cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
+                return img
+            if channel == 4:
+                return self.cvt_four_to_three(img)
+            raise LoadImageError(
+                f"The channel({channel}) of the img is not in [1, 2, 3, 4]"
+            )
+        raise LoadImageError(f"The ndim({img.ndim}) of the img is not in [2, 3]")
+    @staticmethod
+    def cvt_two_to_three(img: np.ndarray) -> np.ndarray:
+        """gray + alpha → BGR"""
+        img_gray = img[..., 0]
+        img_bgr = cv2.cvtColor(img_gray, cv2.COLOR_GRAY2BGR)
+        img_alpha = img[..., 1]
+        not_a = cv2.bitwise_not(img_alpha)
+        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)
+        new_img = cv2.bitwise_and(img_bgr, img_bgr, mask=img_alpha)
+        new_img = cv2.add(new_img, not_a)
+        return new_img
+    @staticmethod
+    def cvt_four_to_three(img: np.ndarray) -> np.ndarray:
+        """RGBA → BGR"""
+        r, g, b, a = cv2.split(img)
+        new_img = cv2.merge((b, g, r))
+        not_a = cv2.bitwise_not(a)
+        not_a = cv2.cvtColor(not_a, cv2.COLOR_GRAY2BGR)
+        new_img = cv2.bitwise_and(new_img, new_img, mask=a)
+        new_img = cv2.add(new_img, not_a)
+        return new_img
+    @staticmethod
+    def verify_exist(file_path: Union[str, Path]):
+        if not Path(file_path).exists():
+            raise LoadImageError(f"{file_path} does not exist.")
+class LoadImageError(Exception):
+    pass
+def plot_rec_box_with_logic_info(img_path, logic_points, sorted_polygons, without_text=True):
+    """
+    :param img_path
+    :param output_path
+    :param logic_points: [row_start,row_end,col_start,col_end]
+    :param sorted_polygons: [xmin,ymin,xmax,ymax]
+    :return:
+    """
+    # 读取原图
+    img = cv2.imread(img_path)
+    img = cv2.copyMakeBorder(
+        img, 0, 0, 0, 100, cv2.BORDER_CONSTANT, value=[255, 255, 255]
+    )
+    # 绘制 polygons 矩形
+    for idx, polygon in enumerate(sorted_polygons):
+        x0, y0, x1, y1 = polygon[0], polygon[1], polygon[2], polygon[3]
+        x0 = round(x0)
+        y0 = round(y0)
+        x1 = round(x1)
+        y1 = round(y1)
+        cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 1)
+        # 增大字体大小和线宽
+        font_scale = 1.0  # 原先是0.5
+        thickness = 2  # 原先是1
+        if without_text:
+            return img
+        cv2.putText(
+            img,
+            f"{idx}",
+            (x1, y1),
+            cv2.FONT_HERSHEY_PLAIN,
+            font_scale,
+            (0, 0, 255),
+            thickness,
+        )
+        return img
+def plot_rec_box(img, sorted_polygons):
+    """
+    :param img_path
+    :param output_path
+    :param sorted_polygons: [xmin,ymin,xmax,ymax]
+    :return:
+    """
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    # 处理ocr_res
+    img = cv2.copyMakeBorder(
+        img, 0, 0, 0, 100, cv2.BORDER_CONSTANT, value=[255, 255, 255]
+    )
+    # 绘制 ocr_res 矩形
+    for idx, polygon in enumerate(sorted_polygons):
+        x0, y0, x1, y1 = polygon[0], polygon[1], polygon[2], polygon[3]
+        x0 = round(x0)
+        y0 = round(y0)
+        x1 = round(x1)
+        y1 = round(y1)
+        cv2.rectangle(img, (x0, y0), (x1, y1), (0, 0, 255), 1)
+        # 增大字体大小和线宽
+        font_scale = 1.0  # 原先是0.5
+        thickness = 2  # 原先是1
+        # cv2.putText(
+        #     img,
+        #     str(idx),
+        #     (x1, y1),
+        #     cv2.FONT_HERSHEY_PLAIN,
+        #     font_scale,
+        #     (0, 0, 255),
+        #     thickness,
+        # )
+    return img
+def format_html(html:str):
+    html = html.replace("<html>","")
+    html = html.replace("</html>","")
+    html = html.replace("<body>", "")
+    html = html.replace("</body>", "")
+    return f"""
+    <!DOCTYPE html>
+    <html lang="zh-CN">
+    <head>
+    <meta charset="UTF-8">
+    <title>Complex Table Example</title>
+    <style>
+        table {{
+            border-collapse: collapse;
+            width: 100%;
+        }}
+        th, td {{
+            border: 1px solid black;
+            padding: 8px;
+            text-align: center;
+        }}
+        th {{
+            background-color: #f2f2f2;
+        }}
+    </style>
+    </head>
+    <body>
+    {html}
+    </body>
+    </html>
+    """
+def box_4_2_poly_to_box_4_1(poly_box: Union[np.ndarray, list]) -> List[float]:
+    """
+    将poly_box转换为box_4_1
+    :param poly_box:
+    :return:
+    """
+    return [poly_box[0][0], poly_box[0][1], poly_box[2][0], poly_box[2][1]]