Spaces:
Paused
Paused
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os, sys | |
sys.path.insert( | |
0, | |
os.path.abspath( | |
os.path.join( | |
os.path.dirname( | |
os.path.abspath(__file__)), | |
'../../'))) | |
from deepdoc.vision.seeit import draw_box | |
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out | |
from api.utils.file_utils import get_project_base_directory | |
import argparse | |
import re | |
import numpy as np | |
def main(args):
    """Run layout or table-structure recognition and save annotated images.

    The detections for every input image are drawn onto the image, which is
    saved to the matching output path. In "tsr" mode the table is also
    rebuilt as HTML by ``get_table_html`` and written to ``<output>.html``.

    Args:
        args: Parsed command-line namespace. ``mode`` ("layout" or "tsr",
            case-insensitive) and ``threshold`` are read here; the whole
            namespace is also passed to ``init_in_out``.

    Raises:
        ValueError: If ``args.mode`` is neither "layout" nor "tsr".
    """
    images, outputs = init_in_out(args)

    # Normalise the options once instead of on every loop iteration.
    mode = args.mode.lower()
    threshold = float(args.threshold)

    ocr = None
    if mode == "layout":
        labels = LayoutRecognizer.labels
        detr = Recognizer(
            labels,
            "layout",
            os.path.join(get_project_base_directory(), "rag/res/deepdoc/"))
    elif mode == "tsr":
        labels = TableStructureRecognizer.labels
        detr = TableStructureRecognizer()
        ocr = OCR()
    else:
        # Fail with a clear message instead of an unbound-name error below.
        raise ValueError(
            "Unsupported mode %r; expected 'layout' or 'tsr'" % args.mode)

    layouts = detr(images, threshold)
    for i, lyt in enumerate(layouts):
        if mode == "tsr":
            html = get_table_html(images[i], lyt, ocr)
            # Explicit UTF-8 so OCR text outside the platform's default
            # encoding can always be written.
            with open(outputs[i] + ".html", "w", encoding="utf-8") as f:
                f.write(html)
            # NOTE(review): assumes only tsr detections carry the
            # label/x0/top/x1/bottom keys and need reshaping into the
            # type/bbox/score records handed to draw_box - confirm that
            # layout detections already have that shape.
            lyt = [{
                "type": t["label"],
                "bbox": [t["x0"], t["top"], t["x1"], t["bottom"]],
                "score": t["score"]
            } for t in lyt]
        img = draw_box(images[i], lyt, labels, threshold)
        img.save(outputs[i], quality=95)
        print("save result to: " + outputs[i])
def get_table_html(img, tb_cpns, ocr):
    """Build a standalone HTML page for the table shown in ``img``.

    The OCR text boxes are annotated with the index and extent of the row,
    header, column and spanning-cell components they are matched to, then
    passed to ``TableStructureRecognizer.construct_table``; its result is
    embedded in a fixed HTML/CSS template.

    Args:
        img: Table image; converted with ``np.array`` before being OCR-ed.
        tb_cpns: Detected table components. Each item's ``label`` is matched
            against regular expressions, and ``x0`` is used to order columns.
        ocr: Callable applied to the image array. Its result is iterated as
            ``(points, text_info)`` pairs, where ``text_info[0]`` is the text
            (presumably ``points`` are the box corners - confirm).

    Returns:
        The complete HTML document as a string.
    """
    ocr_results = ocr(np.array(img))

    # Keep only the boxes whose first point is not right of / below the
    # points used for the opposite edges.
    text_boxes = []
    for points, text_info in ocr_results:
        if points[0][0] <= points[1][0] and points[0][1] <= points[-1][1]:
            text_boxes.append({
                "x0": points[0][0],
                "x1": points[1][0],
                "top": points[0][1],
                "text": text_info[0],
                "bottom": points[-1][1],
                "layout_type": "table",
                "page_number": 0,
            })

    # The sort threshold is a third of the mean height of ALL OCR boxes,
    # including the ones filtered out above.
    heights = [points[-1][1] - points[0][1] for points, _ in ocr_results]
    text_boxes = Recognizer.sort_Y_firstly(text_boxes, np.mean(heights) / 3)

    def select_components(pattern, fuzzy=10, proportion=0.6):
        # Components whose label matches `pattern`, sorted, cleaned up
        # against the text boxes, then sorted again.
        matched = [c for c in tb_cpns if re.match(pattern, c["label"])]
        matched = Recognizer.sort_Y_firstly(matched, fuzzy)
        matched = Recognizer.layouts_cleanup(
            text_boxes, matched, 5, proportion)
        return Recognizer.sort_Y_firstly(matched, 0)

    def stamp_region(box, region, index_key, index):
        # Record the extent of a header / spanning cell on the text box.
        box["H_top"] = region["top"]
        box["H_bott"] = region["bottom"]
        box["H_left"] = region["x0"]
        box["H_right"] = region["x1"]
        box[index_key] = index

    headers = select_components(r".*header$")
    rows = select_components(r".* (row|header)")
    spans = select_components(r".*spanning")

    columns = [c for c in tb_cpns if re.match(r"table column$", c["label"])]
    columns.sort(key=lambda c: c["x0"])
    columns = Recognizer.layouts_cleanup(text_boxes, columns, 5, 0.5)

    for box in text_boxes:
        row_idx = Recognizer.find_overlapped_with_threashold(
            box, rows, thr=0.3)
        if row_idx is not None:
            row = rows[row_idx]
            box["R"] = row_idx
            box["R_top"] = row["top"]
            box["R_bott"] = row["bottom"]

        header_idx = Recognizer.find_overlapped_with_threashold(
            box, headers, thr=0.3)
        if header_idx is not None:
            stamp_region(box, headers[header_idx], "H", header_idx)

        column_idx = Recognizer.find_horizontally_tightest_fit(box, columns)
        if column_idx is not None:
            column = columns[column_idx]
            box["C"] = column_idx
            box["C_left"] = column["x0"]
            box["C_right"] = column["x1"]

        # A spanning cell is handled last, so its extent overrides the
        # header extent written above.
        span_idx = Recognizer.find_overlapped_with_threashold(
            box, spans, thr=0.3)
        if span_idx is not None:
            stamp_region(box, spans[span_idx], "SP", span_idx)

    # "%%" is a literal percent sign; the single "%s" receives the table.
    page_template = """
<html>
<head>
<style>
._table_1nkzy_11 {
margin: auto;
width: 70%%;
padding: 10px;
}
._table_1nkzy_11 p {
margin-bottom: 50px;
border: 1px solid #e1e1e1;
}
caption {
color: #6ac1ca;
font-size: 20px;
height: 50px;
line-height: 50px;
font-weight: 600;
margin-bottom: 10px;
}
._table_1nkzy_11 table {
width: 100%%;
border-collapse: collapse;
}
th {
color: #fff;
background-color: #6ac1ca;
}
td:hover {
background: #c1e8e8;
}
tr:nth-child(even) {
background-color: #f2f2f2;
}
._table_1nkzy_11 th,
._table_1nkzy_11 td {
text-align: center;
border: 1px solid #ddd;
padding: 8px;
}
</style>
</head>
<body>
%s
</body>
</html>
"""
    table_markup = TableStructureRecognizer.construct_table(
        text_boxes, html=True)
    return page_template % table_markup
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the recognizer.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--inputs',
        help="Directory where to store images or PDFs, or a file path to a single image or PDF",
        required=True)
    parser.add_argument(
        '--output_dir',
        help="Directory where to store the output images. Default: './layouts_outputs'",
        default="./layouts_outputs")
    # type=float rejects a malformed threshold at parse time with a usage
    # message instead of a traceback later on.
    parser.add_argument(
        '--threshold',
        type=float,
        help="A threshold to filter out detections. Default: 0.5",
        default=0.5)
    # The value is lower-cased before the `choices` check, so "--mode TSR"
    # is accepted.
    parser.add_argument(
        '--mode',
        type=str.lower,
        help="Task mode: layout recognition or table structure recognition",
        choices=["layout", "tsr"],
        default="layout")
    args = parser.parse_args()
    main(args)