Charles Kabui committed · Commit a7b5719 · Parent(s): 390e6ec

similarity display

Files changed:
- analysis.ipynb +19 -25
- app.py +9 -88
- main.py +140 -0
- utils/get_features.py +127 -0
- utils/split_image.py +19 -0
analysis.ipynb
CHANGED
@@ -11,9 +11,18 @@
    },
    {
     "cell_type": "code",
-    "execution_count":
+    "execution_count": 2,
     "metadata": {},
-    "outputs": [
+    "outputs": [
+     {
+      "name": "stdout",
+      "output_type": "stream",
+      "text": [
+       "The autoreload extension is already loaded. To reload it, use:\n",
+       " %reload_ext autoreload\n"
+      ]
+     }
+    ],
     "source": [
      "%load_ext autoreload\n",
      "%autoreload 2\n",
@@ -329,6 +338,10 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
+      "tensor(0)\n",
+      "tensor(0.)\n",
+      "['Text', 'Picture', 'Text', 'Picture', 'Title', 'Text', 'Picture', 'Text', 'Text', 'Picture', 'Picture', 'Text', 'Text', 'Title', 'Section-header', 'Picture', 'Title', 'Picture', 'Picture', 'Picture', 'Section-header', 'Title', 'Picture', 'Caption', 'Title', 'Text', 'Text', 'Picture', 'Caption', 'Title', 'Text', 'Title', 'Text', 'Page-header', 'Section-header', 'Section-header', 'Caption', 'Title', 'Page-header', 'Section-header', 'Section-header', 'Page-header', 'Text', 'Picture', 'Caption', 'Text', 'Caption', 'Text', 'Picture', 'Page-header', 'Title', 'Picture', 'Picture', 'Text', 'Page-footer', 'Section-header', 'Caption', 'Section-header', 'Title', 'Text', 'Picture', 'Page-header', 'Picture', 'Caption', 'Caption', 'Section-header', 'Section-header', 'Picture', 'Section-header', 'Title', 'Picture', 'Page-footer', 'Caption', 'Title', 'Text', 'Picture', 'Title', 'Picture', 'Text', 'Text', 'Section-header', 'Picture', 'Picture', 'Section-header', 'Caption', 'Text']\n",
+      "{0: 0.17, 1: 0.24545454545454548, 2: 0.3209090909090909, 3: 0.3963636363636364, 4: 0.4718181818181819, 5: 0.5472727272727274, 6: 0.6227272727272728, 7: 0.6981818181818182, 8: 0.7736363636363638, 9: 0.8490909090909092, 10: 0.9245454545454547}\n",
       "Keyboard interruption in main thread... closing server.\n"
      ]
     },
@@ -342,30 +355,11 @@
     }
    ],
    "source": [
-    "from
-    "\n",
-    "app()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from utils.get_RGB_image import get_RGB_image\n",
-    "from pdf2image import convert_from_path\n",
+    "from main import app\n",
     "\n",
-    "
-
-
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "get_RGB_image(pdf[0]) "
+    "model_path = '../detectron2-layout-parser/model_final.pth'\n",
+    "config_path = '../detectron2-layout-parser/config.yaml'\n",
+    "app(model_path=model_path, config_path=config_path, debug=True)"
    ]
   }
  ],
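The `{0: 0.17, 1: 0.2454..., ..., 10: 0.9245...}` line in the notebook output above is a set of 11 evenly spaced "nonce" values over [0.17, 1), which is what utils/get_features.py asks for via get_unique_values(start=0.17, end=1, count=len(label_names)). The helper utils/get_unique_values.py is not part of this commit, so the following is only a hedged sketch of what it might look like, written so that it reproduces the printed values:

# Hypothetical sketch of utils/get_unique_values.py (the file is not in this commit).
# get_unique_values(start=0.17, end=1, count=11) should yield the evenly spaced
# values printed in the notebook: 0.17, 0.2454..., 0.3209..., ..., 0.9245...
import numpy as np

def get_unique_values(*, start: float, end: float, count: int):
    # endpoint=False spaces `count` values by (end - start) / count, starting at `start`
    return np.linspace(start, end, num=count, endpoint=False).tolist()

print(dict(enumerate(get_unique_values(start=0.17, end=1, count=11))))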
app.py
CHANGED
@@ -1,89 +1,10 @@
-import
-
-
-
-
-
-
-    return gr.HTML(f'<pre>Similarity between the two documents: {xxx}<pre>', visible=True)
-
-def load_image(filename, page = 0):
-    try:
-        image = None
-        try:
-            if (is_online_file(filename)):
-                image = get_RGB_image(convert_from_bytes(steam_online_file(filename))[page])
-            else:
-                image = get_RGB_image(convert_from_path(filename)[page])
-        except:
-            image = get_RGB_image(filename)
-        return [
-            gr.Image(value=image, visible=True),
-            None
-        ]
-    except:
-        error = traceback.format_exc()
-        return [None, gr.HTML(value=error, visible=True)]
-
-def preview_url(url, page = 0):
-    [image, error] = load_image(url, page = page)
-    if image:
-        return [gr.Tabs(selected=0), image, error]
-    else:
-        return [gr.Tabs(selected=1), image, error]
-
-def document_view(document_number: int):
-    gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
-    with gr.Tabs() as document_tabs:
-        with gr.Tab("From Image", id=0):
-            document = gr.Image(type="pil", label=f"Document {document_number}", visible=False)
-            document_error_message = gr.HTML(label="Error Message", visible=False)
-            document_preview = gr.UploadButton(
-                "Click to PDF or Document Image",
-                file_types=["image", ".pdf"],
-                file_count="single")
-        with gr.Tab("From URL", id=1):
-            document_url = gr.Textbox(
-                label=f"Document {document_number} URL",
-                info="Paste a Link/URL to PDF or Document Image",
-                placeholder="https://datasets-server.huggingface.co/.../image.jpg")
-            document_url_error_message = gr.HTML(label="Error Message", visible=False)
-            document_url_preview = gr.Button(value="Preview", variant="primary")
-    document_preview.upload(
-        fn = lambda file: load_image(file.name),
-        inputs = [document_preview],
-        outputs = [document, document_error_message])
-    document_url_preview.click(
-        fn = preview_url,
-        inputs = [document_url],
-        outputs = [document_tabs, document, document_url_error_message])
-    return document
-
-def app():
-    title = 'Document Similarity Search Using Visual Layout Features'
-    description = f"<h2>{title}<h2>"
-    css = '''
-    image { max-height="86vh" !important; }
-    .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
-    .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
-    '''
-    with gr.Blocks(title=title, css=css) as app:
-        with gr.Row():
-            gr.HTML(value=description, elem_classes=['center'])
-        with gr.Row(equal_height = False):
-            with gr.Column():
-                document_1_image = document_view(1)
-            with gr.Column():
-                document_2_image = document_view(2)
-        gr.HTML('<hr/>', elem_classes=['hr'])
-        with gr.Row(elem_classes=['center']):
-            with gr.Column():
-                submit = gr.Button(value="Similarity", variant="primary")
-                reset = gr.Button(value="Reset", variant="secondary")
-            with gr.Column():
-                similarity_output = gr.HTML(visible=False)
-        submit.click(
-            fn=similarity_fn,
-            inputs=[document_1_image, document_2_image],
-            outputs=[similarity_output])
-    return app.launch(debug=True)
+import os
+os.system("apt-get install poppler-utils")
+os.system("python -m pip install torch")
+os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'")
+os.system("python -m pip install layoutparser layoutparser[layoutmodels] layoutparser[ocr]")
+os.system("python -m pip install Pillow==9.4.0")
+os.system("python -m pip install imagehash")
+
+from main import app
+app()
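Note that app() in the new main.py (below) declares keyword-only model_path and config_path parameters with no defaults, so the bare app() call in the new app.py would raise a TypeError at startup. A hedged sketch of the call app.py would need, with placeholder paths (the actual weight and config locations are not shown in this commit):

# Sketch only: the real model/config paths are not part of this commit.
from main import app

app(
    model_path='model_final.pth',  # hypothetical path to the Detectron2 weights
    config_path='config.yaml',     # hypothetical path to the matching layout-parser config
    debug=False)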
main.py
ADDED
@@ -0,0 +1,140 @@
import traceback
import gradio as gr
from utils.get_RGB_image import get_RGB_image, is_online_file, steam_online_file
from pdf2image import convert_from_path, convert_from_bytes
import layoutparser as lp
from PIL import Image
from utils.get_features import get_features
from imagehash import average_hash
from sklearn.metrics.pairwise import cosine_similarity

label_map = {0: 'Caption', 1: 'Footnote', 2: 'Formula', 3: 'List-item', 4: 'Page-footer', 5: 'Page-header', 6: 'Picture', 7: 'Section-header', 8: 'Table', 9: 'Text', 10: 'Title'}
label_names = list(label_map.values())
color_map = {'Caption': '#acc2d9', 'Footnote': '#56ae57', 'Formula': '#b2996e', 'List-item': '#a8ff04', 'Page-footer': '#69d84f', 'Page-header': '#894585', 'Picture': '#70b23f', 'Section-header': '#d4ffff', 'Table': '#65ab7c', 'Text': '#952e8f', 'Title': '#fcfc81'}
cache = {
    'document_image_1_hash': None,
    'document_image_2_hash': None,
    'document_image_1_features': None,
    'document_image_2_features': None,
}
pre_message_style = 'overflow: auto;border: 2px solid pink;padding: 4px;'

def similarity_fn(document_image_1: Image.Image, document_image_2: Image.Image, model: lp.Detectron2LayoutModel):
    message = None
    try:
        document_image_1_hash = str(average_hash(document_image_1))
        document_image_2_hash = str(average_hash(document_image_2))

        if document_image_1_hash == cache['document_image_1_hash']:
            document_image_1_features = cache['document_image_1_features']
        else:
            document_image_1_features = get_features(document_image_1, model, label_names)
            cache['document_image_1_hash'] = document_image_1_hash
            cache['document_image_1_features'] = document_image_1_features

        if document_image_2_hash == cache['document_image_2_hash']:
            document_image_2_features = cache['document_image_2_features']
        else:
            document_image_2_features = get_features(document_image_2, model, label_names)
            cache['document_image_2_hash'] = document_image_2_hash
            cache['document_image_2_features'] = document_image_2_features

        [[similarity]] = cosine_similarity(
            [
                cache['document_image_1_features']['vectors']
            ],
            [
                cache['document_image_2_features']['vectors']
            ])
        message = f'<pre style="{pre_message_style}">Similarity between the two documents is: {similarity}<pre>'
    except Exception as e:
        message = f'<pre style="{pre_message_style}">{traceback.format_exc()}<pre>'
    return gr.HTML(message, visible=True)

def load_image(filename, page = 0):
    try:
        image = None
        try:
            if (is_online_file(filename)):
                image = get_RGB_image(convert_from_bytes(steam_online_file(filename))[page])
            else:
                image = get_RGB_image(convert_from_path(filename)[page])
        except:
            image = get_RGB_image(filename)
        return [
            gr.Image(value=image, visible=True),
            None
        ]
    except:
        error = traceback.format_exc()
        return [None, gr.HTML(value=error, visible=True)]

def preview_url(url, page = 0):
    [image, error] = load_image(url, page = page)
    if image:
        return [gr.Tabs(selected=0), image, error]
    else:
        return [gr.Tabs(selected=1), image, error]

def document_view(document_number: int):
    gr.HTML(value=f'<h4>Load the {"first" if document_number == 1 else "second"} PDF or Document Image<h4>', elem_classes=['center'])
    with gr.Tabs() as document_tabs:
        with gr.Tab("From Image", id=0):
            document = gr.Image(type="pil", label=f"Document {document_number}", visible=False)
            document_error_message = gr.HTML(label="Error Message", visible=False)
            document_preview = gr.UploadButton(
                "Click to PDF or Document Image",
                file_types=["image", ".pdf"],
                file_count="single")
        with gr.Tab("From URL", id=1):
            document_url = gr.Textbox(
                label=f"Document {document_number} URL",
                info="Paste a Link/URL to PDF or Document Image",
                placeholder="https://datasets-server.huggingface.co/.../image.jpg")
            document_url_error_message = gr.HTML(label="Error Message", visible=False)
            document_url_preview = gr.Button(value="Preview", variant="primary")
    document_preview.upload(
        fn = lambda file: load_image(file.name),
        inputs = [document_preview],
        outputs = [document, document_error_message])
    document_url_preview.click(
        fn = preview_url,
        inputs = [document_url],
        outputs = [document_tabs, document, document_url_error_message])
    return document

def app(*, model_path, config_path, debug = False):
    model: lp.Detectron2LayoutModel = lp.Detectron2LayoutModel(
        config_path = config_path,
        model_path = model_path,
        label_map = label_map)
    title = 'Document Similarity Search Using Visual Layout Features'
    description = f"<h2>{title}<h2>"
    css = '''
    image { max-height="86vh" !important; }
    .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
    .hr { width: 100%; display: block; padding: 0; margin: 0; background: gray; height: 4px; border: none; }
    '''
    with gr.Blocks(title=title, css=css) as app:
        with gr.Row():
            gr.HTML(value=description, elem_classes=['center'])
        with gr.Row(equal_height = False):
            with gr.Column():
                document_1_image = document_view(1)
            with gr.Column():
                document_2_image = document_view(2)
        gr.HTML('<hr/>', elem_classes=['hr'])
        with gr.Row(elem_classes=['center']):
            with gr.Column():
                submit = gr.Button(value="Similarity", variant="primary")
                reset = gr.Button(value="Reset", variant="secondary")
            with gr.Column():
                similarity_output = gr.HTML(visible=False)
        submit.click(
            fn=lambda document_1_image, document_2_image: similarity_fn(
                document_1_image,
                document_2_image,
                model),
            inputs=[document_1_image, document_2_image],
            outputs=[similarity_output])
    return app.launch(debug=debug)
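similarity_fn above hashes each image with imagehash.average_hash so that layout features are only recomputed when an input actually changes, then compares the two 'vectors' lists with scikit-learn's cosine_similarity. A minimal standalone sketch of that comparison step, using made-up vectors:

# Minimal sketch of the comparison inside similarity_fn; the vectors are made up.
from sklearn.metrics.pairwise import cosine_similarity

document_1_vectors = [0.0, 0.12, 0.0, 0.34]   # hypothetical layout feature vector
document_2_vectors = [0.0, 0.10, 0.05, 0.34]  # hypothetical layout feature vector

# cosine_similarity expects 2-D inputs, hence the extra nesting; it returns a
# 1x1 matrix here, unpacked the same way main.py does with [[similarity]].
[[similarity]] = cosine_similarity([document_1_vectors], [document_2_vectors])
print(similarity)  # 1.0 means identical layout vectors, values near 0 mean unrelated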
utils/get_features.py
ADDED
@@ -0,0 +1,127 @@
import layoutparser as lp
from PIL import Image
import tensorflow as tf
import numpy as np
import torch
import torchvision.ops.boxes as box_ops
from typing import List, Tuple
from .split_image import split_image
from .get_unique_values import get_unique_values

def get_vectors(*,
        predicted_bboxes: List[Tuple[int, int, int, int]],
        predicted_scores: List[float],
        predicted_labels: List[str],
        label_names: List[str],
        sub_images_bboxes: List[Tuple[int, int, int, int]],
        index_start: float = 0.17,
        index_end: float = 1,
        weighted_jaccard_index = False):
    bboxes_tensor: torch.Tensor = torch.tensor(predicted_bboxes)
    labels_nonce = { value:key for key, value in zip(get_unique_values(start = index_start, end = index_end, count = len(label_names)), list(label_names)) }

    def get_vector(bbox: Tuple[int, int, int, int], region_nonce: float):
        # bbox: expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
        bbox_tensor: torch.Tensor = torch.tensor([bbox])
        [jaccard_indexes] = box_ops.box_iou(bbox_tensor, bboxes_tensor)
        '''
        Take either the index of the bounding box with the largest Jaccard index (Intersection over Union),
        or the index of the bounding box with the largest Jaccard index multiplied by its prediction score.
        By doing this we strike a balance between accuracy and relative position.
        '''
        index_of_jaccard_index = jaccard_indexes.argmax() if not weighted_jaccard_index else np.multiply(jaccard_indexes, predicted_scores).argmax()
        jaccard_index = jaccard_indexes[index_of_jaccard_index]
        print(index_of_jaccard_index)
        print(jaccard_index)
        print(predicted_labels)
        print(labels_nonce)
        jaccard_index_bbox_label__nonce = labels_nonce[predicted_labels[index_of_jaccard_index]]
        jaccard_index_bbox_score = predicted_scores[index_of_jaccard_index]
        vector = region_nonce * jaccard_index * jaccard_index_bbox_label__nonce * jaccard_index_bbox_score
        return vector.item()
    sub_images_nonces = get_unique_values(start = index_start, end = index_end, count = len(sub_images_bboxes))
    for sub_image_bbox, region_nonce in zip(sub_images_bboxes, sub_images_nonces):
        yield get_vector(sub_image_bbox, region_nonce)

def get_predictions(
        image: Image.Image,
        model: lp.Detectron2LayoutModel,
        predictions_reducer = lambda *args: args):
    layout_predicted = model.detect(image)
    if len(layout_predicted) > 0:
        predicted_bboxes = [block.coordinates for block in layout_predicted]
        predicted_scores = [block.score for block in layout_predicted]
        predicted_labels = [block.type for block in layout_predicted]
        [predicted_bboxes, predicted_scores, predicted_labels] = predictions_reducer(
            predicted_bboxes,
            predicted_scores,
            predicted_labels)
        return {
            'predicted_bboxes': predicted_bboxes,
            'predicted_scores': predicted_scores,
            'predicted_labels': predicted_labels,
        }
    else:
        return {
            'predicted_bboxes': [],
            'predicted_scores': [],
            'predicted_labels': [],
        }

def predictions_reducer(
        predicted_bboxes: List[Tuple[int, int, int, int]],
        predicted_scores: List[float],
        predicted_labels: List[str]):
    selected_indices = tf.image.non_max_suppression(
        boxes = predicted_bboxes,
        scores = predicted_scores,
        max_output_size = len(predicted_bboxes),
        iou_threshold = 0.01)
    return {
        'predicted_bboxes': tf.gather(predicted_bboxes, selected_indices).numpy().tolist(), # List[List[int, int, int, int]]
        'predicted_scores': tf.gather(predicted_scores, selected_indices).numpy().astype(float).tolist(),
        'predicted_labels': tf.gather(predicted_labels, selected_indices).numpy().astype(str).tolist()
    }

def get_features(image: Image.Image, model: lp.Detectron2LayoutModel, label_names: List[str], width_parts = 100, height_parts = 100):
    predictions = get_predictions(image, model)
    reduced_predictions = predictions_reducer(**predictions)
    sub_images_bboxes = list(split_image(np.array(image), width_parts, height_parts, result = 'bboxes'))

    vectors = get_vectors(
        sub_images_bboxes = sub_images_bboxes,
        label_names = label_names,
        weighted_jaccard_index = False,
        **predictions)

    weighted_vectors = get_vectors(
        sub_images_bboxes = sub_images_bboxes,
        label_names = label_names,
        weighted_jaccard_index = True,
        **predictions)

    reduced_vectors = get_vectors(
        sub_images_bboxes = sub_images_bboxes,
        label_names = label_names,
        weighted_jaccard_index = False,
        **reduced_predictions)

    reduced_weighted_vectors = get_vectors(
        sub_images_bboxes = sub_images_bboxes,
        label_names = label_names,
        weighted_jaccard_index = True,
        **reduced_predictions)

    return {
        'predicted_bboxes': predictions['predicted_bboxes'],
        'predicted_scores': predictions['predicted_scores'],
        'predicted_labels': predictions['predicted_labels'],
        'vectors': list(vectors),
        'weighted_vectors': list(weighted_vectors),

        'reduced_predicted_bboxes': reduced_predictions['predicted_bboxes'],
        'reduced_predicted_scores': reduced_predictions['predicted_scores'],
        'reduced_predicted_labels': reduced_predictions['predicted_labels'],
        'reduced_vectors': list(reduced_vectors),
        'weighted_reduced_vectors': list(reduced_weighted_vectors),
    }
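get_vectors above turns each tile of the width_parts x height_parts grid into a single number: it finds the detected layout box that best overlaps the tile (torchvision's box_iou, optionally weighted by the detection score), then multiplies the tile's positional nonce, the IoU, the label's nonce, and the score. A small sketch of one get_vector step with made-up boxes, scores, and nonces:

# Sketch of one get_vector step; the boxes, scores and nonces below are made up.
import torch
import torchvision.ops.boxes as box_ops

predicted_bboxes = torch.tensor([[0., 0., 200., 100.], [0., 120., 200., 300.]])  # (x1, y1, x2, y2)
predicted_scores = [0.9, 0.8]
label_nonces = [0.17, 0.245]                      # nonce assigned to each detection's label

tile = torch.tensor([[0., 0., 50., 50.]])         # one sub-image bbox from split_image
[ious] = box_ops.box_iou(tile, predicted_bboxes)  # IoU of the tile against every detection
best = ious.argmax().item()                       # index of the best-overlapping detection
region_nonce = 0.17                               # nonce assigned to this tile's grid position

vector_element = region_nonce * ious[best].item() * label_nonces[best] * predicted_scores[best]
print(vector_element)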
utils/split_image.py
ADDED
@@ -0,0 +1,19 @@
import numpy as np
from typing import Literal

def split_image(image: np.ndarray, width_parts: int, height_parts: int, result: Literal['np.ndarray', 'bboxes'] = 'np.ndarray'):
    tile_width = image.shape[0] // width_parts
    tile_height = image.shape[1] // height_parts
    for height in range(height_parts):
        for width in range(width_parts):
            width_start = width * tile_width
            width_end = tile_width * (width + 1) if (width + 1) < width_parts else image.shape[0]
            height_start = height * tile_height
            height_end = tile_height * (height + 1) if (height + 1) < height_parts else image.shape[1]
            if result == 'np.ndarray':
                # np.ndarray(height, width, channels)
                yield image[height_start:height_end, width_start:width_end]
            else:
                # Both sets of boxes are expected to be in ``(x1, y1, x2, y2)`` format with ``0 <= x1 < x2`` and ``0 <= y1 < y2``.
                yield (width_start, height_start, width_end, height_end)
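split_image is a generator: with result='bboxes' it yields one (x1, y1, x2, y2) tuple per grid cell, which is how get_features builds its grid of sub-image boxes (it calls it with width_parts=100, height_parts=100). A small usage sketch with a 2x2 grid on a square dummy image:

# Usage sketch: split a square dummy image into a 2x2 grid of bounding boxes.
import numpy as np
from utils.split_image import split_image

image = np.zeros((100, 100, 3), dtype=np.uint8)  # (rows, cols, channels)
print(list(split_image(image, 2, 2, result='bboxes')))
# [(0, 0, 50, 50), (50, 0, 100, 50), (0, 50, 50, 100), (50, 50, 100, 100)]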