Charles Kabui committed on
Commit
79904b0
·
1 Parent(s): a84ccd1

loading images

Browse files
Files changed (2) hide show
  1. app.py +91 -0
  2. utils/get_RGB_image.py +18 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ import gradio as gr
3
+ from utils.get_RGB_image import get_RGB_image, is_online_file, steam_online_file
4
+ from pdf2image import convert_from_path, convert_from_bytes
5
+
6
def similarity_fn(document_image_1, document_image_2):
    """Report how many of the two document inputs are present.

    Presence is judged by truthiness. Returns ``'BOTH'`` when both inputs are
    truthy, ``'ONE'`` when exactly one is, and ``'NONE'`` otherwise.
    """
    if document_image_1 and document_image_2:
        return 'BOTH'
    if document_image_1 or document_image_2:
        return 'ONE'
    return 'NONE'
9
+
10
def load_image(filename, page = 0):
    """Load a document page (PDF) or an image and wrap it for the Gradio UI.

    The source is first treated as a PDF (downloaded when ``filename`` is an
    http/https URL, read from disk otherwise) and page ``page`` is rendered.
    If that fails for any reason, the source is opened as a plain image.

    Args:
        filename: Local path or http(s) URL of a PDF or image.
        page: Zero-based index of the PDF page to render.

    Returns:
        A two-item list ``[image, error]``:
        * success: ``[gr.Image(value=<RGB image>, visible=True), <hidden gr.HTML>]``
        * failure: ``[None, gr.HTML(<escaped traceback>, visible=True)]``
        The image slot stays ``None`` on failure so callers can test it for
        truthiness.
    """
    import html  # local import: only needed to escape the traceback below

    try:
        try:
            if is_online_file(filename):
                stream = steam_online_file(filename)
                # convert_from_bytes needs the PDF bytes themselves, not a
                # file-like response object.
                pdf_bytes = stream.read() if hasattr(stream, "read") else stream
                image = get_RGB_image(convert_from_bytes(pdf_bytes)[page])
            else:
                image = get_RGB_image(convert_from_path(filename)[page])
        except Exception:
            # Not a readable PDF (or the page is out of range): fall back to
            # opening the source as an ordinary image.
            image = get_RGB_image(filename)
        return [
            gr.Image(value=image, visible=True),
            # Clear and hide any error left over from a previous failed load.
            gr.HTML(value="", visible=False),
        ]
    except Exception:
        # Escape the traceback: it contains text such as "<module>" and the
        # user-supplied path/URL, which must not be interpreted as markup.
        error = html.escape(traceback.format_exc())
        return [None, gr.HTML(value=f"<pre>{error}</pre>", visible=True)]
27
+
28
def preview_url(url, page = 0):
    """Load ``url`` via ``load_image`` and choose which tab to show.

    Returns ``[tabs_update, image, error]``: tab ``0`` is selected when an
    image was loaded, tab ``1`` otherwise; ``image`` and ``error`` are passed
    through unchanged from ``load_image``.
    """
    image, error = load_image(url, page=page)
    selected_tab = 0 if image else 1
    return [gr.Tabs(selected=selected_tab), image, error]
34
+
35
def document_view(document_number: int):
    """Build the loader widgets for one document and return its image component.

    Creates a heading plus two tabs: "From Image" (id 0) holding the image
    preview, an error box and an upload button, and "From URL" (id 1) holding
    a URL textbox, an error box and a "Preview" button. Must be called inside
    an active ``gr.Blocks`` context.

    Args:
        document_number: 1 for the first document; any other value is
            labelled "second" in the heading.

    Returns:
        The ``gr.Image`` component that receives the loaded document.
    """
    ordinal = "first" if document_number == 1 else "second"
    # The heading must be closed with </h4>; a second opening tag leaves the
    # element unterminated.
    gr.HTML(
        value=f'<h4>Load the {ordinal} PDF or Document Image</h4>',
        elem_classes=['center'])

    with gr.Tabs() as document_tabs:
        with gr.Tab("From Image", id=0):
            document = gr.Image(
                type="pil",
                label=f"Document {document_number}",
                visible=False)
            document_error_message = gr.HTML(label="Error Message", visible=False)
            document_preview = gr.UploadButton(
                "Click to Upload PDF or Document Image",
                file_types=["image", ".pdf"],
                file_count="single")
        with gr.Tab("From URL", id=1):
            document_url = gr.Textbox(
                label=f"Document {document_number} URL",
                info="Paste a Link/URL to PDF or Document Image",
                placeholder="https://datasets-server.huggingface.co/.../image.jpg")
            document_url_error_message = gr.HTML(label="Error Message", visible=False)
            document_url_preview = gr.Button(value="Preview", variant="primary")

    def _load_uploaded(file):
        # The upload payload may be an object exposing ``.name`` or already a
        # plain path string, depending on the Gradio version.
        return load_image(getattr(file, "name", file))

    document_preview.upload(
        fn=_load_uploaded,
        inputs=[document_preview],
        outputs=[document, document_error_message])
    document_url_preview.click(
        fn=preview_url,
        inputs=[document_url],
        outputs=[document_tabs, document, document_url_error_message])
    return document
61
+
62
def app():
    """Assemble the two-document comparison UI and launch it.

    Lays out a title row, two side-by-side document loaders (built by
    ``document_view``) and a button that runs ``similarity_fn`` on both
    loaded images, showing the result in an HTML component.

    Returns:
        Whatever ``gr.Blocks.launch(debug=True)`` returns.
    """
    title = 'Document Similarity Search Using Visual Layout Features'
    description = "<h2>Document Similarity Search using Detectron2</h2>"
    article = "<h4>More details, Links about this! - Document Similarity Search using Detectron2</h4>"
    # NOTE(review): the `image` selector is kept from the original; if the
    # intent is to cap rendered pictures, `img` is probably the right selector.
    css = '''
    image { max-height: 86vh !important; }
    .center { display: flex; flex: 1 1 auto; align-items: center; align-content: center; justify-content: center; justify-items: center; }
    '''

    def _compare(document_image_1, document_image_2):
        # One parameter per wired input; reveal the output, which starts hidden.
        return gr.HTML(
            value=similarity_fn(document_image_1, document_image_2),
            visible=True)

    with gr.Blocks(title=title, css=css) as demo:
        with gr.Row():
            gr.HTML(value=description, elem_classes=['center'])
        with gr.Row(equal_height=False):
            with gr.Column():
                document_1_image = document_view(1)
            with gr.Column():
                document_2_image = document_view(2)
        with gr.Row():
            submit = gr.Button(value="Preview", variant="primary")
            similarity_output = gr.HTML(value=article, elem_classes=['center'], visible=False)
        submit.click(
            fn=_compare,
            inputs=[document_1_image, document_2_image],
            outputs=[similarity_output])
        # TODO: add a threshold control that re-runs the comparison on change.
    return demo.launch(debug=True)
utils/get_RGB_image.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from PIL import Image
2
+ from urllib.parse import urlparse
3
+ import requests
4
+
5
def is_online_file(url: str) -> bool:
    """Return True when *url* parses with an ``http`` or ``https`` scheme."""
    scheme = urlparse(url).scheme
    return scheme == "http" or scheme == "https"
7
+
8
def steam_online_file(url: str, timeout: float = 30.0):
    """Open *url* and return its body as a readable binary stream.

    Args:
        url: The http(s) address to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            host cannot block the caller indefinitely.

    Returns:
        The response's raw file-like stream (``response.raw``), not ``bytes``;
        call ``.read()`` on it to obtain the content.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeout.
    """
    response = requests.get(url, stream=True, timeout=timeout)
    # Fail here with a clear HTTP error instead of handing an error page to
    # the image/PDF decoders downstream.
    response.raise_for_status()
    # Raw reads skip gzip/deflate decoding unless explicitly enabled.
    response.raw.decode_content = True
    return response.raw
10
+
11
def get_RGB_image(image_or_path: str | Image.Image) -> Image.Image:
    """Return an RGB copy of an image given as an object, local path, or URL.

    Args:
        image_or_path: A PIL image, a local file path, or an http(s) URL.

    Returns:
        A new ``PIL.Image.Image`` in ``"RGB"`` mode.
    """
    if not isinstance(image_or_path, str):
        # Already an image object: only the mode conversion is needed.
        return image_or_path.convert("RGB")

    if is_online_file(image_or_path):  # Online
        source = steam_online_file(image_or_path)
    else:  # Local
        source = image_or_path

    # convert() yields an independent copy, so the opened file can be closed
    # as soon as the conversion is done.
    with Image.open(source) as opened:
        return opened.convert("RGB")