Spaces:
Runtime error
Runtime error
viklofg
committed on
Commit
·
0658a37
1
Parent(s):
3cb882a
Merge templating and submit job tabs
Browse files
- Moved contents and functionality from 'Templating' tab to 'Submit Job' tab
- Removed the 'Templating' tab
- app/gradio_config.py +30 -0
- app/main.py +2 -45
- app/tabs/submit.py +52 -21
- app/tabs/templating.py +0 -190
app/gradio_config.py
CHANGED
@@ -46,4 +46,34 @@ hr.region-divider {
|
|
46 |
margin-bottom: 0.5em;
|
47 |
}
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
"""
|
|
|
46 |
margin-bottom: 0.5em;
|
47 |
}
|
48 |
|
49 |
+
.pipeline-panel {
|
50 |
+
background: none;
|
51 |
+
border: solid 1px;
|
52 |
+
border-color: var(--block-border-color);
|
53 |
+
}
|
54 |
+
|
55 |
+
.pipeline-help {
|
56 |
+
padding: 5px 0 0 0;
|
57 |
+
font-weight: var(--block-info-text-weight);
|
58 |
+
font-size: var(--block-info-text-size);
|
59 |
+
color: var(--block-info-text-color);
|
60 |
+
}
|
61 |
+
|
62 |
+
.pipeline-help a {
|
63 |
+
color: var(--secondary-400);
|
64 |
+
}
|
65 |
+
|
66 |
+
.pipeline-help a:hover {
|
67 |
+
color: var(--secondary-500);
|
68 |
+
}
|
69 |
+
|
70 |
+
.pipeline-header {
|
71 |
+
padding: 2px 0px 0px 2px;
|
72 |
+
color: var(--body-text-color);
|
73 |
+
}
|
74 |
+
|
75 |
+
.pipeline-description {
|
76 |
+
margin: auto;
|
77 |
+
color: var(--body-text-color);
|
78 |
+
}
|
79 |
"""
|
app/main.py
CHANGED
@@ -2,24 +2,13 @@ import shutil
|
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
from app.gradio_config import css, theme
|
5 |
-
from app.tabs.submit import
|
6 |
-
submit,
|
7 |
-
custom_template_yaml,
|
8 |
-
collection_submit_state,
|
9 |
-
)
|
10 |
from app.tabs.visualizer import visualizer, collection as collection_viz_state
|
11 |
-
|
12 |
-
from app.tabs.templating import (
|
13 |
-
templating_block,
|
14 |
-
TEMPLATE_IMAGE_FOLDER,
|
15 |
-
TEMPLATE_YAML_FOLDER,
|
16 |
-
template_output_yaml_code,
|
17 |
-
)
|
18 |
from gradio_modal import Modal
|
19 |
|
20 |
from htrflow.models.huggingface.trocr import TrOCR
|
21 |
|
22 |
-
|
23 |
gr.set_static_paths(paths=[TEMPLATE_YAML_FOLDER])
|
24 |
|
25 |
# TODO: fix api/ endpoints..
|
@@ -78,9 +67,6 @@ with gr.Blocks(title="HTRflow", theme=theme, css=css, head=matomo) as demo:
|
|
78 |
gr.Markdown(load_markdown(None, "main_sub_title"))
|
79 |
|
80 |
with gr.Tabs(elem_classes="top-navbar") as navbar:
|
81 |
-
with gr.Tab(label="Templating") as tab_templating:
|
82 |
-
templating_block.render()
|
83 |
-
|
84 |
with gr.Tab(label="Submit Job") as tab_submit:
|
85 |
submit.render()
|
86 |
|
@@ -91,40 +77,11 @@ with gr.Blocks(title="HTRflow", theme=theme, css=css, head=matomo) as demo:
|
|
91 |
def inital_trocr_load():
|
92 |
TrOCR("Riksarkivet/trocr-base-handwritten-hist-swe-2")
|
93 |
|
94 |
-
@demo.load()
|
95 |
-
def inital_yaml_code():
|
96 |
-
tmp_dir = "tmp/"
|
97 |
-
if os.path.exists(tmp_dir) and os.path.isdir(tmp_dir):
|
98 |
-
shutil.rmtree(tmp_dir)
|
99 |
-
|
100 |
-
@demo.load(
|
101 |
-
inputs=[template_output_yaml_code],
|
102 |
-
outputs=[template_output_yaml_code],
|
103 |
-
)
|
104 |
-
def inital_yaml_code(template_output_yaml_code):
|
105 |
-
return template_output_yaml_code
|
106 |
-
|
107 |
-
def sync_gradio_objects(input_value, state_value):
|
108 |
-
"""Synchronize the YAML state if there is a mismatch."""
|
109 |
-
return input_value if input_value != state_value else gr.skip()
|
110 |
-
|
111 |
def sync_gradio_object_state(input_value, state_value):
|
112 |
"""Synchronize the Collection."""
|
113 |
state_value = input_value
|
114 |
return state_value if state_value is not None else gr.skip()
|
115 |
|
116 |
-
tab_templating.select(
|
117 |
-
inputs=[custom_template_yaml, template_output_yaml_code],
|
118 |
-
outputs=[template_output_yaml_code],
|
119 |
-
fn=sync_gradio_objects,
|
120 |
-
)
|
121 |
-
|
122 |
-
tab_submit.select(
|
123 |
-
inputs=[template_output_yaml_code, custom_template_yaml],
|
124 |
-
outputs=[custom_template_yaml],
|
125 |
-
fn=sync_gradio_objects,
|
126 |
-
)
|
127 |
-
|
128 |
tab_visualizer.select(
|
129 |
inputs=[collection_submit_state, collection_viz_state],
|
130 |
outputs=[collection_viz_state],
|
|
|
2 |
import gradio as gr
|
3 |
import os
|
4 |
from app.gradio_config import css, theme
|
5 |
+
from app.tabs.submit import submit, collection_submit_state
|
|
|
|
|
|
|
|
|
6 |
from app.tabs.visualizer import visualizer, collection as collection_viz_state
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from gradio_modal import Modal
|
8 |
|
9 |
from htrflow.models.huggingface.trocr import TrOCR
|
10 |
|
11 |
+
TEMPLATE_YAML_FOLDER = "app/assets/templates"
|
12 |
gr.set_static_paths(paths=[TEMPLATE_YAML_FOLDER])
|
13 |
|
14 |
# TODO: fix api/ endpoints..
|
|
|
67 |
gr.Markdown(load_markdown(None, "main_sub_title"))
|
68 |
|
69 |
with gr.Tabs(elem_classes="top-navbar") as navbar:
|
|
|
|
|
|
|
70 |
with gr.Tab(label="Submit Job") as tab_submit:
|
71 |
submit.render()
|
72 |
|
|
|
77 |
def inital_trocr_load():
|
78 |
TrOCR("Riksarkivet/trocr-base-handwritten-hist-swe-2")
|
79 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
def sync_gradio_object_state(input_value, state_value):
|
81 |
"""Synchronize the Collection."""
|
82 |
state_value = input_value
|
83 |
return state_value if state_value is not None else gr.skip()
|
84 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
85 |
tab_visualizer.select(
|
86 |
inputs=[collection_submit_state, collection_viz_state],
|
87 |
outputs=[collection_viz_state],
|
app/tabs/submit.py
CHANGED
@@ -11,6 +11,19 @@ from htrflow.pipeline.steps import auto_import
|
|
11 |
import yaml
|
12 |
|
13 |
MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5)) # env: Maximum allowed images
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
15 |
|
16 |
class PipelineWithProgress(Pipeline):
|
@@ -157,34 +170,52 @@ def tracking_exported_files(tmp_output_paths):
|
|
157 |
return sorted(exported_files)
|
158 |
|
159 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
160 |
with gr.Blocks() as submit:
|
161 |
collection_submit_state = gr.State()
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
-
with gr.Column(variant="panel"):
|
164 |
-
|
165 |
-
with gr.Row():
|
166 |
-
with gr.Column(scale=1):
|
167 |
-
batch_image_gallery = gr.Gallery(
|
168 |
-
file_types=["image"],
|
169 |
-
label="Upload the images you want to transcribe",
|
170 |
-
interactive=True,
|
171 |
-
object_fit="cover",
|
172 |
-
)
|
173 |
|
174 |
-
with gr.Column(scale=1):
|
175 |
-
custom_template_yaml = gr.Code(
|
176 |
-
value="",
|
177 |
-
language="yaml",
|
178 |
-
label="Pipeline",
|
179 |
-
interactive=True,
|
180 |
-
)
|
181 |
with gr.Row():
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
|
|
186 |
)
|
187 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
188 |
@batch_image_gallery.upload(
|
189 |
inputs=batch_image_gallery,
|
190 |
outputs=[batch_image_gallery],
|
|
|
11 |
import yaml
|
12 |
|
13 |
MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5)) # env: Maximum allowed images
|
14 |
+
PIPELINE_DOCUMENTATION = (
|
15 |
+
"https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
|
16 |
+
)
|
17 |
+
PIPELINES = {
|
18 |
+
"Running text (Swedish)": {
|
19 |
+
"file": "app/assets/templates/2_nested.yaml",
|
20 |
+
"description": "This pipeline works well on documents with multiple text regions.",
|
21 |
+
},
|
22 |
+
"Letters (Swedish)": {
|
23 |
+
"file": "app/assets/templates/1_simple.yaml",
|
24 |
+
"description": "This pipeline works well on letters and other documents with only one text region.",
|
25 |
+
},
|
26 |
+
}
|
27 |
|
28 |
|
29 |
class PipelineWithProgress(Pipeline):
|
|
|
170 |
return sorted(exported_files)
|
171 |
|
172 |
|
173 |
+
def get_description(pipeline: str):
|
174 |
+
return PIPELINES[pipeline]["description"]
|
175 |
+
|
176 |
+
|
177 |
+
def get_yaml(pipeline: str):
|
178 |
+
with open(PIPELINES[pipeline]["file"], "r") as f:
|
179 |
+
pipeline = f.read()
|
180 |
+
return pipeline
|
181 |
+
|
182 |
+
|
183 |
with gr.Blocks() as submit:
|
184 |
collection_submit_state = gr.State()
|
185 |
+
batch_image_gallery = gr.Gallery(
|
186 |
+
file_types=["image"],
|
187 |
+
label="Upload the images you want to transcribe",
|
188 |
+
interactive=True,
|
189 |
+
object_fit="cover",
|
190 |
+
)
|
191 |
|
192 |
+
with gr.Column(variant="panel", elem_classes="pipeline-panel"):
|
193 |
+
gr.HTML("Pipeline", elem_classes="pipeline-header", padding=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
with gr.Row():
|
196 |
+
pipeline_dropdown = gr.Dropdown(
|
197 |
+
PIPELINES, container=False, min_width=240, scale=0, elem_classes="pipeline-dropdown"
|
198 |
+
)
|
199 |
+
pipeline_description = gr.HTML(
|
200 |
+
value=get_description, inputs=pipeline_dropdown, elem_classes="pipeline-description", padding=False
|
201 |
)
|
202 |
|
203 |
+
with gr.Group():
|
204 |
+
with gr.Accordion("Edit pipeline", open=False):
|
205 |
+
custom_template_yaml = gr.Code(
|
206 |
+
value=get_yaml, inputs=pipeline_dropdown, language="yaml", container=False
|
207 |
+
)
|
208 |
+
gr.HTML(
|
209 |
+
f'See the <a href="{PIPELINE_DOCUMENTATION}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
|
210 |
+
padding=False,
|
211 |
+
elem_classes="pipeline-help",
|
212 |
+
)
|
213 |
+
|
214 |
+
with gr.Row():
|
215 |
+
run_button = gr.Button("Submit", variant="primary", scale=0, min_width=200)
|
216 |
+
progess_bar = gr.Textbox(visible=False, show_label=False)
|
217 |
+
collection_output_files = gr.Files(label="Output Files", scale=0, min_width=400, visible=False)
|
218 |
+
|
219 |
@batch_image_gallery.upload(
|
220 |
inputs=batch_image_gallery,
|
221 |
outputs=[batch_image_gallery],
|
app/tabs/templating.py
DELETED
@@ -1,190 +0,0 @@
|
|
1 |
-
import gradio as gr
|
2 |
-
import os
|
3 |
-
import re
|
4 |
-
|
5 |
-
|
6 |
-
def get_sorted_files(folder, extensions):
|
7 |
-
"""Retrieve sorted files by numeric value in their names."""
|
8 |
-
return sorted(
|
9 |
-
[
|
10 |
-
os.path.join(folder, file)
|
11 |
-
for file in os.listdir(folder)
|
12 |
-
if file.lower().endswith(extensions)
|
13 |
-
],
|
14 |
-
key=lambda x: (
|
15 |
-
int(re.search(r"\d+", os.path.basename(x)).group())
|
16 |
-
if re.search(r"\d+", os.path.basename(x))
|
17 |
-
else float("inf")
|
18 |
-
),
|
19 |
-
)
|
20 |
-
|
21 |
-
|
22 |
-
def filter_files_by_prefix(files, prefix_pattern):
|
23 |
-
"""Filter files based on a regex prefix pattern."""
|
24 |
-
return [file for file in files if re.match(prefix_pattern, os.path.basename(file))]
|
25 |
-
|
26 |
-
|
27 |
-
def clean_file_names(files, prefix_to_remove):
|
28 |
-
"""Clean filenames by removing a specific prefix if present."""
|
29 |
-
return [
|
30 |
-
(
|
31 |
-
os.path.basename(file)[len(prefix_to_remove) :]
|
32 |
-
if os.path.basename(file).startswith(prefix_to_remove)
|
33 |
-
else os.path.basename(file)
|
34 |
-
)
|
35 |
-
for file in files
|
36 |
-
]
|
37 |
-
|
38 |
-
|
39 |
-
def get_yaml_content(yaml_path):
|
40 |
-
"""Read and return YAML content from a file."""
|
41 |
-
if os.path.isfile(yaml_path):
|
42 |
-
with open(yaml_path, "r") as file:
|
43 |
-
return file.read()
|
44 |
-
return "YAML content not available"
|
45 |
-
|
46 |
-
|
47 |
-
TEMPLATE_IMAGE_FOLDER = "app/assets/images"
|
48 |
-
TEMPLATE_YAML_FOLDER = "app/assets/templates"
|
49 |
-
|
50 |
-
image_files = get_sorted_files(
|
51 |
-
TEMPLATE_IMAGE_FOLDER, (".png", ".jpg", ".jpeg", ".webp")
|
52 |
-
)
|
53 |
-
yaml_files = get_sorted_files(TEMPLATE_YAML_FOLDER, (".yaml",))
|
54 |
-
|
55 |
-
yaml_files_numbered = filter_files_by_prefix(yaml_files, r"^\d")
|
56 |
-
yaml_files_c_letter = filter_files_by_prefix(yaml_files, r"^[cC]")
|
57 |
-
|
58 |
-
name_yaml_files_c_letter_cleaned = clean_file_names(yaml_files_c_letter, "c_")
|
59 |
-
name_to_yaml_map = dict(zip(name_yaml_files_c_letter_cleaned, yaml_files_c_letter))
|
60 |
-
|
61 |
-
|
62 |
-
def get_yaml_content(yaml_path):
|
63 |
-
if yaml_path and os.path.isfile(yaml_path):
|
64 |
-
with open(yaml_path, "r") as file:
|
65 |
-
return file.read()
|
66 |
-
return "YAML content not available"
|
67 |
-
|
68 |
-
|
69 |
-
with gr.Blocks() as templating_block:
|
70 |
-
with gr.Row(variant="panel"):
|
71 |
-
with gr.Column(scale=2):
|
72 |
-
with gr.Row():
|
73 |
-
dropdown_selection_template = gr.Dropdown(
|
74 |
-
label="Choose template",
|
75 |
-
info="Choice a suitable template for your material",
|
76 |
-
value="Simple",
|
77 |
-
choices=["Simple", "Nested", "Custom"],
|
78 |
-
multiselect=False,
|
79 |
-
interactive=True,
|
80 |
-
)
|
81 |
-
|
82 |
-
custom_dropdown_selection_template = gr.Dropdown(
|
83 |
-
label="Custom template",
|
84 |
-
info="Choice a different custom templates...",
|
85 |
-
value=name_yaml_files_c_letter_cleaned[0],
|
86 |
-
choices=name_yaml_files_c_letter_cleaned,
|
87 |
-
multiselect=False,
|
88 |
-
interactive=True,
|
89 |
-
visible=False,
|
90 |
-
)
|
91 |
-
|
92 |
-
with gr.Group():
|
93 |
-
with gr.Row():
|
94 |
-
with gr.Column(scale=1):
|
95 |
-
template_image = gr.Image(
|
96 |
-
label="Example Templates", value=image_files[0], height=400
|
97 |
-
)
|
98 |
-
with gr.Column(scale=1):
|
99 |
-
template_output_yaml_code = gr.Code(
|
100 |
-
language="yaml",
|
101 |
-
label="Pipeline",
|
102 |
-
interactive=True,
|
103 |
-
visible=True,
|
104 |
-
)
|
105 |
-
docs_link = gr.HTML(
|
106 |
-
value='<p><a href="https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines" target="_blank">📚 Click here 📚</a> for a detailed description on how to customize the configuration for HTRflow</p>',
|
107 |
-
visible=True,
|
108 |
-
)
|
109 |
-
|
110 |
-
@dropdown_selection_template.select(
|
111 |
-
inputs=dropdown_selection_template,
|
112 |
-
outputs=[
|
113 |
-
template_image,
|
114 |
-
template_output_yaml_code,
|
115 |
-
custom_dropdown_selection_template,
|
116 |
-
],
|
117 |
-
)
|
118 |
-
def on_template_select(dropdown_selection_template):
|
119 |
-
if dropdown_selection_template == "Simple":
|
120 |
-
yaml_content = get_yaml_content(yaml_files_numbered[0])
|
121 |
-
return image_files[0], yaml_content, gr.update(visible=False)
|
122 |
-
elif dropdown_selection_template == "Nested":
|
123 |
-
yaml_content = get_yaml_content(yaml_files_numbered[1])
|
124 |
-
return image_files[1], yaml_content, gr.update(visible=False)
|
125 |
-
elif dropdown_selection_template == "Custom":
|
126 |
-
yaml_content = get_yaml_content(yaml_files_c_letter[0])
|
127 |
-
return image_files[2], yaml_content, gr.update(visible=True)
|
128 |
-
else:
|
129 |
-
return gr.Error(
|
130 |
-
f"{dropdown_selection_template} - is not a valid Template selection"
|
131 |
-
)
|
132 |
-
|
133 |
-
@custom_dropdown_selection_template.select(
|
134 |
-
inputs=custom_dropdown_selection_template,
|
135 |
-
outputs=[template_output_yaml_code],
|
136 |
-
)
|
137 |
-
def on_custom_template_select(custom_template_selection):
|
138 |
-
yaml_path = name_to_yaml_map.get(custom_template_selection)
|
139 |
-
|
140 |
-
if yaml_path:
|
141 |
-
yaml_content = get_yaml_content(yaml_path)
|
142 |
-
return yaml_content
|
143 |
-
else:
|
144 |
-
return gr.Error(
|
145 |
-
f"{custom_template_selection} - is not a valid Custom Template selection"
|
146 |
-
)
|
147 |
-
|
148 |
-
@dropdown_selection_template.select(
|
149 |
-
inputs=dropdown_selection_template,
|
150 |
-
outputs=[template_output_yaml_code],
|
151 |
-
)
|
152 |
-
def check_for_custom_template(dropdown_selection_template):
|
153 |
-
if dropdown_selection_template == "Custom":
|
154 |
-
return gr.update(visible=True)
|
155 |
-
else:
|
156 |
-
return gr.skip()
|
157 |
-
|
158 |
-
templating_block.load(
|
159 |
-
fn=on_template_select,
|
160 |
-
inputs=dropdown_selection_template,
|
161 |
-
outputs=[
|
162 |
-
template_image,
|
163 |
-
template_output_yaml_code,
|
164 |
-
custom_dropdown_selection_template,
|
165 |
-
],
|
166 |
-
)
|
167 |
-
|
168 |
-
# TODO: Vi vill ändra namn på på fileerna så man ser vilken extension (format) fileerna är i
|
169 |
-
# rimes_test - kopia 2_page
|
170 |
-
# .xml
|
171 |
-
# 3.5 KB ⇣
|
172 |
-
# ×
|
173 |
-
# rimes_test - kopia
|
174 |
-
# .xml
|
175 |
-
# 3.5 KB ⇣
|
176 |
-
# ×
|
177 |
-
# rimes_test
|
178 |
-
# .xml
|
179 |
-
# 3.4 KB ⇣
|
180 |
-
# ×
|
181 |
-
# rimes_test - kopia 2
|
182 |
-
# .xml
|
183 |
-
# 1.7 KB ⇣
|
184 |
-
# ×
|
185 |
-
# rimes_test - kopia
|
186 |
-
# .xml
|
187 |
-
# 1.7 KB ⇣
|
188 |
-
# ×
|
189 |
-
# rimes_test
|
190 |
-
# .xml
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|