viklofg committed on
Commit
0658a37
·
1 Parent(s): 3cb882a

Merge templating and submit job tabs

Browse files

- Moved contents and functionality from 'Templating' tab to 'Submit Job' tab
- Removed the 'Templating' tab

Files changed (4) hide show
  1. app/gradio_config.py +30 -0
  2. app/main.py +2 -45
  3. app/tabs/submit.py +52 -21
  4. app/tabs/templating.py +0 -190
app/gradio_config.py CHANGED
@@ -46,4 +46,34 @@ hr.region-divider {
46
  margin-bottom: 0.5em;
47
  }
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  """
 
46
  margin-bottom: 0.5em;
47
  }
48
 
49
+ .pipeline-panel {
50
+ background: none;
51
+ border: solid 1px;
52
+ border-color: var(--block-border-color);
53
+ }
54
+
55
+ .pipeline-help {
56
+ padding: 5px 0 0 0;
57
+ font-weight: var(--block-info-text-weight);
58
+ font-size: var(--block-info-text-size);
59
+ color: var(--block-info-text-color);
60
+ }
61
+
62
+ .pipeline-help a {
63
+ color: var(--secondary-400);
64
+ }
65
+
66
+ .pipeline-help a:hover {
67
+ color: var(--secondary-500);
68
+ }
69
+
70
+ .pipeline-header {
71
+ padding: 2px 0px 0px 2px;
72
+ color: var(--body-text-color);
73
+ }
74
+
75
+ .pipeline-description {
76
+ margin: auto;
77
+ color: var(--body-text-color);
78
+ }
79
  """
app/main.py CHANGED
@@ -2,24 +2,13 @@ import shutil
2
  import gradio as gr
3
  import os
4
  from app.gradio_config import css, theme
5
- from app.tabs.submit import (
6
- submit,
7
- custom_template_yaml,
8
- collection_submit_state,
9
- )
10
  from app.tabs.visualizer import visualizer, collection as collection_viz_state
11
-
12
- from app.tabs.templating import (
13
- templating_block,
14
- TEMPLATE_IMAGE_FOLDER,
15
- TEMPLATE_YAML_FOLDER,
16
- template_output_yaml_code,
17
- )
18
  from gradio_modal import Modal
19
 
20
  from htrflow.models.huggingface.trocr import TrOCR
21
 
22
- gr.set_static_paths(paths=[TEMPLATE_IMAGE_FOLDER])
23
  gr.set_static_paths(paths=[TEMPLATE_YAML_FOLDER])
24
 
25
  # TODO: fix api/ endpoints..
@@ -78,9 +67,6 @@ with gr.Blocks(title="HTRflow", theme=theme, css=css, head=matomo) as demo:
78
  gr.Markdown(load_markdown(None, "main_sub_title"))
79
 
80
  with gr.Tabs(elem_classes="top-navbar") as navbar:
81
- with gr.Tab(label="Templating") as tab_templating:
82
- templating_block.render()
83
-
84
  with gr.Tab(label="Submit Job") as tab_submit:
85
  submit.render()
86
 
@@ -91,40 +77,11 @@ with gr.Blocks(title="HTRflow", theme=theme, css=css, head=matomo) as demo:
91
  def inital_trocr_load():
92
  TrOCR("Riksarkivet/trocr-base-handwritten-hist-swe-2")
93
 
94
- @demo.load()
95
- def inital_yaml_code():
96
- tmp_dir = "tmp/"
97
- if os.path.exists(tmp_dir) and os.path.isdir(tmp_dir):
98
- shutil.rmtree(tmp_dir)
99
-
100
- @demo.load(
101
- inputs=[template_output_yaml_code],
102
- outputs=[template_output_yaml_code],
103
- )
104
- def inital_yaml_code(template_output_yaml_code):
105
- return template_output_yaml_code
106
-
107
- def sync_gradio_objects(input_value, state_value):
108
- """Synchronize the YAML state if there is a mismatch."""
109
- return input_value if input_value != state_value else gr.skip()
110
-
111
  def sync_gradio_object_state(input_value, state_value):
112
  """Synchronize the Collection."""
113
  state_value = input_value
114
  return state_value if state_value is not None else gr.skip()
115
 
116
- tab_templating.select(
117
- inputs=[custom_template_yaml, template_output_yaml_code],
118
- outputs=[template_output_yaml_code],
119
- fn=sync_gradio_objects,
120
- )
121
-
122
- tab_submit.select(
123
- inputs=[template_output_yaml_code, custom_template_yaml],
124
- outputs=[custom_template_yaml],
125
- fn=sync_gradio_objects,
126
- )
127
-
128
  tab_visualizer.select(
129
  inputs=[collection_submit_state, collection_viz_state],
130
  outputs=[collection_viz_state],
 
2
  import gradio as gr
3
  import os
4
  from app.gradio_config import css, theme
5
+ from app.tabs.submit import submit, collection_submit_state
 
 
 
 
6
  from app.tabs.visualizer import visualizer, collection as collection_viz_state
 
 
 
 
 
 
 
7
  from gradio_modal import Modal
8
 
9
  from htrflow.models.huggingface.trocr import TrOCR
10
 
11
+ TEMPLATE_YAML_FOLDER = "app/assets/templates"
12
  gr.set_static_paths(paths=[TEMPLATE_YAML_FOLDER])
13
 
14
  # TODO: fix api/ endpoints..
 
67
  gr.Markdown(load_markdown(None, "main_sub_title"))
68
 
69
  with gr.Tabs(elem_classes="top-navbar") as navbar:
 
 
 
70
  with gr.Tab(label="Submit Job") as tab_submit:
71
  submit.render()
72
 
 
77
  def inital_trocr_load():
78
  TrOCR("Riksarkivet/trocr-base-handwritten-hist-swe-2")
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def sync_gradio_object_state(input_value, state_value):
81
  """Synchronize the Collection."""
82
  state_value = input_value
83
  return state_value if state_value is not None else gr.skip()
84
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  tab_visualizer.select(
86
  inputs=[collection_submit_state, collection_viz_state],
87
  outputs=[collection_viz_state],
app/tabs/submit.py CHANGED
@@ -11,6 +11,19 @@ from htrflow.pipeline.steps import auto_import
11
  import yaml
12
 
13
  MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5)) # env: Maximum allowed images
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
 
16
  class PipelineWithProgress(Pipeline):
@@ -157,34 +170,52 @@ def tracking_exported_files(tmp_output_paths):
157
  return sorted(exported_files)
158
 
159
 
 
 
 
 
 
 
 
 
 
 
160
  with gr.Blocks() as submit:
161
  collection_submit_state = gr.State()
 
 
 
 
 
 
162
 
163
- with gr.Column(variant="panel"):
164
- with gr.Group():
165
- with gr.Row():
166
- with gr.Column(scale=1):
167
- batch_image_gallery = gr.Gallery(
168
- file_types=["image"],
169
- label="Upload the images you want to transcribe",
170
- interactive=True,
171
- object_fit="cover",
172
- )
173
 
174
- with gr.Column(scale=1):
175
- custom_template_yaml = gr.Code(
176
- value="",
177
- language="yaml",
178
- label="Pipeline",
179
- interactive=True,
180
- )
181
  with gr.Row():
182
- run_button = gr.Button("Submit", variant="primary", scale=0, min_width=200)
183
- progess_bar = gr.Textbox(visible=False, show_label=False)
184
- collection_output_files = gr.Files(
185
- label="Output Files", scale=0, min_width=400, visible=False
 
186
  )
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  @batch_image_gallery.upload(
189
  inputs=batch_image_gallery,
190
  outputs=[batch_image_gallery],
 
11
  import yaml
12
 
13
  MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5)) # env: Maximum allowed images
14
+ PIPELINE_DOCUMENTATION = (
15
+ "https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
16
+ )
17
+ PIPELINES = {
18
+ "Running text (Swedish)": {
19
+ "file": "app/assets/templates/2_nested.yaml",
20
+ "description": "This pipeline works well on documents with multiple text regions.",
21
+ },
22
+ "Letters (Swedish)": {
23
+ "file": "app/assets/templates/1_simple.yaml",
24
+ "description": "This pipeline works well on letters and other documents with only one text region.",
25
+ },
26
+ }
27
 
28
 
29
  class PipelineWithProgress(Pipeline):
 
170
  return sorted(exported_files)
171
 
172
 
173
+ def get_description(pipeline: str):
174
+ return PIPELINES[pipeline]["description"]
175
+
176
+
177
+ def get_yaml(pipeline: str):
178
+ with open(PIPELINES[pipeline]["file"], "r") as f:
179
+ pipeline = f.read()
180
+ return pipeline
181
+
182
+
183
  with gr.Blocks() as submit:
184
  collection_submit_state = gr.State()
185
+ batch_image_gallery = gr.Gallery(
186
+ file_types=["image"],
187
+ label="Upload the images you want to transcribe",
188
+ interactive=True,
189
+ object_fit="cover",
190
+ )
191
 
192
+ with gr.Column(variant="panel", elem_classes="pipeline-panel"):
193
+ gr.HTML("Pipeline", elem_classes="pipeline-header", padding=False)
 
 
 
 
 
 
 
 
194
 
 
 
 
 
 
 
 
195
  with gr.Row():
196
+ pipeline_dropdown = gr.Dropdown(
197
+ PIPELINES, container=False, min_width=240, scale=0, elem_classes="pipeline-dropdown"
198
+ )
199
+ pipeline_description = gr.HTML(
200
+ value=get_description, inputs=pipeline_dropdown, elem_classes="pipeline-description", padding=False
201
  )
202
 
203
+ with gr.Group():
204
+ with gr.Accordion("Edit pipeline", open=False):
205
+ custom_template_yaml = gr.Code(
206
+ value=get_yaml, inputs=pipeline_dropdown, language="yaml", container=False
207
+ )
208
+ gr.HTML(
209
+ f'See the <a href="{PIPELINE_DOCUMENTATION}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
210
+ padding=False,
211
+ elem_classes="pipeline-help",
212
+ )
213
+
214
+ with gr.Row():
215
+ run_button = gr.Button("Submit", variant="primary", scale=0, min_width=200)
216
+ progess_bar = gr.Textbox(visible=False, show_label=False)
217
+ collection_output_files = gr.Files(label="Output Files", scale=0, min_width=400, visible=False)
218
+
219
  @batch_image_gallery.upload(
220
  inputs=batch_image_gallery,
221
  outputs=[batch_image_gallery],
app/tabs/templating.py DELETED
@@ -1,190 +0,0 @@
1
- import gradio as gr
2
- import os
3
- import re
4
-
5
-
6
- def get_sorted_files(folder, extensions):
7
- """Retrieve sorted files by numeric value in their names."""
8
- return sorted(
9
- [
10
- os.path.join(folder, file)
11
- for file in os.listdir(folder)
12
- if file.lower().endswith(extensions)
13
- ],
14
- key=lambda x: (
15
- int(re.search(r"\d+", os.path.basename(x)).group())
16
- if re.search(r"\d+", os.path.basename(x))
17
- else float("inf")
18
- ),
19
- )
20
-
21
-
22
- def filter_files_by_prefix(files, prefix_pattern):
23
- """Filter files based on a regex prefix pattern."""
24
- return [file for file in files if re.match(prefix_pattern, os.path.basename(file))]
25
-
26
-
27
- def clean_file_names(files, prefix_to_remove):
28
- """Clean filenames by removing a specific prefix if present."""
29
- return [
30
- (
31
- os.path.basename(file)[len(prefix_to_remove) :]
32
- if os.path.basename(file).startswith(prefix_to_remove)
33
- else os.path.basename(file)
34
- )
35
- for file in files
36
- ]
37
-
38
-
39
- def get_yaml_content(yaml_path):
40
- """Read and return YAML content from a file."""
41
- if os.path.isfile(yaml_path):
42
- with open(yaml_path, "r") as file:
43
- return file.read()
44
- return "YAML content not available"
45
-
46
-
47
- TEMPLATE_IMAGE_FOLDER = "app/assets/images"
48
- TEMPLATE_YAML_FOLDER = "app/assets/templates"
49
-
50
- image_files = get_sorted_files(
51
- TEMPLATE_IMAGE_FOLDER, (".png", ".jpg", ".jpeg", ".webp")
52
- )
53
- yaml_files = get_sorted_files(TEMPLATE_YAML_FOLDER, (".yaml",))
54
-
55
- yaml_files_numbered = filter_files_by_prefix(yaml_files, r"^\d")
56
- yaml_files_c_letter = filter_files_by_prefix(yaml_files, r"^[cC]")
57
-
58
- name_yaml_files_c_letter_cleaned = clean_file_names(yaml_files_c_letter, "c_")
59
- name_to_yaml_map = dict(zip(name_yaml_files_c_letter_cleaned, yaml_files_c_letter))
60
-
61
-
62
- def get_yaml_content(yaml_path):
63
- if yaml_path and os.path.isfile(yaml_path):
64
- with open(yaml_path, "r") as file:
65
- return file.read()
66
- return "YAML content not available"
67
-
68
-
69
- with gr.Blocks() as templating_block:
70
- with gr.Row(variant="panel"):
71
- with gr.Column(scale=2):
72
- with gr.Row():
73
- dropdown_selection_template = gr.Dropdown(
74
- label="Choose template",
75
- info="Choice a suitable template for your material",
76
- value="Simple",
77
- choices=["Simple", "Nested", "Custom"],
78
- multiselect=False,
79
- interactive=True,
80
- )
81
-
82
- custom_dropdown_selection_template = gr.Dropdown(
83
- label="Custom template",
84
- info="Choice a different custom templates...",
85
- value=name_yaml_files_c_letter_cleaned[0],
86
- choices=name_yaml_files_c_letter_cleaned,
87
- multiselect=False,
88
- interactive=True,
89
- visible=False,
90
- )
91
-
92
- with gr.Group():
93
- with gr.Row():
94
- with gr.Column(scale=1):
95
- template_image = gr.Image(
96
- label="Example Templates", value=image_files[0], height=400
97
- )
98
- with gr.Column(scale=1):
99
- template_output_yaml_code = gr.Code(
100
- language="yaml",
101
- label="Pipeline",
102
- interactive=True,
103
- visible=True,
104
- )
105
- docs_link = gr.HTML(
106
- value='<p><a href="https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines" target="_blank">📚 Click here 📚</a> for a detailed description on how to customize the configuration for HTRflow</p>',
107
- visible=True,
108
- )
109
-
110
- @dropdown_selection_template.select(
111
- inputs=dropdown_selection_template,
112
- outputs=[
113
- template_image,
114
- template_output_yaml_code,
115
- custom_dropdown_selection_template,
116
- ],
117
- )
118
- def on_template_select(dropdown_selection_template):
119
- if dropdown_selection_template == "Simple":
120
- yaml_content = get_yaml_content(yaml_files_numbered[0])
121
- return image_files[0], yaml_content, gr.update(visible=False)
122
- elif dropdown_selection_template == "Nested":
123
- yaml_content = get_yaml_content(yaml_files_numbered[1])
124
- return image_files[1], yaml_content, gr.update(visible=False)
125
- elif dropdown_selection_template == "Custom":
126
- yaml_content = get_yaml_content(yaml_files_c_letter[0])
127
- return image_files[2], yaml_content, gr.update(visible=True)
128
- else:
129
- return gr.Error(
130
- f"{dropdown_selection_template} - is not a valid Template selection"
131
- )
132
-
133
- @custom_dropdown_selection_template.select(
134
- inputs=custom_dropdown_selection_template,
135
- outputs=[template_output_yaml_code],
136
- )
137
- def on_custom_template_select(custom_template_selection):
138
- yaml_path = name_to_yaml_map.get(custom_template_selection)
139
-
140
- if yaml_path:
141
- yaml_content = get_yaml_content(yaml_path)
142
- return yaml_content
143
- else:
144
- return gr.Error(
145
- f"{custom_template_selection} - is not a valid Custom Template selection"
146
- )
147
-
148
- @dropdown_selection_template.select(
149
- inputs=dropdown_selection_template,
150
- outputs=[template_output_yaml_code],
151
- )
152
- def check_for_custom_template(dropdown_selection_template):
153
- if dropdown_selection_template == "Custom":
154
- return gr.update(visible=True)
155
- else:
156
- return gr.skip()
157
-
158
- templating_block.load(
159
- fn=on_template_select,
160
- inputs=dropdown_selection_template,
161
- outputs=[
162
- template_image,
163
- template_output_yaml_code,
164
- custom_dropdown_selection_template,
165
- ],
166
- )
167
-
168
- # TODO: We want to rename the files so that it is clear which extension (format) each file has
169
- # rimes_test - kopia 2_page
170
- # .xml
171
- # 3.5 KB ⇣
172
- # ×
173
- # rimes_test - kopia
174
- # .xml
175
- # 3.5 KB ⇣
176
- # ×
177
- # rimes_test
178
- # .xml
179
- # 3.4 KB ⇣
180
- # ×
181
- # rimes_test - kopia 2
182
- # .xml
183
- # 1.7 KB ⇣
184
- # ×
185
- # rimes_test - kopia
186
- # .xml
187
- # 1.7 KB ⇣
188
- # ×
189
- # rimes_test
190
- # .xml