Gabriel committed on
Commit
14d4a0b
1 Parent(s): 43c84d4

test new scheduler

Browse files
.gitignore CHANGED
@@ -23,3 +23,9 @@ page_txt.txt
23
  transcribed_text.txt
24
  helper/examples/.cache_images/
25
  helper/examples/images/localtest/
 
 
 
 
 
 
 
23
  transcribed_text.txt
24
  helper/examples/.cache_images/
25
  helper/examples/images/localtest/
26
+ .env
27
+ TODO.md
28
+ .cache_images/
29
+ traffic_data.db
30
+ ip_data.csv
31
+ data/
app.py CHANGED
@@ -1,4 +1,14 @@
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
2
 
3
  from helper.gradio_config import css, theme
4
  from helper.text.text_about import TextAbout
@@ -8,8 +18,78 @@ from helper.text.text_roadmap import TextRoadmap
8
  from tabs.htr_tool import htr_tool_tab
9
  from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
12
- gr.Markdown(TextApp.title_markdown)
 
 
 
 
 
 
13
 
14
  with gr.Tabs():
15
  with gr.Tab("HTR Tool"):
@@ -18,61 +98,16 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
18
  with gr.Tab("Stepwise HTR Tool"):
19
  stepwise_htr_tool_tab.render()
20
 
21
- with gr.Tab("How to use"):
22
- with gr.Tabs():
23
- with gr.Tab("HTR Tool"):
24
- with gr.Row(equal_height=False):
25
- with gr.Column():
26
- gr.Markdown(TextHowTo.htr_tool)
27
- with gr.Column():
28
- gr.Markdown(TextHowTo.both_htr_tool_video)
29
- gr.Video(
30
- value="https://github.com/Borg93/htr_gradio_file_placeholder/raw/main/htr_tool_media_cut.mp4",
31
- label="How to use HTR Tool",
32
- )
33
- gr.Markdown(TextHowTo.reach_out)
34
-
35
- with gr.Tab("Stepwise HTR Tool"):
36
- with gr.Row(equal_height=False):
37
- gr.Markdown(TextHowTo.stepwise_htr_tool)
38
- with gr.Row():
39
- gr.Markdown(TextHowTo.stepwise_htr_tool_tab_intro)
40
- with gr.Row():
41
- with gr.Accordion("The tabs for the Stepwise HTR Tool:", open=True):
42
- with gr.Tabs():
43
- with gr.Tab("1. Region Segmentation"):
44
- gr.Markdown(TextHowTo.stepwise_htr_tool_tab1)
45
- with gr.Tab("2. Line Segmentation"):
46
- gr.Markdown(TextHowTo.stepwise_htr_tool_tab2)
47
- with gr.Tab("3. Transcribe Text"):
48
- gr.Markdown(TextHowTo.stepwise_htr_tool_tab3)
49
- with gr.Tab("4. Explore Results"):
50
- gr.Markdown(TextHowTo.stepwise_htr_tool_tab4)
51
- gr.Markdown(TextHowTo.stepwise_htr_tool_end)
52
-
53
- with gr.Tab("API & Duplicate for Privat use"):
54
- with gr.Row():
55
- with gr.Column():
56
- gr.Markdown(TextHowTo.htr_tool_api_text)
57
- gr.Code(
58
- value=TextHowTo.code_for_api,
59
- language="python",
60
- interactive=False,
61
- show_label=False,
62
- )
63
- with gr.Column():
64
- gr.Markdown(TextHowTo.duplicatin_space_htr_text)
65
- gr.Markdown(TextHowTo.figure_htr_hardware)
66
- gr.Markdown(TextHowTo.duplicatin_for_privat)
67
-
68
  with gr.Tab("About"):
69
  with gr.Tabs():
70
  with gr.Tab("Project"):
71
  with gr.Row():
72
  with gr.Column():
73
- gr.Markdown(TextAbout.intro_and_pipeline_overview_text)
74
  with gr.Column():
75
  gr.Markdown(TextAbout.text_src_code_data_models)
 
 
76
  with gr.Row():
77
  with gr.Tabs():
78
  with gr.Tab("I. Binarization"):
@@ -88,6 +123,21 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
88
  with gr.Row():
89
  gr.Markdown(TextRoadmap.text_contribution)
90
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91
  with gr.Tab("Roadmap"):
92
  with gr.Row():
93
  with gr.Column():
@@ -95,7 +145,7 @@ with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
95
  with gr.Column():
96
  gr.Markdown(TextRoadmap.discussion)
97
 
98
- # demo.load(None, None, None, _js=js)
99
 
100
 
101
  demo.queue(concurrency_count=2, max_size=2)
 
1
+ import hashlib
2
+ import os
3
+ import shutil
4
+ import sqlite3
5
+ from datetime import datetime
6
+
7
  import gradio as gr
8
+ import huggingface_hub
9
+ import pandas as pd
10
+ import pytz
11
+ from apscheduler.schedulers.background import BackgroundScheduler
12
 
13
  from helper.gradio_config import css, theme
14
  from helper.text.text_about import TextAbout
 
18
  from tabs.htr_tool import htr_tool_tab
19
  from tabs.stepwise_htr_tool import stepwise_htr_tool_tab
20
 
21
# Path of the SQLite file the running app reads from and writes to.
DB_FILE = "./traffic_data.db"

# Hub access token taken from the environment; None when HUB_TOKEN is unset.
TOKEN = os.getenv("HUB_TOKEN")

# Local checkout (in ./data) of the dataset repository that stores the traffic data.
repo = huggingface_hub.Repository(
    local_dir="data",
    repo_type="dataset",
    clone_from="Riksarkivet/traffic_demo_data",
    use_auth_token=TOKEN,
)
repo.git_pull()

# Start from the database file held in the dataset checkout.
shutil.copyfile("./data/traffic_data.db", DB_FILE)
31
+
32
+
33
def hash_ip(ip_address, salt=""):
    """Return a hex digest that pseudonymises ``ip_address``.

    Args:
        ip_address: The address to hash, as a string.
        salt: Optional secret key. When empty (the default) the result is the
            plain SHA-256 hex digest of the address, exactly as before. When
            given, the digest is an HMAC-SHA256 keyed with ``salt``.

    Returns:
        A 64-character lowercase hexadecimal string.
    """
    payload = ip_address.encode()
    if not salt:
        # Unkeyed path kept so digests stay comparable with rows already stored.
        return hashlib.sha256(payload).hexdigest()

    # A plain hash over the small IP address space can be reversed by
    # enumerating every address; keying the hash with a secret prevents that.
    import hmac

    return hmac.new(salt.encode(), payload, hashlib.sha256).hexdigest()
35
+
36
+
37
# Create the ip_data table if it doesn't already exist.
# CREATE TABLE IF NOT EXISTS lets SQLite do the existence check itself, so the
# whole table no longer has to be read just to find out whether it is there.
db = sqlite3.connect(DB_FILE)
try:
    db.execute(
        """
        CREATE TABLE IF NOT EXISTS ip_data (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                              current_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                              hashed_ip TEXT)
        """
    )
    db.commit()
finally:
    # Release the connection whether or not the statement succeeded.
    db.close()
52
+
53
+
54
def current_time_sw():
    """Return the current Europe/Stockholm wall-clock time as a string.

    The result is formatted as ``YYYY-MM-DD HH:MM:SS`` and carries no UTC
    offset.
    """
    stockholm_now = datetime.now(pytz.timezone("Europe/Stockholm"))
    return stockholm_now.strftime("%Y-%m-%d %H:%M:%S")
57
+
58
+
59
def add_ip_data(request: gr.Request):
    """Record one visit: store the hashed client address with a timestamp.

    Args:
        request: The request object Gradio passes to the handler. Its
            ``client.host`` value is hashed with ``hash_ip`` before storage.

    Returns:
        None. When the request carries no client address, nothing is written.
    """
    client = request.client
    host = client.host if client is not None else None
    if not host:
        # No peer address to record; skip instead of failing the page load.
        return

    hashed_ip = hash_ip(host)

    db = sqlite3.connect(DB_FILE)
    try:
        db.execute(
            "INSERT INTO ip_data(current_time, hashed_ip) VALUES(?,?)",
            [current_time_sw(), hashed_ip],
        )
        db.commit()
    finally:
        # Always release the connection, even if the insert raises.
        db.close()
68
+
69
+
70
def backup_db():
    """Copy the traffic database and a CSV export into ./data and push them.

    Steps:
        1. Copy ``DB_FILE`` to ``./data/traffic_data.db``.
        2. Dump every row of ``ip_data`` to ``./data/ip_data.csv``.
        3. Push the ``repo`` checkout to the Hub without blocking.
    """
    # NOTE(review): this is a plain file copy of a database that request
    # handlers may be writing to at the same time — confirm that is acceptable.
    shutil.copyfile(DB_FILE, "./data/traffic_data.db")

    db = sqlite3.connect(DB_FILE)
    try:
        ip_data = db.execute("SELECT * FROM ip_data").fetchall()
    finally:
        # This function runs on a recurring schedule; close the connection
        # each time so handles do not accumulate.
        db.close()

    pd.DataFrame(ip_data, columns=["id", "current_time", "hashed_ip"]).to_csv("./data/ip_data.csv", index=False)

    print("updating traffic_data")
    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.now()}")
78
+
79
+
80
# Schedule backup_db to run on a 60-second interval and start the scheduler.
scheduler = BackgroundScheduler()
scheduler.add_job(backup_db, "interval", seconds=60)
scheduler.start()
83
+
84
+
85
  with gr.Blocks(title="HTR Riksarkivet", theme=theme, css=css) as demo:
86
+ with gr.Row():
87
+ with gr.Column(scale=1):
88
+ text_ip_output = gr.Markdown()
89
+ with gr.Column(scale=1):
90
+ gr.Markdown(TextApp.title_markdown)
91
+ with gr.Column(scale=1):
92
+ gr.Markdown(TextApp.title_markdown_img)
93
 
94
  with gr.Tabs():
95
  with gr.Tab("HTR Tool"):
 
98
  with gr.Tab("Stepwise HTR Tool"):
99
  stepwise_htr_tool_tab.render()
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  with gr.Tab("About"):
102
  with gr.Tabs():
103
  with gr.Tab("Project"):
104
  with gr.Row():
105
  with gr.Column():
106
+ gr.Markdown(TextAbout.intro_text)
107
  with gr.Column():
108
  gr.Markdown(TextAbout.text_src_code_data_models)
109
+ with gr.Row():
110
+ gr.Markdown(TextAbout.pipeline_overview_text)
111
  with gr.Row():
112
  with gr.Tabs():
113
  with gr.Tab("I. Binarization"):
 
123
  with gr.Row():
124
  gr.Markdown(TextRoadmap.text_contribution)
125
 
126
+ with gr.Tab("API & Duplicate for Privat use"):
127
+ with gr.Row():
128
+ with gr.Column():
129
+ gr.Markdown(TextHowTo.htr_tool_api_text)
130
+ gr.Code(
131
+ value=TextHowTo.code_for_api,
132
+ language="python",
133
+ interactive=False,
134
+ show_label=False,
135
+ )
136
+ with gr.Column():
137
+ gr.Markdown(TextHowTo.duplicatin_space_htr_text)
138
+ gr.Markdown(TextHowTo.figure_htr_hardware)
139
+ gr.Markdown(TextHowTo.duplicatin_for_privat)
140
+
141
  with gr.Tab("Roadmap"):
142
  with gr.Row():
143
  with gr.Column():
 
145
  with gr.Column():
146
  gr.Markdown(TextRoadmap.discussion)
147
 
148
+ demo.load(add_ip_data)
149
 
150
 
151
  demo.queue(concurrency_count=2, max_size=2)
helper/text/text_about.py CHANGED
@@ -1,6 +1,6 @@
1
  class TextAbout:
2
  # About text
3
- intro_and_pipeline_overview_text = """
4
 
5
  ## Introduction
6
  The Swedish National Archives introduces a demonstrational end-to-end HTR (Handwritten Text Recognition) pipeline. This pipeline comprises two instance segmentation models: one designated for segmenting text-regions and another for isolating text-lines within these regions, coupled with an HTR model for image-to-text transcription. The objective of this project is to establish a generic pipeline capable of processing running-text documents spanning from 1600 to 1900.
@@ -12,10 +12,14 @@ class TextAbout:
12
  - Navigate to the > **About** > **Roadmap**.
13
 
14
  To understand how to utilize this application through a REST API, self-host or via Docker,
15
- - Navigate to the > **How to Use** > **API & Duplicate for Private Use**.
 
 
16
 
17
  ## The Pipeline in Overview
18
-
 
 
19
  The steps in the pipeline can be seen below as follows:
20
  """
21
 
 
1
  class TextAbout:
2
  # About text
3
+ intro_text = """
4
 
5
  ## Introduction
6
  The Swedish National Archives introduces a demonstrational end-to-end HTR (Handwritten Text Recognition) pipeline. This pipeline comprises two instance segmentation models: one designated for segmenting text-regions and another for isolating text-lines within these regions, coupled with an HTR model for image-to-text transcription. The objective of this project is to establish a generic pipeline capable of processing running-text documents spanning from 1600 to 1900.
 
12
  - Navigate to the > **About** > **Roadmap**.
13
 
14
  To understand how to utilize this application through a REST API, self-host or via Docker,
15
+ - Navigate to the > **About** > **How to Use** > **API & Duplicate for Private Use**.
16
+
17
+ """
18
 
19
  ## The Pipeline in Overview
20
+ pipeline_overview_text = """
21
+ ## The Pipeline in Overview
22
+
23
  The steps in the pipeline can be seen below as follows:
24
  """
25
 
helper/text/text_app.py CHANGED
@@ -1,12 +1,16 @@
1
  class TextApp:
2
  title_markdown = """
3
 
4
- <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="4%" align="right" margin-right="100" />
5
 
6
- <h1><center> Handwritten Text Recognition Tool </center></h1>
7
 
8
  <h3><center> Swedish National Archives - Riksarkivet </center></h3>"""
9
 
 
 
 
 
 
10
 
11
  if __name__ == "__main__":
12
  pass
 
1
  class TextApp:
2
  title_markdown = """
3
 
 
4
 
5
+ <h1><center> HTRflow - Demo </center></h1>
6
 
7
  <h3><center> Swedish National Archives - Riksarkivet </center></h3>"""
8
 
9
+ title_markdown_img = """
10
+ <img src="https://raw.githubusercontent.com/Borg93/Riksarkivet_docs/main/docs/assets/fav-removebg-preview.png" width="13%" align="right" margin-right="100" />
11
+
12
+ """
13
+
14
 
15
  if __name__ == "__main__":
16
  pass
requirements.txt CHANGED
@@ -7,9 +7,13 @@ numpy==1.25.0
7
  opencv-python-headless==4.7.0.72
8
  Jinja2==3.1.2
9
  transformers==4.30.2
10
- huggingface-hub==0.15.1
11
  datasets==2.14.5
12
  requests==2.31.0
 
 
 
 
13
  # pillow==9.5.0
14
 
15
 
 
7
  opencv-python-headless==4.7.0.72
8
  Jinja2==3.1.2
9
  transformers==4.30.2
10
+ huggingface-hub
11
  datasets==2.14.5
12
  requests==2.31.0
13
+ apscheduler
14
+ pytz
15
+ jiwer
16
+ evaluate
17
  # pillow==9.5.0
18
 
19
 
src/htr_pipeline/gradio_backend.py CHANGED
@@ -117,7 +117,7 @@ class CustomTrack:
117
  gr.update(visible=True),
118
  )
119
 
120
- def transcribe_text(self, df, images):
121
  gr.Info("Running Transcribe Lines")
122
  transcription_temp_list_with_score = []
123
  mapping_dict = {}
@@ -142,11 +142,11 @@ class CustomTrack:
142
  transcription_temp_list_with_score, columns=["Transcribed text", "Pred score"]
143
  )
144
 
 
 
145
  mapping_dict[transcribed_text] = image
146
 
147
- yield df_trans_explore[
148
- ["Transcribed text"]
149
- ], df_trans_explore, mapping_dict, bool_to_show_control_results_transcribe, bool_to_show_placeholder
150
 
151
  def get_select_index_image(self, images_from_gallery, evt: gr.SelectData):
152
  return images_from_gallery[evt.index]["name"]
 
117
  gr.update(visible=True),
118
  )
119
 
120
+ def transcribe_text(self, images):
121
  gr.Info("Running Transcribe Lines")
122
  transcription_temp_list_with_score = []
123
  mapping_dict = {}
 
142
  transcription_temp_list_with_score, columns=["Transcribed text", "Pred score"]
143
  )
144
 
145
+ joined_transcription_temp_list = "\n".join([tup[0] for tup in transcription_temp_list_with_score])
146
+
147
  mapping_dict[transcribed_text] = image
148
 
149
+ yield joined_transcription_temp_list, df_trans_explore, mapping_dict, bool_to_show_control_results_transcribe, bool_to_show_placeholder
 
 
150
 
151
  def get_select_index_image(self, images_from_gallery, evt: gr.SelectData):
152
  return images_from_gallery[evt.index]["name"]
tabs/htr_tool.py CHANGED
@@ -155,6 +155,15 @@ with gr.Blocks() as htr_tool_tab:
155
  api_name="predict",
156
  )
157
 
 
 
 
 
 
 
 
 
 
158
  def update_selected_tab_output_and_setting():
159
  return gr.update(visible=True), gr.update(visible=False)
160
 
 
155
  api_name="predict",
156
  )
157
 
158
+ def dummy_update_htr_tool_transcriber_model_dropdown(htr_tool_transcriber_model_dropdown):
159
+ return gr.update(value="Riksarkivet/satrn_htr")
160
+
161
+ htr_tool_transcriber_model_dropdown.change(
162
+ fn=dummy_update_htr_tool_transcriber_model_dropdown,
163
+ inputs=htr_tool_transcriber_model_dropdown,
164
+ outputs=htr_tool_transcriber_model_dropdown,
165
+ )
166
+
167
  def update_selected_tab_output_and_setting():
168
  return gr.update(visible=True), gr.update(visible=False)
169
 
tabs/stepwise_htr_tool.py CHANGED
@@ -1,9 +1,11 @@
1
  import os
2
  import shutil
3
 
 
4
  import gradio as gr
5
 
6
  from helper.examples.examples import DemoImages
 
7
  from src.htr_pipeline.gradio_backend import CustomTrack, SingletonModelLoader
8
 
9
  model_loader = SingletonModelLoader()
@@ -12,9 +14,29 @@ custom_track = CustomTrack(model_loader)
12
 
13
  images_for_demo = DemoImages()
14
 
 
 
 
15
  with gr.Blocks() as stepwise_htr_tool_tab:
16
  with gr.Tabs():
17
  with gr.Tab("1. Region Segmentation"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  with gr.Row():
19
  with gr.Column(scale=2):
20
  vis_data_folder_placeholder = gr.Markdown(visible=False)
@@ -25,38 +47,9 @@ with gr.Blocks() as stepwise_htr_tool_tab:
25
  label="Image to Region segment",
26
  # type="numpy",
27
  tool="editor",
28
- height=350,
29
  )
30
 
31
- with gr.Accordion("Region segment settings:", open=False):
32
- with gr.Row():
33
- reg_pred_score_threshold_slider = gr.Slider(
34
- minimum=0.4,
35
- maximum=1,
36
- value=0.5,
37
- step=0.05,
38
- label="P-threshold",
39
- info="""Filter and determine the confidence score
40
- required for a prediction score to be considered""",
41
- )
42
- reg_containments_threshold_slider = gr.Slider(
43
- minimum=0,
44
- maximum=1,
45
- value=0.5,
46
- step=0.05,
47
- label="C-threshold",
48
- info="""The minimum required overlap or similarity
49
- for a detected region or object to be considered valid""",
50
- )
51
-
52
- with gr.Row():
53
- region_segment_model_dropdown = gr.Dropdown(
54
- choices=["Riksarkivet/RmtDet_region"],
55
- value="Riksarkivet/RmtDet_region",
56
- label="Region segment model",
57
- info="Will add more models later!",
58
- )
59
-
60
  with gr.Row():
61
  clear_button = gr.Button("Clear", variant="secondary", elem_id="clear_button")
62
 
@@ -66,7 +59,36 @@ with gr.Blocks() as stepwise_htr_tool_tab:
66
  elem_id="region_segment_button",
67
  )
68
 
69
- with gr.Row():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  with gr.Accordion("Example images to use:", open=False) as example_accord:
71
  gr.Examples(
72
  examples=images_for_demo.examples_list,
@@ -76,7 +98,7 @@ with gr.Blocks() as stepwise_htr_tool_tab:
76
  )
77
 
78
  with gr.Column(scale=3):
79
- output_region_image = gr.Image(label="Segmented regions", type="numpy", height=600)
80
 
81
  ##############################################
82
  with gr.Tab("2. Line Segmentation"):
@@ -188,14 +210,11 @@ with gr.Blocks() as stepwise_htr_tool_tab:
188
 
189
  with gr.Column(scale=3):
190
  with gr.Row():
191
- transcribed_text_df = gr.Dataframe(
192
- headers=["Transcribed text"],
193
- max_rows=14,
194
- col_count=(1, "fixed"),
195
- wrap=True,
196
- interactive=False,
197
- overflow_row_behaviour="paginate",
198
- height=600,
199
  )
200
 
201
  #####################################
@@ -219,18 +238,27 @@ with gr.Blocks() as stepwise_htr_tool_tab:
219
  columns=[3],
220
  rows=[3],
221
  # object_fit="contain",
222
- height=300,
223
  preview=True,
224
  container=False,
225
  )
226
 
227
  dataframe_text_index = gr.Textbox(
228
  label="Text from DataFrame selection",
229
- info="Click on a dataframe cell to view the corresponding transcribed text line crop. You can also sort the dataframe to easily locate specific entries.",
230
- lines=2,
231
  interactive=False,
232
  )
233
 
 
 
 
 
 
 
 
 
 
 
234
  with gr.Column(scale=1, visible=True):
235
  mapping_dict = gr.Variable()
236
  transcribed_text_df_finish = gr.Dataframe(
@@ -279,9 +307,9 @@ with gr.Blocks() as stepwise_htr_tool_tab:
279
 
280
  transcribe_button.click(
281
  custom_track.transcribe_text,
282
- inputs=[transcribed_text_df, inputs_lines_to_transcribe],
283
  outputs=[
284
- transcribed_text_df,
285
  transcribed_text_df_finish,
286
  mapping_dict,
287
  # Hide
@@ -290,6 +318,14 @@ with gr.Blocks() as stepwise_htr_tool_tab:
290
  ],
291
  )
292
 
 
 
 
 
 
 
 
 
293
  clear_button.click(
294
  lambda: (
295
  (shutil.rmtree("./vis_data") if os.path.exists("./vis_data") else None, None)[1],
@@ -318,7 +354,7 @@ with gr.Blocks() as stepwise_htr_tool_tab:
318
  control_line_segment,
319
  output_line_from_region,
320
  inputs_lines_to_transcribe,
321
- transcribed_text_df,
322
  control_htr,
323
  inputs_lines_to_transcribe,
324
  image_placeholder_htr,
 
1
  import os
2
  import shutil
3
 
4
+ import evaluate
5
  import gradio as gr
6
 
7
  from helper.examples.examples import DemoImages
8
+ from helper.text.text_howto import TextHowTo
9
  from src.htr_pipeline.gradio_backend import CustomTrack, SingletonModelLoader
10
 
11
  model_loader = SingletonModelLoader()
 
14
 
15
  images_for_demo = DemoImages()
16
 
17
+ cer_metric = evaluate.load("cer")
18
+
19
+
20
  with gr.Blocks() as stepwise_htr_tool_tab:
21
  with gr.Tabs():
22
  with gr.Tab("1. Region Segmentation"):
23
+ with gr.Row():
24
+ with gr.Accordion("Info", open=False) as example_accord:
25
+ with gr.Row(equal_height=False):
26
+ gr.Markdown(TextHowTo.stepwise_htr_tool)
27
+ with gr.Row():
28
+ gr.Markdown(TextHowTo.stepwise_htr_tool_tab_intro)
29
+ with gr.Row():
30
+ with gr.Tabs():
31
+ with gr.Tab("1. Region Segmentation"):
32
+ gr.Markdown(TextHowTo.stepwise_htr_tool_tab1)
33
+ with gr.Tab("2. Line Segmentation"):
34
+ gr.Markdown(TextHowTo.stepwise_htr_tool_tab2)
35
+ with gr.Tab("3. Transcribe Text"):
36
+ gr.Markdown(TextHowTo.stepwise_htr_tool_tab3)
37
+ with gr.Tab("4. Explore Results"):
38
+ gr.Markdown(TextHowTo.stepwise_htr_tool_tab4)
39
+ gr.Markdown(TextHowTo.stepwise_htr_tool_end)
40
  with gr.Row():
41
  with gr.Column(scale=2):
42
  vis_data_folder_placeholder = gr.Markdown(visible=False)
 
47
  label="Image to Region segment",
48
  # type="numpy",
49
  tool="editor",
50
+ height=400,
51
  )
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  with gr.Row():
54
  clear_button = gr.Button("Clear", variant="secondary", elem_id="clear_button")
55
 
 
59
  elem_id="region_segment_button",
60
  )
61
 
62
+ with gr.Group():
63
+ with gr.Accordion("Region segment settings:", open=False):
64
+ with gr.Row():
65
+ reg_pred_score_threshold_slider = gr.Slider(
66
+ minimum=0.4,
67
+ maximum=1,
68
+ value=0.5,
69
+ step=0.05,
70
+ label="P-threshold",
71
+ info="""Filter and determine the confidence score
72
+ required for a prediction score to be considered""",
73
+ )
74
+ reg_containments_threshold_slider = gr.Slider(
75
+ minimum=0,
76
+ maximum=1,
77
+ value=0.5,
78
+ step=0.05,
79
+ label="C-threshold",
80
+ info="""The minimum required overlap or similarity
81
+ for a detected region or object to be considered valid""",
82
+ )
83
+
84
+ with gr.Row():
85
+ region_segment_model_dropdown = gr.Dropdown(
86
+ choices=["Riksarkivet/RmtDet_region"],
87
+ value="Riksarkivet/RmtDet_region",
88
+ label="Region segment model",
89
+ info="Will add more models later!",
90
+ )
91
+
92
  with gr.Accordion("Example images to use:", open=False) as example_accord:
93
  gr.Examples(
94
  examples=images_for_demo.examples_list,
 
98
  )
99
 
100
  with gr.Column(scale=3):
101
+ output_region_image = gr.Image(label="Segmented regions", type="numpy", height=550)
102
 
103
  ##############################################
104
  with gr.Tab("2. Line Segmentation"):
 
210
 
211
  with gr.Column(scale=3):
212
  with gr.Row():
213
+ transcribed_text = gr.Textbox(
214
+ label="Transcribed text",
215
+ info="Transcribed text is being streamed back from the HTR-model",
216
+ lines=25,
217
+ value="",
 
 
 
218
  )
219
 
220
  #####################################
 
238
  columns=[3],
239
  rows=[3],
240
  # object_fit="contain",
241
+ height=250,
242
  preview=True,
243
  container=False,
244
  )
245
 
246
  dataframe_text_index = gr.Textbox(
247
  label="Text from DataFrame selection",
248
+ placeholder="Select row from the DataFrame.",
 
249
  interactive=False,
250
  )
251
 
252
+ gt_text_index = gr.Textbox(
253
+ label="Ground Truth",
254
+ placeholder="Provide the ground truth, if available.",
255
+ interactive=True,
256
+ )
257
+ with gr.Row(equal_height=False):
258
+ calc_cer_button = gr.Button("Calculate CER", variant="primary", visible=True)
259
+
260
+ cer_output = gr.Textbox(label="CER:")
261
+
262
  with gr.Column(scale=1, visible=True):
263
  mapping_dict = gr.Variable()
264
  transcribed_text_df_finish = gr.Dataframe(
 
307
 
308
  transcribe_button.click(
309
  custom_track.transcribe_text,
310
+ inputs=[inputs_lines_to_transcribe],
311
  outputs=[
312
+ transcribed_text,
313
  transcribed_text_df_finish,
314
  mapping_dict,
315
  # Hide
 
318
  ],
319
  )
320
 
321
+ def compute_cer(dataframe_text_index, gt_text_index):
322
+ if gt_text_index is not None and gt_text_index.strip() != "":
323
+ return cer_metric.compute(predictions=[dataframe_text_index], references=[gt_text_index])
324
+ else:
325
+ return "Ground truth not provided"
326
+
327
+ calc_cer_button.click(compute_cer, inputs=[dataframe_text_index, gt_text_index], outputs=cer_output)
328
+
329
  clear_button.click(
330
  lambda: (
331
  (shutil.rmtree("./vis_data") if os.path.exists("./vis_data") else None, None)[1],
 
354
  control_line_segment,
355
  output_line_from_region,
356
  inputs_lines_to_transcribe,
357
+ transcribed_text,
358
  control_htr,
359
  inputs_lines_to_transcribe,
360
  image_placeholder_htr,