Spaces:

Riksarkivet
/

htr_demo

Running on T4

App Files Files Community

Gabriel committed on Nov 7, 2023

Commit

c60ebd1

1 Parent(s): d1b10a8

0.0.3 release with Trocr and compare support

Browse files

Files changed (15) hide show

.github/README.md +7 -0
.gitignore +8 -1
app.py +15 -0
helper/text/overview/changelog_roadmap/changelog.md +4 -29
helper/text/overview/changelog_roadmap/old_changelog.md +39 -0
helper/text/text_app.py +1 -1
helper/text/text_overview.py +2 -0
src/htr_pipeline/gradio_backend.py +84 -9
src/htr_pipeline/inferencer.py +20 -10
src/htr_pipeline/models.py +9 -7
src/htr_pipeline/pipeline.py +7 -1
src/htr_pipeline/utils/pipeline_inferencer.py +18 -2
tabs/htr_tool.py +86 -64
tabs/overview_tab.py +3 -1
tabs/stepwise_htr_tool.py +26 -4

.github/README.md CHANGED Viewed

@@ -15,6 +15,13 @@ HTRFLOW is more than just a demo; it's a testament to the advancement of open so
 ## Run app
 Install libraries with Makefile:
 ```

 ## Run app
+Use virtual env.
+```
+python3 -m venv .venv
+source .venv/bin/activate
+```
 Install libraries with Makefile:
 ```

.gitignore CHANGED Viewed

@@ -28,4 +28,11 @@ TODO.md
 .cache_images/
 traffic_data.db
 ip_data.csv
-data/

 .cache_images/
 traffic_data.db
 ip_data.csv
+data/
+#mlflow
+mlruns/
+test.ipynb
+#models
+models--Riksarkivet--HTR_pipeline_models/

app.py CHANGED Viewed

@@ -31,6 +31,21 @@ with gr.Blocks(title="Riksarkivet", theme=theme, css=css) as demo:
         with gr.Tab("Overview"):
             overview.render()
     SECRET_KEY = os.environ.get("HUB_TOKEN", False)
     if SECRET_KEY:
         demo.load(

         with gr.Tab("Overview"):
             overview.render()
+        with gr.Tab("How to use"):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown("## Fast track")
+                    gr.Video(
+                        value="https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/eating_spaghetti.mp4",
+                        format="mp4",
+                    )
+                with gr.Column():
+                    gr.Markdown("## Stepwise")
+                    gr.Video(
+                        "https://github.com/Borg93/htr_gradio_file_placeholder/blob/main/htr_tool_media_cut.mp4",
+                        format="mp4",
+                    )
     SECRET_KEY = os.environ.get("HUB_TOKEN", False)
     if SECRET_KEY:
         demo.load(

helper/text/overview/changelog_roadmap/changelog.md CHANGED Viewed

@@ -2,38 +2,13 @@
 All notable changes to HTRFLOW will be documented here.
-### [0.0.2] - 2023-11-01
 #### Added
-- Better documentation for API, see **Overview** > **Duplicating for own use & API**
-- Better documentation for restrictions of app, see **Overview** > **HTRFLOW**
 #### Fixed
-- Fixed bug for API, [issue](https://github.com/Riksarkivet/HTRFLOW/issues/2)
-#### Changed
-- Changed named for **FAQ & Discussion** to **FAQ & Contact**
----
-### [0.0.1] - 2023-10-23
-#### Added
-- Added a new feature to **Stepwise** > **Explore results** > New Text diff and CER component
-#### Fixed
-- Fixed naming conventions of tabs in app so they are more coherent with the code.
-#### Changed
-- Changed the layout in both Fast track and Stepwise to improve the UX
-  - Examples are viewed in the middle of the layout
-  - "Advanced settings" are initial hidden
-- Removed **help** tab for now (documentation of Fast track and Stepwise will come in a later release)

 All notable changes to HTRFLOW will be documented here.
+### [0.0.3] - 2023-11-06
 #### Added
+- Support for TROCR -> Latin and Eng model
+- New feature! Compare different runs with GT, see tab **Fast track** > **Compare**
 #### Fixed
+- Fixed bug for Docker and running app locally, [issue](https://github.com/Riksarkivet/HTRFLOW/issues/2)

helper/text/overview/changelog_roadmap/old_changelog.md ADDED Viewed

	@@ -0,0 +1,39 @@

+## Changelog
+All notable changes to HTRFLOW will be documented here.
+### [0.0.2] - 2023-11-01
+#### Added
+- Better documentation for API, see **Overview** > **Duplicating for own use & API**
+- Better documentation for restrictions of app, see **Overview** > **HTRFLOW**
+#### Fixed
+- Fixed bug for API, [issue](https://github.com/Riksarkivet/HTRFLOW/issues/2)
+#### Changed
+- Changed the name of **FAQ & Discussion** to **FAQ & Contact**
+---
+### [0.0.1] - 2023-10-23
+#### Added
+- Added a new feature to **Stepwise** > **Explore results** > New Text diff and CER component
+#### Fixed
+- Fixed naming conventions of tabs in app so they are more coherent with the code.
+#### Changed
+- Changed the layout in both Fast track and Stepwise to improve the UX
+  - Examples are viewed in the middle of the layout
+  - "Advanced settings" are initially hidden
+- Removed **help** tab for now (documentation of Fast track and Stepwise will come in a later release)

helper/text/text_app.py CHANGED Viewed

@@ -1,5 +1,5 @@
 class TextApp:
-    demo_version = """<em>Version 0.0.2</em>"""
     title_markdown = """

 class TextApp:
+    demo_version = """<em>Version 0.0.3</em>"""
     title_markdown = """

helper/text/text_overview.py CHANGED Viewed

@@ -21,6 +21,8 @@ class TextOverview:
     # Changelog & Roadmap
     changelog = read_markdown("helper/text/overview/changelog_roadmap/changelog.md")
     roadmap = read_markdown("helper/text/overview/changelog_roadmap/roadmap.md")
     # duplicate & api

     # Changelog & Roadmap
     changelog = read_markdown("helper/text/overview/changelog_roadmap/changelog.md")
+    old_changelog = read_markdown("helper/text/overview/changelog_roadmap/old_changelog.md")
     roadmap = read_markdown("helper/text/overview/changelog_roadmap/roadmap.md")
     # duplicate & api

src/htr_pipeline/gradio_backend.py CHANGED Viewed

@@ -1,6 +1,9 @@
 import os
 import cv2
 import gradio as gr
 import numpy as np
 import pandas as pd
@@ -36,7 +39,7 @@ class FastTrack:
     def __init__(self, model_loader):
         self.pipeline: PipelineInterface = model_loader.pipeline
-    def segment_to_xml(self, image, radio_button_choices):
         handling_callback_stop_inferencer()
         gr.Info("Excuting HTR on image")
@@ -46,7 +49,9 @@ class FastTrack:
         if os.path.exists(f"./{xml_xml}"):
             os.remove(f"./{xml_xml}")
-        rendered_xml = self.pipeline.running_htr_pipeline(image)
         with open(xml_xml, "w") as f:
             f.write(rendered_xml)
@@ -172,13 +177,83 @@ class CustomTrack:
         return file_name, gr.update(visible=True)
-    # def transcribe_text_another_model(self, df, images):
-    #     transcription_temp_list = []
-    #     for image in images:
-    #         transcribed_text = inferencer.transcribe_different_model(image)
-    #         transcription_temp_list.append(transcribed_text)
-    #         df_trans = pd.DataFrame(transcription_temp_list, columns=["Transcribed_text"])
-    #         yield df_trans, df_trans, gr.update(visible=False)
 if __name__ == "__main__":

 import os
+import xml.etree.ElementTree as ET
+from difflib import Differ
 import cv2
+import evaluate
 import gradio as gr
 import numpy as np
 import pandas as pd
     def __init__(self, model_loader):
         self.pipeline: PipelineInterface = model_loader.pipeline
+    def segment_to_xml(self, image, radio_button_choices, htr_tool_transcriber_model_dropdown):
         handling_callback_stop_inferencer()
         gr.Info("Excuting HTR on image")
         if os.path.exists(f"./{xml_xml}"):
             os.remove(f"./{xml_xml}")
+        htr_tool_transcriber_model_dropdown
+        rendered_xml = self.pipeline.running_htr_pipeline(image, htr_tool_transcriber_model_dropdown)
         with open(xml_xml, "w") as f:
             f.write(rendered_xml)
         return file_name, gr.update(visible=True)
+# Temporary structured here...
def upload_file(files):
    """Forward an uploaded file to its preview component and make that preview visible.

    Args:
        files: The uploaded file object delivered by the upload event; only its
            ``name`` attribute is read.

    Returns:
        A 2-tuple ``(files.name, gr.update(visible=True))``.
    """
    uploaded_name = files.name
    reveal_preview = gr.update(visible=True)
    return uploaded_name, reveal_preview
def diff_texts(text1, text2):
    """Build highlight tokens describing how ``text2`` differs from ``text1``.

    Both arguments are handed to ``difflib.Differ.compare`` unchanged, so two
    strings are compared element by element, i.e. character by character.

    Returns:
        A list of ``(content, tag)`` tuples. ``content`` is the Differ line without
        its two-character prefix; ``tag`` is the first prefix character, or ``None``
        when that character is a space (unchanged element).
    """
    highlighted = []
    for entry in Differ().compare(text1, text2):
        marker = entry[0]
        highlighted.append((entry[2:], None if marker == " " else marker))
    return highlighted
def compute_cer_a_and_b_with_gt(run_a, run_b, run_gt):
    """Report the Character Error Rate (CER) of run A and run B against the Ground Truth.

    Args:
        run_a: Uploaded Page XML file for run A (required).
        run_b: Uploaded Page XML file for run B, or ``None``.
        run_gt: Uploaded Page XML file holding the Ground Truth, or ``None``.

    Returns:
        A human-readable string: either the CER value(s) rounded to four decimals,
        or a message saying which required upload is missing.
    """
    texts = reading_xml_files_string(run_a, run_b, run_gt)
    if texts is None:
        # reading_xml_files_string yields nothing when run A is missing; unpacking
        # that result would otherwise fail with a TypeError.
        return "No run A was provided."
    text_run_a, text_run_b, text_run_gt = texts

    # Decide on the upload itself, not on the extracted text: comparing text A with
    # text GT would label a perfect transcription (CER 0.0) as "no Ground Truth".
    if run_gt is None:
        return "No Ground Truth was provided."

    # Load the metric only once it is certain that it will be used.
    cer_metric = evaluate.load("cer")
    cer_a = round(cer_metric.compute(predictions=[text_run_a], references=[text_run_gt]), 4)

    # Identical texts (including the case where B fell back to A) share one score.
    if text_run_a == text_run_b:
        return f"A & B -> GT: {cer_a}"

    cer_b = round(cer_metric.compute(predictions=[text_run_b], references=[text_run_gt]), 4)
    return f"A -> GT: {cer_a}, B -> GT {cer_b}"
def temporary_xml_parser(page_xml):
    """Extract the line transcriptions of a PAGE XML file as one space-separated string.

    Only elements in the PAGE 2013-07-15 namespace are considered. For every
    ``TextLine`` found below a ``TextRegion`` the text of its direct
    ``TextEquiv/Unicode`` child is collected.

    Args:
        page_xml: Path (or file object) accepted by ``xml.etree.ElementTree.parse``.

    Returns:
        The collected line texts joined with single spaces; an empty string when
        the file contains no transcribed lines.
    """
    tree = ET.parse(page_xml, parser=ET.XMLParser(encoding="utf-8"))
    root = tree.getroot()
    namespace = "{http://schema.primaresearch.org/PAGE/gts/pagecontent/2013-07-15}"
    unicode_path = f"{namespace}TextEquiv/{namespace}Unicode"

    text_list = []
    for textregion in root.findall(f".//{namespace}TextRegion"):
        for textline in textregion.findall(f".//{namespace}TextLine"):
            # findtext gives None when TextEquiv/Unicode is absent and "" when the
            # element is empty; chained .find(...).text raised AttributeError in the
            # first case and made " ".join fail on None in the second.
            text = textline.findtext(unicode_path)
            if text:
                text_list.append(text)
    return " ".join(text_list)
def compare_diff_runs_highlight(run_a, run_b, run_gt):
    """Produce the highlight tokens for "A vs B" and "A vs Ground Truth".

    The three uploads are turned into text by ``reading_xml_files_string`` and then
    diffed with ``diff_texts``; run A is the left-hand side of both comparisons.

    Returns:
        A 2-tuple ``(diff of A against B, diff of A against GT)``.
    """
    texts = reading_xml_files_string(run_a, run_b, run_gt)
    text_a, text_b, text_gt = texts
    a_versus_b = diff_texts(text_a, text_b)
    a_versus_gt = diff_texts(text_a, text_gt)
    return a_versus_b, a_versus_gt
def reading_xml_files_string(run_a, run_b, run_gt):
    """Read the text of run A, run B and the Ground Truth from uploaded Page XML files.

    Run A is mandatory. A missing B or GT falls back to A, and a Gradio warning
    tells the user about the substitution.

    Args:
        run_a: Uploaded file object for run A; its ``name`` attribute is the path parsed.
        run_b: Uploaded file object for run B, or ``None``.
        run_gt: Uploaded file object for the Ground Truth, or ``None``.

    Returns:
        A 3-tuple ``(text_run_a, text_run_b, text_run_gt)`` of strings.

    Raises:
        gr.Error: If run A was not provided.
    """
    if run_a is None:
        # The callers unpack three values; returning nothing here surfaced as an
        # opaque "cannot unpack NoneType" error instead of a message for the user.
        raise gr.Error("No A was provided, upload a Page XML file for run A")

    if run_gt is None:
        gr.Warning("No GT was provided, setting GT to A")
        run_gt = run_a

    if run_b is None:
        gr.Warning("No B was provided, setting B to A")
        run_b = run_a

    text_run_a = temporary_xml_parser(run_a.name)
    # A fallback to A points at the very same file, so reuse the parsed text.
    text_run_b = text_run_a if run_b is run_a else temporary_xml_parser(run_b.name)
    text_run_gt = text_run_a if run_gt is run_a else temporary_xml_parser(run_gt.name)

    return text_run_a, text_run_b, text_run_gt
def update_selected_tab_output_and_setting():
    """Return visibility updates that show the first of three containers only.

    Returns:
        A 3-tuple of ``gr.update`` results with ``visible`` set to
        ``True``, ``False``, ``False`` in that order.
    """
    visibility = (True, False, False)
    return tuple(gr.update(visible=flag) for flag in visibility)
def update_selected_tab_image_viewer():
    """Return visibility updates that show the second of three containers only.

    Returns:
        A 3-tuple of ``gr.update`` results with ``visible`` set to
        ``False``, ``True``, ``False`` in that order.
    """
    visibility = (False, True, False)
    return tuple(gr.update(visible=flag) for flag in visibility)
def update_selected_tab_model_compare():
    """Return visibility updates that show the third of three containers only.

    Returns:
        A 3-tuple of ``gr.update`` results with ``visible`` set to
        ``False``, ``False``, ``True`` in that order.
    """
    visibility = (False, False, True)
    return tuple(gr.update(visible=flag) for flag in visibility)
 if __name__ == "__main__":

src/htr_pipeline/inferencer.py CHANGED Viewed

@@ -3,6 +3,8 @@ from typing import Protocol, Tuple
 import gradio as gr
 import mmcv
 import numpy as np
 from src.htr_pipeline.models import HtrModels
 from src.htr_pipeline.utils.filter_segmask import FilterSegMask
@@ -116,20 +118,28 @@ class Inferencer:
         result_rec = self.htr_model_inferencer(line_cropped)
         return result_rec["predictions"][0]["text"], round(result_rec["predictions"][0]["scores"], 4)
-    # def transcribe_different_model(self, image):
-    #     processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
-    #     model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwritten")
-    #     # prepare image
-    #     pixel_values = processor(image, return_tensors="pt").pixel_values
-    #     # generate (no beam search)
-    #     generated_ids = model.generate(pixel_values)
-    #     # decode
-    #     generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-    #     return generated_text
 class InferencerInterface(Protocol):

 import gradio as gr
 import mmcv
 import numpy as np
+import torch
+from transformers import AutoImageProcessor, TrOCRProcessor, VisionEncoderDecoderModel
 from src.htr_pipeline.models import HtrModels
 from src.htr_pipeline.utils.filter_segmask import FilterSegMask
         result_rec = self.htr_model_inferencer(line_cropped)
         return result_rec["predictions"][0]["text"], round(result_rec["predictions"][0]["scores"], 4)
+    @timer_func
+    def transcribe_different_model(self, image, htr_tool_transcriber_model_dropdown):
+        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        if htr_tool_transcriber_model_dropdown == "pstroe/bullinger-general-model":
+            processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-handwritten")
+            image_processor = AutoImageProcessor.from_pretrained("pstroe/bullinger-general-model")
+            model = VisionEncoderDecoderModel.from_pretrained("pstroe/bullinger-general-model")
+            pixel_values = image_processor(image, return_tensors="pt").pixel_values.to(device)
+        else:
+            processor = TrOCRProcessor.from_pretrained(htr_tool_transcriber_model_dropdown)
+            model = VisionEncoderDecoderModel.from_pretrained(htr_tool_transcriber_model_dropdown)
+            pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
+        model.to(device)
+        generated_ids = model.generate(pixel_values)
+        generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+        return generated_text, 1.0
 class InferencerInterface(Protocol):

src/htr_pipeline/models.py CHANGED Viewed

@@ -11,26 +11,28 @@ from mmocr.apis import TextRecInferencer
 class HtrModels:
     def __init__(self, local_run=False):
         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        SECRET_KEY = os.environ.get("AM_I_IN_A_DOCKER_CONTAINER", False)
         model_folder = "./models"
         self.region_config = f"{model_folder}/RmtDet_regions/rtmdet_m_textregions_2_concat.py"
         self.line_config = f"{model_folder}/RmtDet_lines/rtmdet_m_textlines_2_concat.py"
         self.line_checkpoint = f"{model_folder}/RmtDet_lines/epoch_12.pth"
         self.mmocr_config = f"{model_folder}/SATRN/_base_satrn_shallow_concat.py"
-        if SECRET_KEY:
             config_path = self.get_config()
             self.region_checkpoint = config_path["region_checkpoint"]
             self.line_checkpoint = config_path["line_checkpoint"]
             self.mmocr_checkpoint = config_path["mmocr_checkpoint"]
-        else:
-            self.region_checkpoint = f"{model_folder}/RmtDet_regions/epoch_12.pth"
-            self.line_checkpoint = f"{model_folder}/RmtDet_lines/epoch_12.pth"
-            self.mmocr_checkpoint = f"{model_folder}/SATRN/epoch_5.pth"
     def load_region_model(self):
         # build the model from a config file and a checkpoint file
         return DetInferencer(self.region_config, self.region_checkpoint, device=self.device)

 class HtrModels:
     def __init__(self, local_run=False):
         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         model_folder = "./models"
         self.region_config = f"{model_folder}/RmtDet_regions/rtmdet_m_textregions_2_concat.py"
+        self.region_checkpoint = f"{model_folder}/RmtDet_regions/epoch_12.pth"
         self.line_config = f"{model_folder}/RmtDet_lines/rtmdet_m_textlines_2_concat.py"
         self.line_checkpoint = f"{model_folder}/RmtDet_lines/epoch_12.pth"
         self.mmocr_config = f"{model_folder}/SATRN/_base_satrn_shallow_concat.py"
+        self.mmocr_checkpoint = f"{model_folder}/SATRN/epoch_5.pth"
+        # Check if model files exist at the specified paths, if not, get the config
+        if not (
+            os.path.exists(self.region_checkpoint)
+            and os.path.exists(self.line_checkpoint)
+            and os.path.exists(self.mmocr_checkpoint)
+        ):
             config_path = self.get_config()
             self.region_checkpoint = config_path["region_checkpoint"]
             self.line_checkpoint = config_path["line_checkpoint"]
             self.mmocr_checkpoint = config_path["mmocr_checkpoint"]
     def load_region_model(self):
         # build the model from a config file and a checkpoint file
         return DetInferencer(self.region_config, self.region_checkpoint, device=self.device)

src/htr_pipeline/pipeline.py CHANGED Viewed

@@ -23,6 +23,7 @@ class Pipeline:
     def running_htr_pipeline(
         self,
         input_image: np.ndarray,
         pred_score_threshold_regions: float = 0.4,
         pred_score_threshold_lines: float = 0.4,
         containments_threshold: float = 0.5,
@@ -31,7 +32,12 @@ class Pipeline:
         image = mmcv.imread(input_image)
         rendered_xml = self.pipeline_inferencer.image_to_page_xml(
-            image, pred_score_threshold_regions, pred_score_threshold_lines, containments_threshold, self.inferencer
         )
         return rendered_xml

     def running_htr_pipeline(
         self,
         input_image: np.ndarray,
+        htr_tool_transcriber_model_dropdown,
         pred_score_threshold_regions: float = 0.4,
         pred_score_threshold_lines: float = 0.4,
         containments_threshold: float = 0.5,
         image = mmcv.imread(input_image)
         rendered_xml = self.pipeline_inferencer.image_to_page_xml(
+            image,
+            htr_tool_transcriber_model_dropdown,
+            pred_score_threshold_regions,
+            pred_score_threshold_lines,
+            containments_threshold,
+            self.inferencer,
         )
         return rendered_xml

src/htr_pipeline/utils/pipeline_inferencer.py CHANGED Viewed

@@ -15,8 +15,17 @@ class PipelineInferencer:
         self.xml_helper = xml_helper
     def image_to_page_xml(
-        self, image, pred_score_threshold_regions, pred_score_threshold_lines, containments_threshold, inferencer
     ):
         template_data = self.xml_helper.prepare_template_data(self.xml_helper.xml_file_name, image)
         template_data["textRegions"] = self._process_regions(
             image, inferencer, pred_score_threshold_regions, pred_score_threshold_lines, containments_threshold
@@ -121,7 +130,14 @@ class PipelineInferencer:
     def _create_line_data(self, line, line_pol, index, region_id, inferencer, htr_threshold):
         line_data = {"id": f"line_{region_id}_{index}", "boundary": line_pol}
-        transcribed_text, htr_score = inferencer.transcribe(line)
         line_data["unicode"] = self.xml_helper.escape_xml_chars(transcribed_text)
         line_data["pred_score"] = round(htr_score, 4)

         self.xml_helper = xml_helper
     def image_to_page_xml(
+        self,
+        image,
+        htr_tool_transcriber_model_dropdown,
+        pred_score_threshold_regions,
+        pred_score_threshold_lines,
+        containments_threshold,
+        inferencer,
     ):
+        # temporary solutions.. for trocr..
+        self.htr_tool_transcriber_model_dropdown = htr_tool_transcriber_model_dropdown
         template_data = self.xml_helper.prepare_template_data(self.xml_helper.xml_file_name, image)
         template_data["textRegions"] = self._process_regions(
             image, inferencer, pred_score_threshold_regions, pred_score_threshold_lines, containments_threshold
     def _create_line_data(self, line, line_pol, index, region_id, inferencer, htr_threshold):
         line_data = {"id": f"line_{region_id}_{index}", "boundary": line_pol}
+        # temporary solution..
+        if self.htr_tool_transcriber_model_dropdown == "Riksarkivet/satrn_htr":
+            transcribed_text, htr_score = inferencer.transcribe(line)
+        else:
+            transcribed_text, htr_score = inferencer.transcribe_different_model(
+                line, self.htr_tool_transcriber_model_dropdown
+            )
         line_data["unicode"] = self.xml_helper.escape_xml_chars(transcribed_text)
         line_data["pred_score"] = round(htr_score, 4)

tabs/htr_tool.py CHANGED Viewed

@@ -4,7 +4,16 @@ import gradio as gr
 from helper.examples.examples import DemoImages
 from helper.utils import TrafficDataHandler
-from src.htr_pipeline.gradio_backend import FastTrack, SingletonModelLoader
 model_loader = SingletonModelLoader()
 fast_track = FastTrack(model_loader)
@@ -55,17 +64,14 @@ with gr.Blocks() as htr_tool_tab:
                     )
                 with gr.Tab("Compare") as tab_model_compare_selector:
-                    with gr.Box():
-                        gr.Markdown(
-                            """
-                            **Work in progress**
-                            Compare different runs with uploaded Ground Truth and calculate CER. You will also be able to upload output format files
-                            """
-                        )
                         calc_cer_button_fast = gr.Button("Calculate CER", variant="primary", visible=True)
         with gr.Column(scale=4):
             with gr.Box():
@@ -142,7 +148,11 @@ with gr.Blocks() as htr_tool_tab:
                                     with gr.Row():
                                         htr_tool_transcriber_model_dropdown = gr.Dropdown(
-                                            choices=["Riksarkivet/satrn_htr", "microsoft/trocr-base-handwritten"],
                                             value="Riksarkivet/satrn_htr",
                                             label="Text recognition models",
                                             info="More models will be added",
@@ -167,50 +177,62 @@ with gr.Blocks() as htr_tool_tab:
                     )
                 with gr.Column(visible=False) as model_compare_selector:
-                    gr.Markdown("**Work in progress:**")
                     with gr.Row():
-                        gr.Radio(
-                            choices=["Compare Page XML", "Compare different runs"],
-                            value="Compare Page XML",
-                            info="Compare different runs from HTRFLOW or with external runs.",
-                        )
                     with gr.Row():
-                        gr.UploadButton(label="Run A")
-                        gr.UploadButton(label="Run B")
-                        gr.UploadButton(label="Ground Truth")
-                    with gr.Row():
-                        gr.HighlightedText(
-                            label="Text diff runs",
                             combine_adjacent=True,
                             show_legend=True,
                             color_map={"+": "red", "-": "green"},
                         )
-                    with gr.Row():
-                        gr.HighlightedText(
-                            label="Text diff ground truth",
                             combine_adjacent=True,
                             show_legend=True,
                             color_map={"+": "red", "-": "green"},
                         )
-                    with gr.Row():
-                        with gr.Column(scale=1):
-                            with gr.Row(equal_height=False):
-                                cer_output_fast = gr.Textbox(label="CER:")
-                        with gr.Column(scale=2):
-                            pass
         xml_rendered_placeholder_for_api = gr.Textbox(placeholder="XML", visible=False)
     htr_event_click_event = htr_pipeline_button.click(
         fast_track.segment_to_xml,
-        inputs=[fast_track_input_region_image, radio_file_input],
         outputs=[fast_file_downlod, fast_file_downlod],
-        queue=False,
         api_name=False,
     )
@@ -222,44 +244,21 @@ with gr.Blocks() as htr_tool_tab:
         api_name="run_htr_pipeline",
     )
-    def dummy_update_htr_tool_transcriber_model_dropdown(htr_tool_transcriber_model_dropdown):
-        return gr.update(value="Riksarkivet/satrn_htr")
-    htr_tool_transcriber_model_dropdown.change(
-        fn=dummy_update_htr_tool_transcriber_model_dropdown,
-        inputs=htr_tool_transcriber_model_dropdown,
-        outputs=htr_tool_transcriber_model_dropdown,
-        queue=False,
-        api_name=False,
-    )
-    def update_selected_tab_output_and_setting():
-        return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
-    def update_selected_tab_image_viewer():
-        return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
-    def update_selected_tab_model_compare():
-        return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
     tab_output_and_setting_selector.select(
         fn=update_selected_tab_output_and_setting,
         outputs=[output_and_setting_tab, image_viewer_tab, model_compare_selector],
-        queue=False,
         api_name=False,
     )
     tab_image_viewer_selector.select(
         fn=update_selected_tab_image_viewer,
         outputs=[output_and_setting_tab, image_viewer_tab, model_compare_selector],
-        queue=False,
         api_name=False,
     )
     tab_model_compare_selector.select(
         fn=update_selected_tab_model_compare,
         outputs=[output_and_setting_tab, image_viewer_tab, model_compare_selector],
-        queue=False,
         api_name=False,
     )
@@ -273,7 +272,6 @@ with gr.Blocks() as htr_tool_tab:
         fn=stop_function,
         inputs=None,
         outputs=None,
-        queue=False,
         api_name=False,
         # cancels=[htr_event_click_event],
     )
@@ -282,7 +280,6 @@ with gr.Blocks() as htr_tool_tab:
         fn=fast_track.visualize_image_viewer,
         inputs=fast_track_input_region_image,
         outputs=[fast_track_output_image, text_polygon_dict],
-        queue=False,
         api_name=False,
     )
@@ -290,7 +287,32 @@ with gr.Blocks() as htr_tool_tab:
         fast_track.get_text_from_coords,
         inputs=text_polygon_dict,
         outputs=selection_text_from_image_viewer,
-        queue=False,
         api_name=False,
     )

 from helper.examples.examples import DemoImages
 from helper.utils import TrafficDataHandler
+from src.htr_pipeline.gradio_backend import (
+    FastTrack,
+    SingletonModelLoader,
+    compare_diff_runs_highlight,
+    compute_cer_a_and_b_with_gt,
+    update_selected_tab_image_viewer,
+    update_selected_tab_model_compare,
+    update_selected_tab_output_and_setting,
+    upload_file,
+)
 model_loader = SingletonModelLoader()
 fast_track = FastTrack(model_loader)
                     )
                 with gr.Tab("Compare") as tab_model_compare_selector:
+                    with gr.Row():
+                        diff_runs_button = gr.Button("Compare runs", variant="primary", visible=True)
                         calc_cer_button_fast = gr.Button("Calculate CER", variant="primary", visible=True)
+                    with gr.Row():
+                        cer_output_fast = gr.Textbox(
+                            label="Character Error Rate:",
+                            info="The percentage of characters that have been transcribed incorrectly",
+                        )
         with gr.Column(scale=4):
             with gr.Box():
                                     with gr.Row():
                                         htr_tool_transcriber_model_dropdown = gr.Dropdown(
+                                            choices=[
+                                                "Riksarkivet/satrn_htr",
+                                                "microsoft/trocr-base-handwritten",
+                                                "pstroe/bullinger-general-model",
+                                            ],
                                             value="Riksarkivet/satrn_htr",
                                             label="Text recognition models",
                                             info="More models will be added",
                     )
                 with gr.Column(visible=False) as model_compare_selector:
                     with gr.Row():
+                        gr.Markdown("Compare different runs (Page XML output) with Ground Truth (GT)")
                     with gr.Row():
+                        with gr.Group():
+                            upload_button_run_a = gr.UploadButton("A", file_types=[".xml"], file_count="single")
+                            file_input_xml_run_a = gr.File(
+                                label=None,
+                                file_count="single",
+                                height=100,
+                                elem_id="download_file",
+                                interactive=False,
+                                visible=False,
+                            )
+                        with gr.Group():
+                            upload_button_run_b = gr.UploadButton("B", file_types=[".xml"], file_count="single")
+                            file_input_xml_run_b = gr.File(
+                                label=None,
+                                file_count="single",
+                                height=100,
+                                elem_id="download_file",
+                                interactive=False,
+                                visible=False,
+                            )
+                        with gr.Group():
+                            upload_button_run_gt = gr.UploadButton("GT", file_types=[".xml"], file_count="single")
+                            file_input_xml_run_gt = gr.File(
+                                label=None,
+                                file_count="single",
+                                height=100,
+                                elem_id="download_file",
+                                interactive=False,
+                                visible=False,
+                            )
+                    with gr.Tab("Comparing run A with B"):
+                        text_diff_runs = gr.HighlightedText(
+                            label="A with B",
                             combine_adjacent=True,
                             show_legend=True,
                             color_map={"+": "red", "-": "green"},
                         )
+                    with gr.Tab("Compare run A with Ground Truth"):
+                        text_diff_gt = gr.HighlightedText(
+                            label="A with GT",
                             combine_adjacent=True,
                             show_legend=True,
                             color_map={"+": "red", "-": "green"},
                         )
         xml_rendered_placeholder_for_api = gr.Textbox(placeholder="XML", visible=False)
     htr_event_click_event = htr_pipeline_button.click(
         fast_track.segment_to_xml,
+        inputs=[fast_track_input_region_image, radio_file_input, htr_tool_transcriber_model_dropdown],
         outputs=[fast_file_downlod, fast_file_downlod],
         api_name=False,
     )
         api_name="run_htr_pipeline",
     )
     tab_output_and_setting_selector.select(
         fn=update_selected_tab_output_and_setting,
         outputs=[output_and_setting_tab, image_viewer_tab, model_compare_selector],
         api_name=False,
     )
     tab_image_viewer_selector.select(
         fn=update_selected_tab_image_viewer,
         outputs=[output_and_setting_tab, image_viewer_tab, model_compare_selector],
         api_name=False,
     )
     tab_model_compare_selector.select(
         fn=update_selected_tab_model_compare,
         outputs=[output_and_setting_tab, image_viewer_tab, model_compare_selector],
         api_name=False,
     )
         fn=stop_function,
         inputs=None,
         outputs=None,
         api_name=False,
         # cancels=[htr_event_click_event],
     )
         fn=fast_track.visualize_image_viewer,
         inputs=fast_track_input_region_image,
         outputs=[fast_track_output_image, text_polygon_dict],
         api_name=False,
     )
         fast_track.get_text_from_coords,
         inputs=text_polygon_dict,
         outputs=selection_text_from_image_viewer,
+        api_name=False,
+    )
+    upload_button_run_a.upload(
+        upload_file, inputs=upload_button_run_a, outputs=[file_input_xml_run_a, file_input_xml_run_a], api_name=False
+    )
+    upload_button_run_b.upload(
+        upload_file, inputs=upload_button_run_b, outputs=[file_input_xml_run_b, file_input_xml_run_b], api_name=False
+    )
+    upload_button_run_gt.upload(
+        upload_file, inputs=upload_button_run_gt, outputs=[file_input_xml_run_gt, file_input_xml_run_gt], api_name=False
+    )
+    diff_runs_button.click(
+        fn=compare_diff_runs_highlight,
+        inputs=[file_input_xml_run_a, file_input_xml_run_b, file_input_xml_run_gt],
+        outputs=[text_diff_runs, text_diff_gt],
+        api_name=False,
+    )
+    calc_cer_button_fast.click(
+        fn=compute_cer_a_and_b_with_gt,
+        inputs=[file_input_xml_run_a, file_input_xml_run_b, file_input_xml_run_gt],
+        outputs=cer_output_fast,
         api_name=False,
     )

tabs/overview_tab.py CHANGED Viewed

@@ -4,7 +4,7 @@ from helper.text.text_overview import TextOverview
 with gr.Blocks() as overview:
     with gr.Tabs():
-        with gr.Tab("HTRFLOW"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown(TextOverview.htrflow_col1)
@@ -56,6 +56,8 @@ with gr.Blocks() as overview:
             with gr.Row():
                 with gr.Column():
                     gr.Markdown(TextOverview.changelog)
                 with gr.Column():
                     gr.Markdown(TextOverview.roadmap)

 with gr.Blocks() as overview:
     with gr.Tabs():
+        with gr.Tab("About"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown(TextOverview.htrflow_col1)
             with gr.Row():
                 with gr.Column():
                     gr.Markdown(TextOverview.changelog)
+                    with gr.Accordion("Previous changes", open=False):
+                        gr.Markdown(TextOverview.old_changelog)
                 with gr.Column():
                     gr.Markdown(TextOverview.roadmap)

tabs/stepwise_htr_tool.py CHANGED Viewed

@@ -287,24 +287,39 @@ with gr.Blocks() as stepwise_htr_tool_tab:
         else:
             return "Ground truth not provided"
-    calc_cer_button.click(compute_cer, inputs=[dataframe_text_index, gt_text_index], outputs=cer_output)
-    calc_cer_button.click(diff_texts, inputs=[dataframe_text_index, gt_text_index], outputs=[diff_token_output])
     region_segment_button.click(
         custom_track.region_segment,
         inputs=[input_region_image, reg_pred_score_threshold_slider, reg_containments_threshold_slider],
         outputs=[output_region_image, regions_cropped_gallery, image_placeholder_lines, control_line_segment],
     )
     regions_cropped_gallery.select(
-        custom_track.get_select_index_image, regions_cropped_gallery, input_region_from_gallery
     )
     transcribed_text_df_finish.select(
         fn=custom_track.get_select_index_df,
         inputs=[transcribed_text_df_finish, mapping_dict],
         outputs=[gallery_inputs_lines_to_transcribe, dataframe_text_index],
     )
     line_segment_button.click(
@@ -322,9 +337,14 @@ with gr.Blocks() as stepwise_htr_tool_tab:
             image_placeholder_htr,
             control_htr,
         ],
     )
-    copy_textarea.click(fn=None, _js="""document.querySelector("#textarea_stepwise_3 > label > button").click()""")
     transcribe_button.click(
         custom_track.transcribe_text,
@@ -337,6 +357,7 @@ with gr.Blocks() as stepwise_htr_tool_tab:
             control_results_transcribe,
             image_placeholder_explore_results,
         ],
     )
     clear_button.click(
@@ -377,6 +398,7 @@ with gr.Blocks() as stepwise_htr_tool_tab:
             image_placeholder_explore_results,
             image_placeholder_lines,
         ],
     )
     SECRET_KEY = os.environ.get("AM_I_IN_A_DOCKER_CONTAINER", False)

         else:
             return "Ground truth not provided"
+    calc_cer_button.click(
+        compute_cer,
+        inputs=[dataframe_text_index, gt_text_index],
+        outputs=cer_output,
+        api_name=False,
+    )
+    calc_cer_button.click(
+        diff_texts,
+        inputs=[dataframe_text_index, gt_text_index],
+        outputs=[diff_token_output],
+        api_name=False,
+    )
     region_segment_button.click(
         custom_track.region_segment,
         inputs=[input_region_image, reg_pred_score_threshold_slider, reg_containments_threshold_slider],
         outputs=[output_region_image, regions_cropped_gallery, image_placeholder_lines, control_line_segment],
+        api_name=False,
     )
     regions_cropped_gallery.select(
+        custom_track.get_select_index_image,
+        regions_cropped_gallery,
+        input_region_from_gallery,
+        api_name=False,
     )
     transcribed_text_df_finish.select(
         fn=custom_track.get_select_index_df,
         inputs=[transcribed_text_df_finish, mapping_dict],
         outputs=[gallery_inputs_lines_to_transcribe, dataframe_text_index],
+        api_name=False,
     )
     line_segment_button.click(
             image_placeholder_htr,
             control_htr,
         ],
+        api_name=False,
     )
+    copy_textarea.click(
+        fn=None,
+        _js="""document.querySelector("#textarea_stepwise_3 > label > button").click()""",
+        api_name=False,
+    )
     transcribe_button.click(
         custom_track.transcribe_text,
             control_results_transcribe,
             image_placeholder_explore_results,
         ],
+        api_name=False,
     )
     clear_button.click(
             image_placeholder_explore_results,
             image_placeholder_lines,
         ],
+        api_name=False,
     )
     SECRET_KEY = os.environ.get("AM_I_IN_A_DOCKER_CONTAINER", False)