Spaces:

JohnTan38
/

sparrow-ui

No application file

App Files Files Community

JohnTan38 committed on Dec 30, 2023

Commit

e8a20da

•

1 Parent(s): 984fd33

Upload folder using huggingface_hub

Browse files

Files changed (18) hide show

toolbar/config.py +5 -0
toolbar/main.py +187 -0
toolbar/toolbar/__init__.py +5 -0
toolbar/toolbar/index.html +150 -0
toolbar/toolbar_main/__init__.py +5 -0
toolbar/toolbar_main/index.html +149 -0
toolbar/tools/agstyler.py +77 -0
toolbar/tools/data_review.py +29 -0
toolbar/tools/st_functions.py +72 -0
toolbar/tools/utilities.py +9 -0
toolbar/views/about.py +33 -0
toolbar/views/dashboard.py +378 -0
toolbar/views/data_annotation.py +692 -0
toolbar/views/data_inference.py +219 -0
toolbar/views/data_review.py +165 -0
toolbar/views/model_training.py +9 -0
toolbar/views/model_tuning.py +9 -0
toolbar/views/setup.py +233 -0

toolbar/config.py ADDED Viewed

	@@ -0,0 +1,5 @@

+class Settings():
+    sparrow_key = ""
+settings = Settings()

toolbar/main.py ADDED Viewed

	@@ -0,0 +1,187 @@

+import streamlit as st
+from streamlit_option_menu import option_menu
+from tools.utilities import load_css
+import json
+from views.dashboard import Dashboard
+from views.data_annotation import DataAnnotation
+from views.model_training import ModelTraining
+from views.model_tuning import ModelTuning
+from views.data_inference import DataInference
+from views.setup import Setup
+from views.data_review import DataReview
+from views.about import About
+import streamlit_javascript as st_js
+st.set_page_config(
+    page_title="Sparrow",
+    page_icon="favicon.ico",
+    layout="wide"
+)
+load_css()
+class Model:
+    """Labels and icon names used to build the sidebar navigation menu."""
+    # Title shown above the menu entries.
+    menuTitle = "Sparrow"
+    # Menu entry labels; view() compares the selected label against these values.
+    option1 = "Dashboard"
+    option2 = "Data Annotation"
+    # option3 and option4 are defined but left out of the list passed to option_menu in view().
+    option3 = "Model Training"
+    option4 = "Model Tuning"
+    option5 = "Inference"
+    option6 = "Data Review"
+    option7 = "Setup"
+    option8 = "About"
+    # Icon names passed to option_menu; iconN belongs to optionN.
+    menuIcon = "menu-up"
+    icon1 = "speedometer"
+    icon2 = "activity"
+    icon3 = "motherboard"
+    icon4 = "graph-up-arrow"
+    icon5 = "journal-arrow-down"
+    icon6 = "droplet"
+    icon7 = "clipboard-data"
+    icon8 = "chat"
+def view(model):
+    with st.sidebar:
+        menuItem = option_menu(model.menuTitle,
+                               [model.option1, model.option2, model.option5, model.option6, model.option7, model.option8],
+                               icons=[model.icon1, model.icon2, model.icon5, model.icon6, model.icon7, model.icon8],
+                               menu_icon=model.menuIcon,
+                               default_index=0,
+                               styles={
+                                   "container": {"padding": "5!important", "background-color": "#fafafa"},
+                                   "icon": {"color": "black", "font-size": "25px"},
+                                   "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px",
+                                                "--hover-color": "#eee"},
+                                   "nav-link-selected": {"background-color": "#037ffc"},
+                               })
+    if menuItem == model.option1:
+        Dashboard().view(Dashboard.Model())
+        logout_widget()
+    if menuItem == model.option2:
+        if 'ui_width' not in st.session_state or 'device_type' not in st.session_state or 'device_width' not in st.session_state:
+            # Get UI width
+            ui_width = st_js.st_javascript("window.innerWidth", key="ui_width_comp")
+            device_width = st_js.st_javascript("window.screen.width", key="device_width_comp")
+            if ui_width > 0 and device_width > 0:
+                # Add 20% of current screen width to compensate for the sidebar
+                ui_width = round(ui_width + (20 * ui_width / 100))
+                if device_width > 768:
+                    device_type = 'desktop'
+                else:
+                    device_type = 'mobile'
+                st.session_state['ui_width'] = ui_width
+                st.session_state['device_type'] = device_type
+                st.session_state['device_width'] = device_width
+                st.experimental_rerun()
+        else:
+            DataAnnotation().view(DataAnnotation.Model(), st.session_state['ui_width'], st.session_state['device_type'],
+                                  st.session_state['device_width'])
+        logout_widget()
+    if menuItem == model.option3:
+        ModelTraining().view(ModelTraining.Model())
+        logout_widget()
+    if menuItem == model.option4:
+        ModelTuning().view(ModelTuning.Model())
+        logout_widget()
+    if menuItem == model.option5:
+        if 'ui_width' not in st.session_state or 'device_type' not in st.session_state or 'device_width' not in st.session_state:
+            # Get UI width
+            ui_width = st_js.st_javascript("window.innerWidth", key="ui_width_comp")
+            device_width = st_js.st_javascript("window.screen.width", key="device_width_comp")
+            if ui_width > 0 and device_width > 0:
+                # Add 20% of current screen width to compensate for the sidebar
+                ui_width = round(ui_width + (20 * ui_width / 100))
+                if device_width > 768:
+                    device_type = 'desktop'
+                else:
+                    device_type = 'mobile'
+                st.session_state['ui_width'] = ui_width
+                st.session_state['device_type'] = device_type
+                st.session_state['device_width'] = device_width
+                st.experimental_rerun()
+        else:
+            DataInference().view(DataInference.Model(), st.session_state['ui_width'], st.session_state['device_type'],
+                                 st.session_state['device_width'])
+        logout_widget()
+    if menuItem == model.option6:
+        if 'ui_width' not in st.session_state or 'device_type' not in st.session_state or 'device_width' not in st.session_state:
+            # Get UI width
+            ui_width = st_js.st_javascript("window.innerWidth", key="ui_width_comp")
+            device_width = st_js.st_javascript("window.screen.width", key="device_width_comp")
+            if ui_width > 0 and device_width > 0:
+                # Add 20% of current screen width to compensate for the sidebar
+                ui_width = round(ui_width + (20 * ui_width / 100))
+                if device_width > 768:
+                    device_type = 'desktop'
+                else:
+                    device_type = 'mobile'
+                st.session_state['ui_width'] = ui_width
+                st.session_state['device_type'] = device_type
+                st.session_state['device_width'] = device_width
+                st.experimental_rerun()
+        else:
+            DataReview().view(DataReview.Model(), st.session_state['ui_width'], st.session_state['device_type'],
+                              st.session_state['device_width'])
+        logout_widget()
+    if menuItem == model.option7:
+        Setup().view(Setup.Model())
+        logout_widget()
+    if menuItem == model.option8:
+        About().view(About.Model())
+        logout_widget()
+def logout_widget():
+    with st.sidebar:
+        st.markdown("---")
+        # st.write("User:", "John Doe")
+        st.write("Version:", "2.0.0")
+        # st.button("Logout")
+        # st.markdown("---")
+        if 'visitors' not in st.session_state:
+            with open("docs/visitors.json", "r") as f:
+                visitors_json = json.load(f)
+                visitors = visitors_json["meta"]["visitors"]
+            visitors += 1
+            visitors_json["meta"]["visitors"] = visitors
+            with open("docs/visitors.json", "w") as f:
+                json.dump(visitors_json, f)
+            st.session_state['visitors'] = visitors
+        else:
+            visitors = st.session_state['visitors']
+        st.write("Counter:", visitors)
+view(Model())

toolbar/toolbar/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit.components.v1 as components
+# Declare the toolbar component served from the static files under './toolbar'.
+# NOTE(review): the path is relative - presumably resolved against the directory
+# the app is launched from; confirm before starting the app from elsewhere.
+component_toolbar_buttons = components.declare_component(
+    name='component_toolbar_buttons',
+    path='./toolbar'
+)

toolbar/toolbar/index.html ADDED Viewed

	@@ -0,0 +1,150 @@

+<html>
+<head>
+   <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/1.6.3/flowbite.min.css" rel="stylesheet" />
+</head>
+<!--
+----------------------------------------------------
+Your custom static HTML goes in the body:
+-->
+<body>
+  <div class="inline-flex rounded-md shadow-sm" role="group">
+    <button id="create" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-l-lg hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
+      <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
+        <path fill-rule="evenodd" d="M12 3.75a.75.75 0 01.75.75v6.75h6.75a.75.75 0 010 1.5h-6.75v6.75a.75.75 0 01-1.5 0v-6.75H4.5a.75.75 0 010-1.5h6.75V4.5a.75.75 0 01.75-.75z" clip-rule="evenodd"></path>
+      </svg>
+      Create
+    </button>
+    <button id="delete" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border-t border-b border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
+      <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
+        <path d="M6.75 9.25a.75.75 0 000 1.5h6.5a.75.75 0 000-1.5h-6.5z"></path>
+      </svg>
+      Delete
+    </button>
+    <button id="save" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-r-md hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
+      <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
+        <path fill-rule="evenodd" d="M16.704 4.153a.75.75 0 01.143 1.052l-8 10.5a.75.75 0 01-1.127.075l-4.5-4.5a.75.75 0 011.06-1.06l3.894 3.893 7.48-9.817a.75.75 0 011.05-.143z" clip-rule="evenodd"></path>
+      </svg>
+      Save
+    </button>
+  </div>
+</body>
+<script type="text/javascript">
+  // ----------------------------------------------------
+  // Use these functions as is to perform required Streamlit
+  // component lifecycle actions:
+  //
+  // 1. Signal Streamlit client that component is ready
+  // 2. Signal Streamlit client to set visible height of the component
+  //    (this is optional, in case Streamlit doesn't correctly auto-set it)
+  // 3. Pass values from component to Streamlit client
+  //
+  // Helper function to send type and data messages to Streamlit client
+  const SET_COMPONENT_VALUE = "streamlit:setComponentValue"
+  const RENDER = "streamlit:render"
+  const COMPONENT_READY = "streamlit:componentReady"
+  const SET_FRAME_HEIGHT = "streamlit:setFrameHeight"
+  function _sendMessage(type, data) {
+    // copy data into object
+    var outboundData = Object.assign({
+      isStreamlitMessage: true,
+      type: type,
+    }, data)
+    if (type == SET_COMPONENT_VALUE) {
+      console.log("_sendMessage data: " + JSON.stringify(data))
+      console.log("_sendMessage outboundData: " + JSON.stringify(outboundData))
+    }
+    window.parent.postMessage(outboundData, "*")
+  }
+  function initialize(pipeline) {
+    // Hook Streamlit's message events into a simple dispatcher of pipeline handlers
+    window.addEventListener("message", (event) => {
+      if (event.data.type == RENDER) {
+        // The event.data.args dict holds any JSON-serializable value
+        // sent from the Streamlit client. It is already deserialized.
+        pipeline.forEach(handler => {
+          handler(event.data.args)
+        })
+      }
+    })
+    _sendMessage(COMPONENT_READY, { apiVersion: 1 });
+    // Component should be mounted by Streamlit in an iframe, so try to autoset the iframe height.
+    window.addEventListener("load", () => {
+      window.setTimeout(function () {
+        setFrameHeight(document.documentElement.clientHeight)
+      }, 0)
+    })
+    // Optionally, if auto-height computation fails, you can manually set it
+    // (uncomment below)
+    //setFrameHeight(200)
+  }
+  function setFrameHeight(height) {
+    _sendMessage(SET_FRAME_HEIGHT, { height: height })
+  }
+  // The `data` argument can be any JSON-serializable value.
+  function notifyHost(data) {
+    _sendMessage(SET_COMPONENT_VALUE, data)
+  }
+  // ----------------------------------------------------
+  // Your custom functionality for the component goes here:
+  function call(button) {
+    timestamp = Date.now()
+    action = {
+      "action": button.id,
+      "timestamp": timestamp
+    }
+    notifyHost({
+      value: action,
+      dataType: "json",
+    })
+  }
+  // ----------------------------------------------------
+  // Here you can customize a pipeline of handlers for
+  // inbound properties from the Streamlit client app
+  // Set initial value sent from Streamlit!
+  function initializeProps_Handler(props) {
+    for (let key of Object.keys(props.buttons)) {
+      btn = document.getElementById(key)
+      btn.disabled = props.buttons[key]
+    }
+    // btn.disabled = props.initial_state.delete_disabled
+  }
+  // Access values sent from Streamlit!
+  function dataUpdate_Handler(props) {
+    // let msgLabel = document.getElementById("message_label")
+    // msgLabel.innerText = `Update [${props.counter}] at ${props.datetime}`
+  }
+  // Simply log received data dictionary
+  function log_Handler(props) {
+    console.log("Received from Streamlit: " + JSON.stringify(props))
+  }
+  let pipeline = [initializeProps_Handler, dataUpdate_Handler, log_Handler]
+  // ----------------------------------------------------
+  // Finally, initialize component passing in pipeline
+  initialize(pipeline)
+</script>
+</html>

toolbar/toolbar_main/__init__.py ADDED Viewed

	@@ -0,0 +1,5 @@

+import streamlit.components.v1 as components
+# Declare the main toolbar component served from the static files under './toolbar_main'.
+# NOTE(review): the path is relative - presumably resolved against the directory
+# the app is launched from; confirm before starting the app from elsewhere.
+component_toolbar_main = components.declare_component(
+    name='component_toolbar_main',
+    path='./toolbar_main'
+)

toolbar/toolbar_main/index.html ADDED Viewed

	@@ -0,0 +1,149 @@

+<html>
+<head>
+   <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/1.6.3/flowbite.min.css" rel="stylesheet" />
+</head>
+<!--
+----------------------------------------------------
+Your custom static HTML goes in the body:
+-->
+<body>
+  <div class="inline-flex rounded-md shadow-sm" role="group">
+    <button id="up" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-l-lg hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
+      <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
+        <path fill-rule="evenodd" d="M4.5 15.75l7.5-7.5 7.5 7.5" clip-rule="evenodd"></path>
+      </svg>
+      Up
+    </button>
+    <button id="down" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border-t border-b border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
+      <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
+        <path d="M19.5 8.25l-7.5 7.5-7.5-7.5"></path>
+      </svg>
+      Down
+    </button>
+    <button id="save" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-r-md hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
+      <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
+        <path fill-rule="evenodd" d="M16.704 4.153a.75.75 0 01.143 1.052l-8 10.5a.75.75 0 01-1.127.075l-4.5-4.5a.75.75 0 011.06-1.06l3.894 3.893 7.48-9.817a.75.75 0 011.05-.143z" clip-rule="evenodd"></path>
+      </svg>
+      Save
+    </button>
+  </div>
+</body>
+<script type="text/javascript">
+  // ----------------------------------------------------
+  // Use these functions as is to perform required Streamlit
+  // component lifecycle actions:
+  //
+  // 1. Signal Streamlit client that component is ready
+  // 2. Signal Streamlit client to set visible height of the component
+  //    (this is optional, in case Streamlit doesn't correctly auto-set it)
+  // 3. Pass values from component to Streamlit client
+  //
+  // Helper function to send type and data messages to Streamlit client
+  const SET_COMPONENT_VALUE = "streamlit:setComponentValue"
+  const RENDER = "streamlit:render"
+  const COMPONENT_READY = "streamlit:componentReady"
+  const SET_FRAME_HEIGHT = "streamlit:setFrameHeight"
+  function _sendMessage(type, data) {
+    // copy data into object
+    var outboundData = Object.assign({
+      isStreamlitMessage: true,
+      type: type,
+    }, data)
+    if (type == SET_COMPONENT_VALUE) {
+      console.log("_sendMessage data: " + JSON.stringify(data))
+      console.log("_sendMessage outboundData: " + JSON.stringify(outboundData))
+    }
+    window.parent.postMessage(outboundData, "*")
+  }
+  function initialize(pipeline) {
+    // Hook Streamlit's message events into a simple dispatcher of pipeline handlers
+    window.addEventListener("message", (event) => {
+      if (event.data.type == RENDER) {
+        // The event.data.args dict holds any JSON-serializable value
+        // sent from the Streamlit client. It is already deserialized.
+        pipeline.forEach(handler => {
+          handler(event.data.args)
+        })
+      }
+    })
+    _sendMessage(COMPONENT_READY, { apiVersion: 1 });
+    // Component should be mounted by Streamlit in an iframe, so try to autoset the iframe height.
+    window.addEventListener("load", () => {
+      window.setTimeout(function () {
+        setFrameHeight(document.documentElement.clientHeight)
+      }, 0)
+    })
+    // Optionally, if auto-height computation fails, you can manually set it
+    // (uncomment below)
+    //setFrameHeight(200)
+  }
+  function setFrameHeight(height) {
+    _sendMessage(SET_FRAME_HEIGHT, { height: height })
+  }
+  // The `data` argument can be any JSON-serializable value.
+  function notifyHost(data) {
+    _sendMessage(SET_COMPONENT_VALUE, data)
+  }
+  // ----------------------------------------------------
+  // Your custom functionality for the component goes here:
+  function call(button) {
+    timestamp = Date.now()
+    action = {
+      "action": button.id,
+      "timestamp": timestamp
+    }
+    notifyHost({
+      value: action,
+      dataType: "json",
+    })
+  }
+  // ----------------------------------------------------
+  // Here you can customize a pipeline of handlers for
+  // inbound properties from the Streamlit client app
+  // Set initial value sent from Streamlit!
+  function initializeProps_Handler(props) {
+    for (let key of Object.keys(props.buttons)) {
+      btn = document.getElementById(key)
+      btn.disabled = props.buttons[key]['disabled']
+      btn.style.display = props.buttons[key]['rendered']
+    }
+  }
+  // Access values sent from Streamlit!
+  function dataUpdate_Handler(props) {
+    // let msgLabel = document.getElementById("message_label")
+    // msgLabel.innerText = `Update [${props.counter}] at ${props.datetime}`
+  }
+  // Simply log received data dictionary
+  function log_Handler(props) {
+    console.log("Received from Streamlit: " + JSON.stringify(props))
+  }
+  let pipeline = [initializeProps_Handler, dataUpdate_Handler, log_Handler]
+  // ----------------------------------------------------
+  // Finally, initialize component passing in pipeline
+  initialize(pipeline)
+</script>
+</html>

toolbar/tools/agstyler.py ADDED Viewed

	@@ -0,0 +1,77 @@

+# adjusted from: https://github.com/nryabykh/streamlit-aggrid-hints
+from st_aggrid import AgGrid
+from st_aggrid.grid_options_builder import GridOptionsBuilder
+from st_aggrid.shared import GridUpdateMode, JsCode
+def get_numeric_style_with_precision(precision: int) -> dict:
+    return {"type": ["numericColumn", "customNumericFormat"], "precision": precision}
+PRECISION_ZERO = get_numeric_style_with_precision(0)
+PRECISION_ONE = get_numeric_style_with_precision(1)
+PRECISION_TWO = get_numeric_style_with_precision(2)
+PINLEFT = {"pinned": "left"}
+def draw_grid(
+        df,
+        formatter: dict = None,
+        selection="multiple",
+        use_checkbox=False,
+        fit_columns=False,
+        pagination_size=0,
+        theme="streamlit",
+        wrap_text: bool = False,
+        auto_height: bool = False,
+        grid_options: dict = None,
+        key=None,
+        css: dict = None
+):
+    gb = GridOptionsBuilder()
+    gb.configure_default_column(
+        filterable=True,
+        groupable=False,
+        editable=False,
+        wrapText=wrap_text,
+        autoHeight=auto_height
+    )
+    if grid_options is not None:
+        gb.configure_grid_options(**grid_options)
+    for latin_name, (cyr_name, style_dict) in formatter.items():
+        gb.configure_column(latin_name, header_name=cyr_name, **style_dict)
+    gb.configure_selection(selection_mode=selection, use_checkbox=use_checkbox)
+    if pagination_size > 0:
+        gb.configure_pagination(enabled=True, paginationAutoPageSize=False, paginationPageSize=pagination_size)
+    return AgGrid(
+        df,
+        gridOptions=gb.build(),
+        update_mode=GridUpdateMode.SELECTION_CHANGED | GridUpdateMode.VALUE_CHANGED,
+        allow_unsafe_jscode=True,
+        fit_columns_on_grid_load=fit_columns,
+        theme=theme,
+        key=key,
+        custom_css=css,
+        enable_enterprise_modules=False
+    )
+def highlight(color, condition):
+    code = f"""
+        function(params) {{
+            color = "{color}";
+            if ({condition}) {{
+                return {{
+                    'backgroundColor': color
+                }}
+            }}
+        }};
+    """
+    return JsCode(code)

toolbar/tools/data_review.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import os
+from natsort import natsorted
+import json
+def annotation_review():
+    # get list of files in json directory
+    processed_file_names = get_processed_file_names('../docs/json/')
+    for file_name in processed_file_names:
+        # open json file
+        with open('../docs/json/' + file_name + '.json') as json_file:
+            json_file_data = json.load(json_file)
+            version = json_file_data['meta']['version']
+            if version == "v0.1":
+                print(file_name + " is v0.1")
+def get_processed_file_names(dir_name):
+    # get ordered list of files without file extension, excluding hidden files, with JSON extension only
+    file_names = [os.path.splitext(f)[0] for f in os.listdir(dir_name) if
+                    os.path.isfile(os.path.join(dir_name, f)) and not f.startswith('.') and f.endswith('.json')]
+    file_names = natsorted(file_names)
+    return file_names
+def main():
+    annotation_review()
+if __name__ == '__main__':
+    main()

toolbar/tools/st_functions.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import streamlit as st
+def st_button(icon, url, label, iconsize):
+    if icon == 'youtube':
+        button_code = f'''
+        <p>
+            <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+                <svg xmlns="http://www.w3.org/2000/svg" width={iconsize} height={iconsize} fill="currentColor" class="bi bi-youtube" viewBox="0 0 16 16">
+                    <path d="M8.051 1.999h.089c.822.003 4.987.033 6.11.335a2.01 2.01 0 0 1 1.415 1.42c.101.38.172.883.22 1.402l.01.104.022.26.008.104c.065.914.073 1.77.074 1.957v.075c-.001.194-.01 1.108-.082 2.06l-.008.105-.009.104c-.05.572-.124 1.14-.235 1.558a2.007 2.007 0 0 1-1.415 1.42c-1.16.312-5.569.334-6.18.335h-.142c-.309 0-1.587-.006-2.927-.052l-.17-.006-.087-.004-.171-.007-.171-.007c-1.11-.049-2.167-.128-2.654-.26a2.007 2.007 0 0 1-1.415-1.419c-.111-.417-.185-.986-.235-1.558L.09 9.82l-.008-.104A31.4 31.4 0 0 1 0 7.68v-.123c.002-.215.01-.958.064-1.778l.007-.103.003-.052.008-.104.022-.26.01-.104c.048-.519.119-1.023.22-1.402a2.007 2.007 0 0 1 1.415-1.42c.487-.13 1.544-.21 2.654-.26l.17-.007.172-.006.086-.003.171-.007A99.788 99.788 0 0 1 7.858 2h.193zM6.4 5.209v4.818l4.157-2.408L6.4 5.209z"/>
+                </svg>
+                {label}
+            </a>
+        </p>'''
+    elif icon == 'twitter':
+        button_code = f'''
+        <p>
+        <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+            <svg xmlns="http://www.w3.org/2000/svg" width={iconsize} height={iconsize} fill="currentColor" class="bi bi-twitter" viewBox="0 0 16 16">
+                <path d="M5.026 15c6.038 0 9.341-5.003 9.341-9.334 0-.14 0-.282-.006-.422A6.685 6.685 0 0 0 16 3.542a6.658 6.658 0 0 1-1.889.518 3.301 3.301 0 0 0 1.447-1.817 6.533 6.533 0 0 1-2.087.793A3.286 3.286 0 0 0 7.875 6.03a9.325 9.325 0 0 1-6.767-3.429 3.289 3.289 0 0 0 1.018 4.382A3.323 3.323 0 0 1 .64 6.575v.045a3.288 3.288 0 0 0 2.632 3.218 3.203 3.203 0 0 1-.865.115 3.23 3.23 0 0 1-.614-.057 3.283 3.283 0 0 0 3.067 2.277A6.588 6.588 0 0 1 .78 13.58a6.32 6.32 0 0 1-.78-.045A9.344 9.344 0 0 0 5.026 15z"/>
+            </svg>
+            {label}
+        </a>
+        </p>'''
+    elif icon == 'linkedin':
+        button_code = f'''
+        <p>
+            <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+                <svg xmlns="http://www.w3.org/2000/svg" width={iconsize} height={iconsize} fill="currentColor" class="bi bi-linkedin" viewBox="0 0 16 16">
+                    <path d="M0 1.146C0 .513.526 0 1.175 0h13.65C15.474 0 16 .513 16 1.146v13.708c0 .633-.526 1.146-1.175 1.146H1.175C.526 16 0 15.487 0 14.854V1.146zm4.943 12.248V6.169H2.542v7.225h2.401zm-1.2-8.212c.837 0 1.358-.554 1.358-1.248-.015-.709-.52-1.248-1.342-1.248-.822 0-1.359.54-1.359 1.248 0 .694.521 1.248 1.327 1.248h.016zm4.908 8.212V9.359c0-.216.016-.432.08-.586.173-.431.568-.878 1.232-.878.869 0 1.216.662 1.216 1.634v3.865h2.401V9.25c0-2.22-1.184-3.252-2.764-3.252-1.274 0-1.845.7-2.165 1.193v.025h-.016a5.54 5.54 0 0 1 .016-.025V6.169h-2.4c.03.678 0 7.225 0 7.225h2.4z"/>
+                </svg>
+                {label}
+            </a>
+        </p>'''
+    elif icon == 'medium':
+        button_code = f'''
+        <p>
+            <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+                <svg xmlns="http://www.w3.org/2000/svg" width={iconsize} height={iconsize} fill="currentColor" class="bi bi-medium" viewBox="0 0 16 16">
+                    <path d="M9.025 8c0 2.485-2.02 4.5-4.513 4.5A4.506 4.506 0 0 1 0 8c0-2.486 2.02-4.5 4.512-4.5A4.506 4.506 0 0 1 9.025 8zm4.95 0c0 2.34-1.01 4.236-2.256 4.236-1.246 0-2.256-1.897-2.256-4.236 0-2.34 1.01-4.236 2.256-4.236 1.246 0 2.256 1.897 2.256 4.236zM16 8c0 2.096-.355 3.795-.794 3.795-.438 0-.793-1.7-.793-3.795 0-2.096.355-3.795.794-3.795.438 0 .793 1.699.793 3.795z"/>
+                </svg>
+                {label}
+            </a>
+        </p>'''
+    elif icon == 'newsletter':
+        button_code = f'''
+        <p>
+            <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+                <svg xmlns="http://www.w3.org/2000/svg" width={iconsize} height={iconsize} fill="currentColor" class="bi bi-envelope" viewBox="0 0 16 16">
+                    <path d="M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V4Zm2-1a1 1 0 0 0-1 1v.217l7 4.2 7-4.2V4a1 1 0 0 0-1-1H2Zm13 2.383-4.708 2.825L15 11.105V5.383Zm-.034 6.876-5.64-3.471L8 9.583l-1.326-.795-5.64 3.47A1 1 0 0 0 2 13h12a1 1 0 0 0 .966-.741ZM1 11.105l4.708-2.897L1 5.383v5.722Z"/>
+                </svg>
+                {label}
+            </a>
+        </p>'''
+    elif icon == 'github':
+        button_code = f'''
+        <p>
+            <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+                <svg xmlns="http://www.w3.org/2000/svg" width={iconsize} height={iconsize} fill="currentColor" class="bi bi-github" viewBox="0 0 16 16">
+                    <path d="M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z"/>
+                </svg>
+                {label}
+            </a>
+        </p>'''
+    elif icon == '':
+        button_code = f'''
+        <p>
+            <a href={url} class="btn btn-outline-primary btn-lg btn-block" type="button" aria-pressed="true">
+                {label}
+            </a>
+        </p>'''
+    return st.markdown(button_code, unsafe_allow_html=True)

toolbar/tools/utilities.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import streamlit as st
+def load_css():
+    with open("tools/style.css") as f:
+        st.markdown('<style>{}</style>'.format(f.read()), unsafe_allow_html=True)
+    st.markdown(
+        '<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">',
+        unsafe_allow_html=True)

toolbar/views/about.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import streamlit as st
+from PIL import Image
+from tools.st_functions import st_button
+class About:
+    class Model:
+        pageTitle = "About"
+    def view(self, model):
+        # st.title(model.pageTitle)
+        st.write(
+            "[![Star](https://img.shields.io/github/stars/katanaml/sparrow.svg?logo=github&style=social)](https://github.com/katanaml/sparrow)")
+        col1, col2, col3 = st.columns(3)
+        col2.image(Image.open('assets/ab.png'))
+        st.markdown("<h1 style='text-align: center; color: black; font-weight: bold;'>Andrej Baranovskij, Founder Katana ML</h1>",
+                    unsafe_allow_html=True)
+        st.info(
+            'Sparrow is a tool for data extraction from PDFs, images, and other documents. It is a part of Katana ML, '
+            'a platform for data science and machine learning.')
+        icon_size = 20
+        st_button('youtube', 'https://www.youtube.com/@AndrejBaranovskij', 'Andrej Baranovskij YouTube channel', icon_size)
+        st_button('github', 'https://github.com/katanaml/sparrow', 'Sparrow GitHub', icon_size)
+        st_button('twitter', 'https://twitter.com/andrejusb', 'Follow me on Twitter', icon_size)
+        st_button('medium', 'https://andrejusb.medium.com', 'Read my Blogs on Medium', icon_size)
+        st_button('linkedin', 'https://www.linkedin.com/in/andrej-baranovskij/', 'Follow me on LinkedIn', icon_size)
+        st_button('', 'https://katanaml.io', 'Katana ML', icon_size)

toolbar/views/dashboard.py ADDED Viewed

	@@ -0,0 +1,378 @@

+import streamlit as st
+import numpy as np
+import pandas as pd
+import json
+import altair as alt
+from pathlib import Path
+import requests
+class Dashboard:
+    class Model:
+        pageTitle = "Dashboard"
+        wordsTitle = "Words"
+        inferenceTimeTitle = "Inference Time"
+        documentsTitle = "Documents"
+        dailyInferenceTitle = "Top Daily Inference"
+        accuracyTitle = "Mean Accuracy"
+        titleModelEval = "## Evaluation Accuracy"
+        titleInferencePerformance = "## Inference Performance"
+        titleDatasetInfo = "## Dataset Info"
+        titleDataAnnotation = "## Data Annotation"
+        titleTrainingPerformance = "## Training Performance"
+        titleEvaluationPerformance = "## Evaluation Performance"
+        status_file = "docs/status.json"
+        annotation_files_dir = "docs/json"
+    def view(self, model):
+        # st.title(model.pageTitle)
+        api_url = "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/statistics"
+        json_data_inference = []
+        response = requests.get(api_url)
+        if response.status_code == 200:
+            json_data_inference = response.json()
+        else:
+            print(f"Error: Unable to fetch data from the API (status code {response.status_code})")
+        api_url_t = "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/training"
+        json_data_training = []
+        response_t = requests.get(api_url_t)
+        if response_t.status_code == 200:
+            json_data_training = response_t.json()
+        else:
+            print(f"Error: Unable to fetch data from the API (status code {response_t.status_code})")
+        api_url_e = "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/evaluate"
+        json_data_evaluate = []
+        response_e = requests.get(api_url_e)
+        if response_e.status_code == 200:
+            json_data_evaluate = response_e.json()
+        else:
+            print(f"Error: Unable to fetch data from the API (status code {response_e.status_code})")
+        with st.container():
+            col1, col2, col3, col4, col5 = st.columns(5)
+            with col1:
+                words_count = 0
+                delta_words = 0
+                if len(json_data_inference) > 3:
+                    for i in range(0, len(json_data_inference)):
+                        words_count = words_count + json_data_inference[i][1]
+                    avg_word_count = words_count / len(json_data_inference)
+                    avg_word_last = (json_data_inference[len(json_data_inference) - 1][1]
+                                     + json_data_inference[len(json_data_inference) - 2][1] +
+                                     json_data_inference[len(json_data_inference) - 3][1]) / 3
+                    if avg_word_last >= avg_word_count:
+                        delta_words = round(100 - ((avg_word_count * 100) / avg_word_last), 2)
+                    else:
+                        delta_words = round(100 - ((avg_word_last * 100) / avg_word_count), 2) * -1
+                    words_count = words_count / 1000
+                st.metric(label=model.wordsTitle, value=str(words_count) + 'K', delta=str(delta_words) + "%")
+            with col2:
+                docs_count = len(json_data_inference)
+                delta_docs = 0
+                if docs_count > 3:
+                    inference_dates = []
+                    for i in range(0, len(json_data_inference)):
+                        inference_dates.append(json_data_inference[i][4].split(" ")[0])
+                    inference_dates_unique = []
+                    for item in inference_dates:
+                        if item not in inference_dates_unique:
+                            inference_dates_unique.append(item)
+                    if len(inference_dates_unique) > 3:
+                        inference_dates_dict = {}
+                        for i, key in enumerate(inference_dates_unique):
+                            inference_dates_dict[key] = [0]
+                        for i in range(0, len(json_data_inference)):
+                            inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] = \
+                                inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] + 1
+                        # calculate average for values from inference_dates_dict
+                        avg_value = 0
+                        for key, value in inference_dates_dict.items():
+                            avg_value = avg_value + value[0]
+                        avg_value = round(avg_value / len(inference_dates_dict), 2)
+                        # calculate average for last 3 values from inference_dates_dict
+                        avg_value_last = 0
+                        for i in range(1, 4):
+                            avg_value_last = avg_value_last + inference_dates_dict[inference_dates_unique[len(inference_dates_unique) - i]][0]
+                        avg_value_last = round(avg_value_last / 3, 2)
+                        if avg_value_last > avg_value:
+                            delta_docs = round(100 - ((avg_value * 100) / avg_value_last), 2)
+                        else:
+                            delta_docs = round(100 - ((avg_value_last * 100) / avg_value), 2) * -1
+                st.metric(label=model.documentsTitle, value=docs_count, delta=str(delta_docs) + "%")
+            with col3:
+                inference_dates = []
+                for i in range(0, len(json_data_inference)):
+                    inference_dates.append(json_data_inference[i][4].split(" ")[0])
+                inference_dates_unique = []
+                for item in inference_dates:
+                    if item not in inference_dates_unique:
+                        inference_dates_unique.append(item)
+                inference_dates_dict = {}
+                for i, key in enumerate(inference_dates_unique):
+                    inference_dates_dict[key] = [0]
+                for i in range(0, len(json_data_inference)):
+                    inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] = \
+                        inference_dates_dict[json_data_inference[i][4].split(" ")[0]][0] + 1
+                # loop through the dictionary and find the max value
+                max_value = 0
+                for key, value in inference_dates_dict.items():
+                    if value[0] > max_value:
+                        max_value = value[0]
+                # calculate average for values from inference_dates_dict
+                avg_value = 0
+                for key, value in inference_dates_dict.items():
+                    avg_value = avg_value + value[0]
+                avg_value = round(avg_value / len(inference_dates_dict), 2)
+                avg_delta = round(100 - ((avg_value * 100) / max_value), 2)
+                st.metric(label=model.dailyInferenceTitle, value=max_value, delta=str(avg_delta) + "%")
+            with col4:
+                inference_time_avg = 0
+                # calculate inference time average
+                for i in range(0, len(json_data_inference)):
+                    inference_time_avg = inference_time_avg + json_data_inference[i][0]
+                inference_time_avg = round(inference_time_avg / len(json_data_inference), 2)
+                delta_time = 0
+                if len(json_data_inference) > 3:
+                    avg_time_last = (json_data_inference[len(json_data_inference) - 1][0] +
+                                     json_data_inference[len(json_data_inference) - 2][0] +
+                                     json_data_inference[len(json_data_inference) - 3][0]) / 3
+                    if avg_time_last > inference_time_avg:
+                        delta_time = round(100 - ((inference_time_avg * 100) / avg_time_last), 2)
+                    else:
+                        delta_time = round(100 - ((avg_time_last * 100) / inference_time_avg), 2) * -1
+                st.metric(label=model.inferenceTimeTitle, value=str(inference_time_avg) + " s", delta=str(delta_time) + "%",
+                          delta_color="inverse")
+            with col5:
+                models_unique = []
+                models_dict = {}
+                for i in range(0, len(json_data_evaluate)):
+                    if json_data_evaluate[i][3] not in models_unique:
+                        models_unique.append(json_data_evaluate[i][3])
+                        models_dict[json_data_evaluate[i][3]] = json_data_evaluate[i][1]['mean_accuracy']
+                avg_accuracy = 0
+                for key, value in models_dict.items():
+                    avg_accuracy = avg_accuracy + value
+                avg_accuracy = round(avg_accuracy / len(models_dict), 2)
+                if len(models_unique) > 3:
+                    # calculate average accuracy for last 3 values
+                    avg_accuracy_last = 0
+                    for i in range(1, 4):
+                        avg_accuracy_last = avg_accuracy_last + models_dict[models_unique[len(models_unique) - i]]
+                    avg_accuracy_last = round(avg_accuracy_last / 3, 2)
+                else:
+                    avg_accuracy_last = avg_accuracy
+                if avg_accuracy_last > avg_accuracy:
+                    delta_accuracy = round(100 - ((avg_accuracy * 100) / avg_accuracy_last), 2)
+                else:
+                    delta_accuracy = round(100 - ((avg_accuracy_last * 100) / avg_accuracy), 2) * -1
+                st.metric(label=model.accuracyTitle, value=avg_accuracy, delta=str(delta_accuracy) + "%",
+                          delta_color="inverse")
+            st.markdown("---")
+        with st.container():
+            col1, col2 = st.columns(2)
+            with col1:
+                st.write(model.titleInferencePerformance)
+                models_dict = {}
+                models = []
+                for i in range(0, len(json_data_inference)):
+                    models.append(json_data_inference[i][3])
+                models_unique = []
+                for item in models:
+                    if item not in models_unique:
+                        models_unique.append(item)
+                for i, key in enumerate(models_unique):
+                    models_dict[key] = []
+                for i in range(0, len(json_data_inference)):
+                    models_dict[json_data_inference[i][3]].append(round(json_data_inference[i][0]))
+                data = pd.DataFrame(models_dict)
+                st.line_chart(data)
+            with col2:
+                st.write(model.titleModelEval)
+                models_unique = []
+                models_dict = {}
+                for i in range(0, len(json_data_evaluate)):
+                    if json_data_evaluate[i][3] not in models_unique:
+                        models_unique.append(json_data_evaluate[i][3])
+                        models_dict[json_data_evaluate[i][3]] = json_data_evaluate[i][1]['accuracies']
+                data = pd.DataFrame(models_dict)
+                st.line_chart(data)
+        st.markdown("---")
+        with st.container():
+            col1, col2, col3 = st.columns(3)
+            with col1:
+                with st.container():
+                    st.write(model.titleDataAnnotation)
+                    total, completed, in_progress = self.calculate_annotation_stats(model)
+                    data = pd.DataFrame({"Status": ["Completed", "In Progress"], "Value": [completed, in_progress]})
+                    # Create a horizontal bar chart
+                    chart = alt.Chart(data).mark_bar().encode(
+                        x='Value:Q',
+                        y=alt.Y('Status:N', sort='-x'),
+                        color=alt.Color('Status:N', legend=None)
+                    )
+                    st.altair_chart(chart)
+            with col2:
+                with st.container():
+                    st.write(model.titleDatasetInfo)
+                    api_url = "https://katanaml-org-sparrow-data.hf.space/api-dataset/v1/sparrow-data/dataset_info"
+                    # Make the GET request
+                    response = requests.get(api_url)
+                    # Check if the request was successful (status code 200)
+                    names = []
+                    rows = []
+                    if response.status_code == 200:
+                        # Convert the response content to a JSON object
+                        json_data = response.json()
+                        for i in range(0, len(json_data['splits'])):
+                            names.append(json_data['splits'][i]['name'])
+                            rows.append(json_data['splits'][i]['number_of_rows'])
+                    else:
+                        print(f"Error: Unable to fetch data from the API (status code {response.status_code})")
+                    data = pd.DataFrame({"Dataset": names, "Value": rows})
+                    # Create a horizontal bar chart
+                    chart = alt.Chart(data).mark_bar().encode(
+                        x='Value:Q',
+                        y=alt.Y('Dataset:N', sort='-x'),
+                        color=alt.Color('Dataset:N', legend=None)
+                    )
+                    st.altair_chart(chart)
+            with col3:
+                with st.container():
+                    st.write(model.titleTrainingPerformance)
+                    models_dict = {}
+                    for i in range(0, len(json_data_training)):
+                        models_dict[i] = round(json_data_training[i][0])
+                    data = pd.DataFrame({"Runs": models_dict.keys(), "Value": list(models_dict.values())})
+                    # Create a horizontal bar chart
+                    chart = alt.Chart(data).mark_bar().encode(
+                        x='Value:Q',
+                        y=alt.Y('Runs:N', sort='-x'),
+                        color=alt.Color('Runs:N', legend=None)
+                    )
+                    st.altair_chart(chart)
+        st.markdown("---")
+        with st.container():
+            st.write(model.titleEvaluationPerformance)
+            runs_dict = {}
+            for i in range(0, len(json_data_evaluate)):
+                runs_dict[i] = round(json_data_evaluate[i][0])
+            data = pd.DataFrame({"Runs": runs_dict.keys(), "Value": list(runs_dict.values())})
+            # Create a horizontal bar chart
+            chart = alt.Chart(data).mark_bar().encode(
+                x='Value:Q',
+                y=alt.Y('Runs:N', sort='-x'),
+                color=alt.Color('Runs:N', legend=None)
+            )
+            st.altair_chart(chart)
+    def calculate_annotation_stats(self, model):
+        completed = 0
+        in_progress = 0
+        data_dir_path = Path(model.annotation_files_dir)
+        for file_name in data_dir_path.glob("*.json"):
+            with open(file_name, "r") as f:
+                data = json.load(f)
+                v = data['meta']['version']
+                if v == 'v0.1':
+                    in_progress += 1
+                else:
+                    completed += 1
+        total = completed + in_progress
+        status_json = {
+            "annotations": [
+                {
+                    "completed": completed,
+                    "in_progress": in_progress,
+                    "total": total
+                }
+            ]
+        }
+        with open(model.status_file, "w") as f:
+            json.dump(status_json, f, indent=2)
+        return total, completed, in_progress

toolbar/views/data_annotation.py ADDED Viewed

	@@ -0,0 +1,692 @@

+import streamlit as st
+from PIL import Image
+import streamlit_nested_layout
+from streamlit_sparrow_labeling import st_sparrow_labeling
+from streamlit_sparrow_labeling import DataProcessor
+import json
+import math
+import os
+from natsort import natsorted
+from tools import agstyler
+from tools.agstyler import PINLEFT
+import pandas as pd
+from toolbar_main import component_toolbar_main
+class DataAnnotation:
+    class Model:
+        pageTitle = "Data Annotation"
+        img_file = None
+        rects_file = None
+        labels_file = "docs/labels.json"
+        groups_file = "docs/groups.json"
+        assign_labels_text = "Assign Labels"
+        text_caption_1 = "Check 'Assign Labels' to enable editing of labels and values, move and resize the boxes to annotate the document."
+        text_caption_2 = "Add annotations by clicking and dragging on the document, when 'Assign Labels' is unchecked."
+        labels = ["", "invoice_no", "invoice_date", "seller", "client", "seller_tax_id", "client_tax_id", "iban", "item_desc",
+                  "item_qty", "item_net_price", "item_net_worth", "item_vat", "item_gross_worth", "total_net_worth", "total_vat",
+                  "total_gross_worth"]
+        groups = ["", "items_row1", "items_row2", "items_row3", "items_row4", "items_row5", "items_row6", "items_row7",
+                  "items_row8", "items_row9", "items_row10", "summary"]
+        selected_field = "Selected Field: "
+        save_text = "Save"
+        saved_text = "Saved!"
+        subheader_1 = "Select"
+        subheader_2 = "Upload"
+        annotation_text = "Annotation"
+        no_annotation_file = "No annotation file selected"
+        no_annotation_mapping = "Please annotate the document. Uncheck 'Assign Labels' and draw new annotations"
+        download_text = "Download"
+        download_hint = "Download the annotated structure in JSON format"
+        annotation_selection_help = "Select an annotation file to load"
+        upload_help = "Upload a file to annotate"
+        upload_button_text = "Upload"
+        upload_button_text_desc = "Choose a file"
+        assign_labels_text = "Assign Labels"
+        assign_labels_help = "Check to enable editing of labels and values"
+        export_labels_text = "Export Labels"
+        export_labels_help = "Create key-value pairs for the labels in JSON format"
+        done_text = "Done"
+        grouping_id = "ID"
+        grouping_value = "Value"
+        completed_text = "Completed"
+        completed_help = "Check to mark the annotation as completed"
+        error_text = "Value is too long. Please shorten it."
+        selection_must_be_continuous = "Please select continuous rows"
+    def view(self, model, ui_width, device_type, device_width):
+        with open(model.labels_file, "r") as f:
+            labels_json = json.load(f)
+        labels_list = labels_json["labels"]
+        labels = ['']
+        for label in labels_list:
+            labels.append(label['name'])
+        model.labels = labels
+        with open(model.groups_file, "r") as f:
+            groups_json = json.load(f)
+        groups_list = groups_json["groups"]
+        groups = ['']
+        for group in groups_list:
+            groups.append(group['name'])
+        model.groups = groups
+        with st.sidebar:
+            st.markdown("---")
+            st.subheader(model.subheader_1)
+            placeholder_upload = st.empty()
+            file_names = self.get_existing_file_names('docs/images/')
+            if 'annotation_index' not in st.session_state:
+                st.session_state['annotation_index'] = 0
+                annotation_index = 0
+            else:
+                annotation_index = st.session_state['annotation_index']
+            annotation_selection = placeholder_upload.selectbox(model.annotation_text, file_names,
+                                                                index=annotation_index,
+                                                                help=model.annotation_selection_help)
+            annotation_index = self.get_annotation_index(annotation_selection, file_names)
+            file_extension = self.get_file_extension(annotation_selection, 'docs/images/')
+            model.img_file = f"docs/images/{annotation_selection}" + file_extension
+            model.rects_file = f"docs/json/{annotation_selection}.json"
+            completed_check = st.empty()
+            btn = st.button(model.export_labels_text)
+            if btn:
+                self.export_labels(model)
+                st.write(model.done_text)
+            st.subheader(model.subheader_2)
+            with st.form("upload-form", clear_on_submit=True):
+                uploaded_file = st.file_uploader(model.upload_button_text_desc, accept_multiple_files=False,
+                                                 type=['png', 'jpg', 'jpeg'],
+                                                 help=model.upload_help)
+                submitted = st.form_submit_button(model.upload_button_text)
+                if submitted and uploaded_file is not None:
+                    ret = self.upload_file(uploaded_file)
+                    if ret is not False:
+                        file_names = self.get_existing_file_names('docs/images/')
+                        annotation_index = self.get_annotation_index(annotation_selection, file_names)
+                        annotation_selection = placeholder_upload.selectbox(model.annotation_text, file_names,
+                                                                            index=annotation_index,
+                                                                            help=model.annotation_selection_help)
+                        st.session_state['annotation_index'] = annotation_index
+        # st.title(model.pageTitle + " - " + annotation_selection)
+        if model.img_file is None:
+            st.caption(model.no_annotation_file)
+            return
+        saved_state = self.fetch_annotations(model.rects_file)
+        # annotation file has been changed
+        if annotation_index != st.session_state['annotation_index']:
+            annotation_v = saved_state['meta']['version']
+            if annotation_v == "v0.1":
+                st.session_state["annotation_done"] = False
+            else:
+                st.session_state["annotation_done"] = True
+        # store the annotation file index
+        st.session_state['annotation_index'] = annotation_index
+        # first load
+        if "annotation_done" not in st.session_state:
+            annotation_v = saved_state['meta']['version']
+            if annotation_v == "v0.1":
+                st.session_state["annotation_done"] = False
+            else:
+                st.session_state["annotation_done"] = True
+        with completed_check:
+            annotation_done = st.checkbox(model.completed_text, help=model.completed_help, key="annotation_done")
+            if annotation_done:
+                saved_state['meta']['version'] = "v1.0"
+            else:
+                saved_state['meta']['version'] = "v0.1"
+            with open(model.rects_file, "w") as f:
+                json.dump(saved_state, f, indent=2)
+            st.session_state[model.rects_file] = saved_state
+        assign_labels = st.checkbox(model.assign_labels_text, True, help=model.assign_labels_help)
+        mode = "transform" if assign_labels else "rect"
+        docImg = Image.open(model.img_file)
+        data_processor = DataProcessor()
+        with st.container():
+            doc_height = saved_state['meta']['image_size']['height']
+            doc_width = saved_state['meta']['image_size']['width']
+            canvas_width, number_of_columns = self.canvas_available_width(ui_width, doc_width, device_type,
+                                                                          device_width)
+            if number_of_columns > 1:
+                col1, col2 = st.columns([number_of_columns, 10 - number_of_columns])
+                with col1:
+                    result_rects = self.render_doc(model, docImg, saved_state, mode, canvas_width, doc_height, doc_width)
+                with col2:
+                    tab = st.radio("Select", ["Mapping", "Grouping", "Ordering"], horizontal=True,
+                                   label_visibility="collapsed")
+                    if tab == "Mapping":
+                        self.render_form(model, result_rects, data_processor, annotation_selection)
+                    elif tab == "Grouping":
+                        self.group_annotations(model, result_rects)
+                    elif tab == "Ordering":
+                        self.order_annotations(model, model.labels, model.groups, result_rects)
+            else:
+                result_rects = self.render_doc(model, docImg, saved_state, mode, canvas_width, doc_height, doc_width)
+                tab = st.radio("Select", ["Mapping", "Grouping"], horizontal=True, label_visibility="collapsed")
+                if tab == "Mapping":
+                    self.render_form(model, result_rects, data_processor, annotation_selection)
+                else:
+                    self.group_annotations(model, result_rects)
+    def render_doc(self, model, docImg, saved_state, mode, canvas_width, doc_height, doc_width):
+        with st.container():
+            height = 1296
+            width = 864
+            result_rects = st_sparrow_labeling(
+                fill_color="rgba(0, 151, 255, 0.3)",
+                stroke_width=2,
+                stroke_color="rgba(0, 50, 255, 0.7)",
+                background_image=docImg,
+                initial_rects=saved_state,
+                height=height,
+                width=width,
+                drawing_mode=mode,
+                display_toolbar=True,
+                update_streamlit=True,
+                canvas_width=canvas_width,
+                doc_height=doc_height,
+                doc_width=doc_width,
+                image_rescale=True,
+                key="doc_annotation" + model.img_file
+            )
+            st.caption(model.text_caption_1)
+            st.caption(model.text_caption_2)
+            return result_rects
+    def render_form(self, model, result_rects, data_processor, annotation_selection):
+        with st.container():
+            if result_rects is not None:
+                with st.form(key="fields_form"):
+                    toolbar = st.empty()
+                    self.render_form_view(result_rects.rects_data['words'], model.labels, result_rects,
+                                          data_processor)
+                    with toolbar:
+                        submit = st.form_submit_button(model.save_text, type="primary")
+                        if submit:
+                            for word in result_rects.rects_data['words']:
+                                if len(word['value']) > 1000:
+                                    st.error(model.error_text)
+                                    return
+                            with open(model.rects_file, "w") as f:
+                                json.dump(result_rects.rects_data, f, indent=2)
+                            st.session_state[model.rects_file] = result_rects.rects_data
+                            # st.write(model.saved_text)
+                            st.experimental_rerun()
+                if len(result_rects.rects_data['words']) == 0:
+                    st.caption(model.no_annotation_mapping)
+                    return
+                else:
+                    with open(model.rects_file, 'rb') as file:
+                        st.download_button(label=model.download_text,
+                                           data=file,
+                                           file_name=annotation_selection + ".json",
+                                           mime='application/json',
+                                           help=model.download_hint)
+    def render_form_view(self, words, labels, result_rects, data_processor):
+        data = []
+        for i, rect in enumerate(words):
+            group, label = rect['label'].split(":", 1) if ":" in rect['label'] else (None, rect['label'])
+            data.append({'id': i, 'value': rect['value'], 'label': label})
+        df = pd.DataFrame(data)
+        formatter = {
+            'id': ('ID', {**PINLEFT, 'hide': True}),
+            'value': ('Value', {**PINLEFT, 'editable': True}),
+            'label': ('Label', {**PINLEFT,
+                                'width': 80,
+                                'editable': True,
+                                'cellEditor': 'agSelectCellEditor',
+                                'cellEditorParams': {
+                                    'values': labels
+                                }})
+        }
+        go = {
+            'rowClassRules': {
+                'row-selected': 'data.id === ' + str(result_rects.current_rect_index)
+            }
+        }
+        green_light = "#abf7b1"
+        css = {
+            '.row-selected': {
+                'background-color': f'{green_light} !important'
+            }
+        }
+        response = agstyler.draw_grid(
+            df,
+            formatter=formatter,
+            fit_columns=True,
+            grid_options=go,
+            css=css
+        )
+        data = response['data'].values.tolist()
+        for i, rect in enumerate(words):
+            value = data[i][1]
+            label = data[i][2]
+            data_processor.update_rect_data(result_rects.rects_data, i, value, label)
+    def canvas_available_width(self, ui_width, doc_width, device_type, device_width):
+        doc_width_pct = (doc_width * 100) / ui_width
+        if doc_width_pct < 45:
+            canvas_width_pct = 37
+        elif doc_width_pct < 55:
+            canvas_width_pct = 49
+        else:
+            canvas_width_pct = 60
+        if ui_width > 700 and canvas_width_pct == 37 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 4
+        elif ui_width > 700 and canvas_width_pct == 49 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 5
+        elif ui_width > 700 and canvas_width_pct == 60 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 6
+        else:
+            if device_type == "desktop":
+                ui_width = device_width - math.floor((device_width * 22) / 100)
+            elif device_type == "mobile":
+                ui_width = device_width - math.floor((device_width * 13) / 100)
+            return ui_width, 1
+    def fetch_annotations(self, rects_file):
+        for key in st.session_state:
+            if key.startswith("docs/json/") and key != rects_file:
+                del st.session_state[key]
+        if rects_file not in st.session_state:
+            with open(rects_file, "r") as f:
+                saved_state = json.load(f)
+                st.session_state[rects_file] = saved_state
+        else:
+            saved_state = st.session_state[rects_file]
+        return saved_state
+    def upload_file(self, uploaded_file):
+        if uploaded_file is not None:
+            if os.path.exists(os.path.join("docs/images/", uploaded_file.name)):
+                st.write("File already exists")
+                return False
+            if len(uploaded_file.name) > 100:
+                st.write("File name too long")
+                return False
+            with open(os.path.join("docs/images/", uploaded_file.name), "wb") as f:
+                f.write(uploaded_file.getbuffer())
+            img_file = Image.open(os.path.join("docs/images/", uploaded_file.name))
+            annotations_json = {
+                "meta": {
+                    "version": "v0.1",
+                    "split": "train",
+                    "image_id": len(self.get_existing_file_names("docs/images/")),
+                    "image_size": {
+                        "width": img_file.width,
+                        "height": img_file.height
+                    }
+                },
+                "words": []
+            }
+            file_name = uploaded_file.name.split(".")[0]
+            with open(os.path.join("docs/json/", file_name + ".json"), "w") as f:
+                json.dump(annotations_json, f, indent=2)
+            st.success("File uploaded successfully")
+    def get_existing_file_names(self, dir_name):
+        # get ordered list of files without file extension, excluding hidden files
+        return natsorted([os.path.splitext(f)[0] for f in os.listdir(dir_name) if not f.startswith('.')])
+    def get_file_extension(self, file_name, dir_name):
+        # get list of files, excluding hidden files
+        files = [f for f in os.listdir(dir_name) if not f.startswith('.')]
+        for f in files:
+            if file_name is not  None and os.path.splitext(f)[0] == file_name:
+                return os.path.splitext(f)[1]
+    def get_annotation_index(self, file, files_list):
+        return files_list.index(file)
+    def group_annotations(self, model, result_rects):
+        with st.form(key="grouping_form"):
+            if result_rects is not None:
+                words = result_rects.rects_data['words']
+                data = []
+                for i, rect in enumerate(words):
+                    data.append({'id': i, 'value': rect['value']})
+                df = pd.DataFrame(data)
+                formatter = {
+                    'id': ('ID', {**PINLEFT, 'width': 50}),
+                    'value': ('Value', PINLEFT)
+                }
+                toolbar = st.empty()
+                response = agstyler.draw_grid(
+                    df,
+                    formatter=formatter,
+                    fit_columns=True,
+                    selection='multiple',
+                    use_checkbox='True',
+                    pagination_size=40
+                )
+                rows = response['selected_rows']
+                with toolbar:
+                    submit = st.form_submit_button(model.save_text, type="primary")
+                    if submit and len(rows) > 0:
+                        # check if there are gaps in the selected rows
+                        if len(rows) > 1:
+                            for i in range(len(rows) - 1):
+                                if rows[i]['id'] + 1 != rows[i + 1]['id']:
+                                    st.error(model.selection_must_be_continuous)
+                                    return
+                        words = result_rects.rects_data['words']
+                        new_words_list = []
+                        coords = []
+                        for row in rows:
+                            word_value = words[row['id']]['value']
+                            rect = words[row['id']]['rect']
+                            coords.append(rect)
+                            new_words_list.append(word_value)
+                        # convert array to string
+                        new_word = " ".join(new_words_list)
+                        # Get min x1 value from coords array
+                        x1_min = min([coord['x1'] for coord in coords])
+                        y1_min = min([coord['y1'] for coord in coords])
+                        x2_max = max([coord['x2'] for coord in coords])
+                        y2_max = max([coord['y2'] for coord in coords])
+                        words[rows[0]['id']]['value'] = new_word
+                        words[rows[0]['id']]['rect'] = {
+                            "x1": x1_min,
+                            "y1": y1_min,
+                            "x2": x2_max,
+                            "y2": y2_max
+                        }
+                        # loop array in reverse order and remove selected entries
+                        i = 0
+                        for row in rows[::-1]:
+                            if i == len(rows) - 1:
+                                break
+                            del words[row['id']]
+                            i += 1
+                        result_rects.rects_data['words'] = words
+                        with open(model.rects_file, "w") as f:
+                            json.dump(result_rects.rects_data, f, indent=2)
+                        st.session_state[model.rects_file] = result_rects.rects_data
+                        st.experimental_rerun()
+    def order_annotations(self, model, labels, groups, result_rects):
+        if result_rects is not None:
+            self.action_event = None
+            data = []
+            idx_list = [""]
+            words = result_rects.rects_data['words']
+            for i, rect in enumerate(words):
+                if rect['label'] != "":
+                    # split string into two variables, assign None to first variable if no split is found
+                    group, label = rect['label'].split(":", 1) if ":" in rect['label'] else (None, rect['label'])
+                    data.append({'id': i, 'value': rect['value'], 'label': label, 'group': group})
+                    idx_list.append(i)
+            df = pd.DataFrame(data)
+            formatter = {
+                'id': ('ID', {**PINLEFT, 'width': 50}),
+                'value': ('Value', {**PINLEFT}),
+                'label': ('Label', {**PINLEFT,
+                                    'width': 80,
+                                    'editable': False,
+                                    'cellEditor': 'agSelectCellEditor',
+                                    'cellEditorParams': {
+                                        'values': labels
+                                    }}),
+                'group': ('Group', {**PINLEFT,
+                                    'width': 80,
+                                    'editable': True,
+                                    'cellEditor': 'agSelectCellEditor',
+                                    'cellEditorParams': {
+                                        'values': groups
+                                    }})
+            }
+            go = {
+                'rowClassRules': {
+                    'row-selected': 'data.id === ' + str(result_rects.current_rect_index)
+                }
+            }
+            green_light = "#abf7b1"
+            css = {
+                '.row-selected': {
+                    'background-color': f'{green_light} !important'
+                }
+            }
+            idx_option = st.selectbox('Select row to move into', idx_list)
+            def run_component(props):
+                value = component_toolbar_main(key='toolbar_main', **props)
+                return value
+            def handle_event(value):
+                if value is not None:
+                    if 'action_timestamp' not in st.session_state:
+                        self.action_event = value['action']
+                        st.session_state['action_timestamp'] = value['timestamp']
+                    else:
+                        if st.session_state['action_timestamp'] != value['timestamp']:
+                            self.action_event = value['action']
+                            st.session_state['action_timestamp'] = value['timestamp']
+                        else:
+                            self.action_event = None
+            props = {
+                'buttons': {
+                    'up': {
+                        'disabled': False,
+                        'rendered': ''
+                    },
+                    'down': {
+                        'disabled': False,
+                        'rendered': ''
+                    },
+                    'save': {
+                        'disabled': False,
+                        'rendered': ''
+                        # 'rendered': 'none',
+                    }
+                }
+            }
+            handle_event(run_component(props))
+            response = agstyler.draw_grid(
+                df,
+                formatter=formatter,
+                fit_columns=True,
+                grid_options=go,
+                css=css
+            )
+            rows = response['selected_rows']
+            if len(rows) == 0 and result_rects.current_rect_index > -1:
+                for i, row in enumerate(data):
+                    if row['id'] == result_rects.current_rect_index:
+                        rows = [
+                            {
+                                '_selectedRowNodeInfo': {
+                                    'nodeRowIndex': i
+                                },
+                                'id': row['id']
+                            }
+                        ]
+                        break
+            if str(self.action_event) == 'up':
+                if len(rows) > 0:
+                    idx = rows[0]['_selectedRowNodeInfo']['nodeRowIndex']
+                    if idx > 0:
+                        row_id = rows[0]['id']
+                        if row_id == idx_option:
+                            return
+                        # swap row upwards in the array
+                        if idx_option == "":
+                            words[row_id], words[row_id - 1] = words[row_id - 1], words[row_id]
+                        else:
+                            for i in range(1000):
+                                words[row_id], words[row_id - 1] = words[row_id - 1], words[row_id]
+                                row_id -= 1
+                                if row_id == idx_option:
+                                    break
+                        result_rects.rects_data['words'] = words
+                        with open(model.rects_file, "w") as f:
+                            json.dump(result_rects.rects_data, f, indent=2)
+                        st.session_state[model.rects_file] = result_rects.rects_data
+                        st.experimental_rerun()
+            elif str(self.action_event) == 'down':
+                if len(rows) > 0:
+                    idx = rows[0]['_selectedRowNodeInfo']['nodeRowIndex']
+                    if idx < len(df) - 1:
+                        row_id = rows[0]['id']
+                        if row_id == idx_option:
+                            return
+                        # swap row downwards in the array
+                        if idx_option == "":
+                            words[row_id], words[row_id + 1] = words[row_id + 1], words[row_id]
+                        else:
+                            for i in range(1000):
+                                words[row_id], words[row_id + 1] = words[row_id + 1], words[row_id]
+                                row_id += 1
+                                if row_id == idx_option:
+                                    break
+                        result_rects.rects_data['words'] = words
+                        with open(model.rects_file, "w") as f:
+                            json.dump(result_rects.rects_data, f, indent=2)
+                        st.session_state[model.rects_file] = result_rects.rects_data
+                        st.experimental_rerun()
+            elif str(self.action_event) == 'save':
+                data = response['data'].values.tolist()
+                for elem in data:
+                    if elem[3] != "None":
+                        idx = elem[0]
+                        group = elem[3]
+                        words[idx]['label'] = f"{group}:{elem[2]}"
+                result_rects.rects_data['words'] = words
+                with open(model.rects_file, "w") as f:
+                    json.dump(result_rects.rects_data, f, indent=2)
+                st.session_state[model.rects_file] = result_rects.rects_data
+                st.experimental_rerun()
+    def export_labels(self, model):
+        path_from = os.path.join("docs/json/")
+        path_to = os.path.join("docs/json/key/")
+        files = [f for f in os.listdir(path_from) if not f.startswith('.')]
+        for file in files:
+            path = os.path.join(path_from, file)
+            if os.path.isfile(path):
+                with open(path, "r") as f:
+                    data = json.load(f)
+                    words = data['words']
+                    keys = {}
+                    row_keys = {}
+                    for word in words:
+                        if word['label'] != '':
+                            if ':' in word['label']:
+                                group, label = word['label'].split(':', 1)
+                                if 'row' not in group:
+                                    if group not in keys:
+                                        keys[group] = {}
+                                    keys[group][label] = word['value']
+                                else:
+                                    if "items" not in keys:
+                                        keys["items"] = []
+                                    if group not in row_keys:
+                                        row_keys[group] = {}
+                                    row_keys[group][label] = word['value']
+                            else:
+                                keys[word['label']] = word['value']
+                    if row_keys != {}:
+                        for key in row_keys:
+                            keys["items"].append(row_keys[key])
+                    if keys != {}:
+                        path = os.path.join(path_to, file)
+                        with open(path, "w") as f:
+                            json.dump(keys, f, indent=2)

toolbar/views/data_inference.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import streamlit as st
+import os
+import time
+from PIL import Image
+import math
+from streamlit_sparrow_labeling import st_sparrow_labeling
+import requests
+from config import settings
+import json
+class DataInference:
+    class Model:
+        # pageTitle = "Data Inference"
+        subheader_2 = "Upload"
+        initial_msg = "Please upload a file for inference"
+        upload_help = "Upload a file to extract data from it"
+        upload_button_text = "Upload"
+        upload_button_text_desc = "Choose a file"
+        extract_data = "Extract Data"
+        model_in_use = "donut"
+        img_file = None
+        def set_image_file(self, img_file):
+            st.session_state['img_file'] = img_file
+        def get_image_file(self):
+            if 'img_file' not in st.session_state:
+                return None
+            return st.session_state['img_file']
+        data_result = None
+        def set_data_result(self, data_result):
+            st.session_state['data_result'] = data_result
+        def get_data_result(self):
+            if 'data_result' not in st.session_state:
+                return None
+            return st.session_state['data_result']
+    def view(self, model, ui_width, device_type, device_width):
+        # st.title(model.pageTitle)
+        with st.sidebar:
+            st.markdown("---")
+            st.subheader(model.subheader_2)
+            with st.form("upload-form", clear_on_submit=True):
+                uploaded_file = st.file_uploader(model.upload_button_text_desc, accept_multiple_files=False,
+                                                 type=['png', 'jpg', 'jpeg'],
+                                                 help=model.upload_help)
+                submitted = st.form_submit_button(model.upload_button_text)
+                if submitted and uploaded_file is not None:
+                    ret = self.upload_file(uploaded_file)
+                    if ret is not False:
+                        model.set_image_file(ret)
+                        model.set_data_result(None)
+        if model.get_image_file() is not None:
+            doc_img = Image.open(model.get_image_file())
+            doc_height = doc_img.height
+            doc_width = doc_img.width
+            canvas_width, number_of_columns = self.canvas_available_width(ui_width, doc_width, device_type,
+                                                                          device_width)
+            if number_of_columns > 1:
+                col1, col2 = st.columns([number_of_columns, 10 - number_of_columns])
+                with col1:
+                    self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
+                with col2:
+                    self.render_results(model)
+            else:
+                self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
+                self.render_results(model)
+        else:
+            st.title(model.initial_msg)
+    def upload_file(self, uploaded_file):
+        timestamp = str(time.time())
+        timestamp = timestamp.replace(".", "")
+        file_name, file_extension = os.path.splitext(uploaded_file.name)
+        uploaded_file.name = file_name + "_" + timestamp + file_extension
+        if os.path.exists(os.path.join("docs/inference/", uploaded_file.name)):
+            st.write("File already exists")
+            return False
+        if len(uploaded_file.name) > 500:
+            st.write("File name too long")
+            return False
+        with open(os.path.join("docs/inference/", uploaded_file.name), "wb") as f:
+            f.write(uploaded_file.getbuffer())
+        st.success("File uploaded successfully")
+        return os.path.join("docs/inference/", uploaded_file.name)
+    def canvas_available_width(self, ui_width, doc_width, device_type, device_width):
+        doc_width_pct = (doc_width * 100) / ui_width
+        if doc_width_pct < 45:
+            canvas_width_pct = 37
+        elif doc_width_pct < 55:
+            canvas_width_pct = 49
+        else:
+            canvas_width_pct = 60
+        if ui_width > 700 and canvas_width_pct == 37 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 4
+        elif ui_width > 700 and canvas_width_pct == 49 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 5
+        elif ui_width > 700 and canvas_width_pct == 60 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 6
+        else:
+            if device_type == "desktop":
+                ui_width = device_width - math.floor((device_width * 22) / 100)
+            elif device_type == "mobile":
+                ui_width = device_width - math.floor((device_width * 13) / 100)
+            return ui_width, 1
+    def render_doc(self, model, doc_img, canvas_width, doc_height, doc_width):
+        height = 1296
+        width = 864
+        annotations_json = {
+            "meta": {
+                "version": "v0.1",
+                "split": "train",
+                "image_id": 0,
+                "image_size": {
+                    "width": doc_width,
+                    "height": doc_height
+                }
+            },
+            "words": []
+        }
+        st_sparrow_labeling(
+            fill_color="rgba(0, 151, 255, 0.3)",
+            stroke_width=2,
+            stroke_color="rgba(0, 50, 255, 0.7)",
+            background_image=doc_img,
+            initial_rects=annotations_json,
+            height=height,
+            width=width,
+            drawing_mode="transform",
+            display_toolbar=False,
+            update_streamlit=False,
+            canvas_width=canvas_width,
+            doc_height=doc_height,
+            doc_width=doc_width,
+            image_rescale=True,
+            key="doc_annotation" + model.get_image_file()
+        )
+    def render_results(self, model):
+        with st.form(key="results_form"):
+            button_placeholder = st.empty()
+            submit = button_placeholder.form_submit_button(model.extract_data, type="primary")
+            if 'inference_error' in st.session_state:
+                st.error(st.session_state.inference_error)
+                del st.session_state.inference_error
+            if submit:
+                button_placeholder.empty()
+                api_url = "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/inference"
+                file_path = model.get_image_file()
+                with open(file_path, "rb") as file:
+                    model_in_use = model.model_in_use
+                    sparrow_key = settings.sparrow_key
+                    # Prepare the payload
+                    files = {
+                        'file': (file.name, file, 'image/jpeg')
+                    }
+                    data = {
+                        'image_url': '',
+                        'model_in_use': model_in_use,
+                        'sparrow_key': sparrow_key
+                    }
+                    with st.spinner("Extracting data from document..."):
+                        response = requests.post(api_url, data=data, files=files, timeout=180)
+                if response.status_code != 200:
+                    print('Request failed with status code:', response.status_code)
+                    print('Response:', response.text)
+                    st.session_state["inference_error"] = "Error extracting data from document"
+                    st.experimental_rerun()
+                model.set_data_result(response.text)
+                # Display JSON data in Streamlit
+                st.markdown("---")
+                st.json(response.text)
+                # replace file extension to json
+                file_path = file_path.replace(".jpg", ".json")
+                with open(file_path, "w") as f:
+                    json.dump(response.text, f, indent=2)
+                st.experimental_rerun()
+            else:
+                if model.get_data_result() is not None:
+                    st.markdown("---")
+                    st.json(model.get_data_result())

toolbar/views/data_review.py ADDED Viewed

	@@ -0,0 +1,165 @@

+import streamlit as st
+from natsort import natsorted
+import os
+from PIL import Image
+import math
+from streamlit_sparrow_labeling import st_sparrow_labeling
+import json
+class DataReview:
+    class Model:
+        # pageTitle = "Data Review"
+        subheader_2 = "Select"
+        subheader_3 = "Result"
+        selection_text = "File to review"
+        initial_msg = "Please select a file to review"
+        img_file = None
+        def set_image_file(self, img_file):
+            st.session_state['img_file_review'] = img_file
+        def get_image_file(self):
+            if 'img_file_review' not in st.session_state:
+                return None
+            return st.session_state['img_file_review']
+        json_file = None
+        def set_json_file(self, json_file):
+            st.session_state['json_file_review'] = json_file
+        def get_json_file(self):
+            if 'json_file_review' not in st.session_state:
+                return None
+            return st.session_state['json_file_review']
+    def view(self, model, ui_width, device_type, device_width):
+        # st.title(model.pageTitle)
+        with st.sidebar:
+            st.markdown("---")
+            st.subheader(model.subheader_2)
+            # get list of files in inference directory
+            processed_file_names = self.get_processed_file_names('docs/inference/')
+            if 'selection_index' not in st.session_state:
+                st.session_state['selection_index'] = 0
+                selection_index = 0
+            else:
+                selection_index = st.session_state['selection_index']
+            selection = st.selectbox(model.selection_text, processed_file_names, index=selection_index)
+            selection_index = self.get_selection_index(selection, processed_file_names)
+            st.session_state['selection_index'] = selection_index
+        img_file = "docs/inference/" + selection + ".jpg"
+        json_file = "docs/inference/" + selection + ".json"
+        model.set_image_file(img_file)
+        model.set_json_file(json_file)
+        if model.get_image_file() is not None:
+            doc_img = Image.open(model.get_image_file())
+            doc_height = doc_img.height
+            doc_width = doc_img.width
+            canvas_width, number_of_columns = self.canvas_available_width(ui_width, doc_width, device_type,
+                                                                          device_width)
+            if number_of_columns > 1:
+                col1, col2 = st.columns([number_of_columns, 10 - number_of_columns])
+                with col1:
+                    pass
+                    self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
+                with col2:
+                    pass
+                    self.render_results(model)
+            else:
+                pass
+                self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
+                self.render_results(model)
+        else:
+            st.title(model.initial_msg)
+    def get_processed_file_names(self, dir_name):
+        # get ordered list of files without file extension, excluding hidden files, with JSON extension only
+        file_names = [os.path.splitext(f)[0] for f in os.listdir(dir_name) if
+                        os.path.isfile(os.path.join(dir_name, f)) and not f.startswith('.') and f.endswith('.json')]
+        file_names = natsorted(file_names)
+        return file_names
+    def get_selection_index(self, file, files_list):
+        return files_list.index(file)
+    def canvas_available_width(self, ui_width, doc_width, device_type, device_width):
+        doc_width_pct = (doc_width * 100) / ui_width
+        if doc_width_pct < 45:
+            canvas_width_pct = 37
+        elif doc_width_pct < 55:
+            canvas_width_pct = 49
+        else:
+            canvas_width_pct = 60
+        if ui_width > 700 and canvas_width_pct == 37 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 4
+        elif ui_width > 700 and canvas_width_pct == 49 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 5
+        elif ui_width > 700 and canvas_width_pct == 60 and device_type == "desktop":
+            return math.floor(canvas_width_pct * ui_width / 100), 6
+        else:
+            if device_type == "desktop":
+                ui_width = device_width - math.floor((device_width * 22) / 100)
+            elif device_type == "mobile":
+                ui_width = device_width - math.floor((device_width * 13) / 100)
+            return ui_width, 1
+    def render_doc(self, model, doc_img, canvas_width, doc_height, doc_width):
+        height = 1296
+        width = 864
+        annotations_json = {
+            "meta": {
+                "version": "v0.1",
+                "split": "train",
+                "image_id": 0,
+                "image_size": {
+                    "width": doc_width,
+                    "height": doc_height
+                }
+            },
+            "words": []
+        }
+        st_sparrow_labeling(
+            fill_color="rgba(0, 151, 255, 0.3)",
+            stroke_width=2,
+            stroke_color="rgba(0, 50, 255, 0.7)",
+            background_image=doc_img,
+            initial_rects=annotations_json,
+            height=height,
+            width=width,
+            drawing_mode="transform",
+            display_toolbar=False,
+            update_streamlit=False,
+            canvas_width=canvas_width,
+            doc_height=doc_height,
+            doc_width=doc_width,
+            image_rescale=True,
+            key="doc_annotation" + model.get_image_file()
+        )
+    def render_results(self, model):
+        json_file = model.get_json_file()
+        if json_file is not None:
+            with open(json_file) as f:
+                data_json = json.load(f)
+                st.subheader(model.subheader_3)
+                st.markdown("---")
+                st.json(data_json)
+                st.markdown("---")

toolbar/views/model_training.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import streamlit as st
+class ModelTraining:
+    class Model:
+        pageTitle = "Model Training"
+    def view(self, model):
+        st.title(model.pageTitle)

toolbar/views/model_tuning.py ADDED Viewed

	@@ -0,0 +1,9 @@

+import streamlit as st
+class ModelTuning:
+    class Model:
+        pageTitle = "Model Tuning"
+    def view(self, model):
+        st.title(model.pageTitle)

toolbar/views/setup.py ADDED Viewed

	@@ -0,0 +1,233 @@

+import streamlit as st
+import json
+import pandas as pd
+from tools import agstyler
+from tools.agstyler import PINLEFT
+from toolbar import component_toolbar_buttons
+class Setup:
+    class Model:
+        header1 = "Labels"
+        header2 = "Groups"
+        labels_file = "docs/labels.json"
+        groups_file = "docs/groups.json"
+    def view(self, model):
+        tab = st.radio("Select", ["Labels", "Groups"], horizontal=True, label_visibility="collapsed")
+        if tab == "Labels":
+            st.title(model.header1)
+            self.setup_labels(model)
+        elif tab == "Groups":
+            st.title(model.header2)
+            self.setup_groups(model)
+    def setup_labels(self, model):
+        self.action_event = False
+        if 'action' not in st.session_state:
+            st.session_state['action'] = None
+        with open(model.labels_file, "r") as f:
+            labels_json = json.load(f)
+        labels = labels_json["labels"]
+        data = []
+        for label in labels:
+            data.append({'id': label['id'], 'name': label['name'], 'description': label['description']})
+        self.df = pd.DataFrame(data)
+        formatter = {
+            'id': ('ID', {'hide': True}),
+            'name': ('Label', {**PINLEFT, 'editable': True}),
+            'description': ('Description', {**PINLEFT, 'editable': True})
+        }
+        def run_component(props):
+            value = component_toolbar_buttons(key='toolbar_buttons_labels', **props)
+            return value
+        def handle_event(value):
+            if value is not None:
+                if 'action_timestamp' not in st.session_state:
+                    self.action_event = True
+                    st.session_state['action_timestamp'] = value['timestamp']
+                else:
+                    if st.session_state['action_timestamp'] != value['timestamp']:
+                        self.action_event = True
+                        st.session_state['action_timestamp'] = value['timestamp']
+                    else:
+                        self.action_event = False
+            if value is not None and value['action'] == 'create' and self.action_event:
+                if st.session_state['action'] != 'delete':
+                    max_id = self.df['id'].max()
+                    self.df.loc[-1] = [max_id + 1, '', '']  # adding a row
+                    self.df.index = self.df.index + 1  # shifting index
+                    self.df.sort_index(inplace=True)
+                    st.session_state['action'] = 'create'
+            elif value is not None and value['action'] == 'delete' and self.action_event:
+                if st.session_state['action'] != 'delete' and st.session_state['action'] != 'create':
+                    rows = st.session_state['selected_rows']
+                    if len(rows) > 0:
+                        idx = rows[0]['_selectedRowNodeInfo']['nodeRowIndex']
+                        self.df.drop(self.df.index[idx], inplace=True)
+                        self.df.reset_index(drop=True, inplace=True)
+                    st.session_state['action'] = 'delete'
+            elif value is not None and value['action'] == 'save' and self.action_event:
+                st.session_state['action'] = 'save'
+        props = {
+            'buttons': {
+                'create': False,
+                'delete': False,
+                'save': False,
+            }
+        }
+        handle_event(run_component(props))
+        if st.session_state['action'] == 'save' and 'response' in st.session_state:
+            if st.session_state['response'] is not None:
+                self.df = st.session_state['response']
+            st.session_state['response'] = None
+        if st.session_state['action'] == 'create' and 'response' in st.session_state:
+            if st.session_state['response'] is not None:
+                self.df = st.session_state['response']
+        if st.session_state['action'] == 'delete' and 'response' in st.session_state:
+            if st.session_state['response'] is not None:
+                self.df = st.session_state['response']
+        response = agstyler.draw_grid(
+            self.df,
+            formatter=formatter,
+            fit_columns=True,
+            pagination_size=10,
+            selection="single",
+            use_checkbox=False
+        )
+        rows = response['selected_rows']
+        st.session_state['selected_rows'] = rows
+        if st.session_state['action'] == 'create' and self.action_event:
+            st.session_state['response'] = response['data']
+        elif st.session_state['action'] == 'delete' and self.action_event:
+            st.session_state['response'] = response['data']
+        elif st.session_state['action'] == 'save' and self.action_event:
+            data = response['data'].values.tolist()
+            rows = []
+            for row in data:
+                rows.append({'id': row[0], 'name': row[1], 'description': row[2]})
+            labels_json['labels'] = rows
+            with open(model.labels_file, "w") as f:
+                json.dump(labels_json, f, indent=2)
+    def setup_groups(self, model):
+        self.action_event = False
+        if 'action' not in st.session_state:
+            st.session_state['action'] = None
+        with open(model.groups_file, "r") as f:
+            groups_json = json.load(f)
+        groups = groups_json["groups"]
+        data = []
+        for group in groups:
+            data.append({'id': group['id'], 'name': group['name'], 'description': group['description']})
+        self.df = pd.DataFrame(data)
+        formatter = {
+            'id': ('ID', {'hide': True}),
+            'name': ('Group', {**PINLEFT, 'editable': True}),
+            'description': ('Description', {**PINLEFT, 'editable': True})
+        }
+        def run_component(props):
+            value = component_toolbar_buttons(key='toolbar_buttons_groups', **props)
+            return value
+        def handle_event(value):
+            if value is not None:
+                if 'action_timestamp' not in st.session_state:
+                    self.action_event = True
+                    st.session_state['action_timestamp'] = value['timestamp']
+                else:
+                    if st.session_state['action_timestamp'] != value['timestamp']:
+                        self.action_event = True
+                        st.session_state['action_timestamp'] = value['timestamp']
+                    else:
+                        self.action_event = False
+            if value is not None and value['action'] == 'create' and self.action_event:
+                if st.session_state['action'] != 'delete':
+                    max_id = self.df['id'].max()
+                    self.df.loc[-1] = [max_id + 1, '', '']  # adding a row
+                    self.df.index = self.df.index + 1  # shifting index
+                    self.df.sort_index(inplace=True)
+                    st.session_state['action'] = 'create'
+            elif value is not None and value['action'] == 'delete' and self.action_event:
+                if st.session_state['action'] != 'delete' and st.session_state['action'] != 'create':
+                    rows = st.session_state['selected_rows']
+                    if len(rows) > 0:
+                        idx = rows[0]['_selectedRowNodeInfo']['nodeRowIndex']
+                        self.df.drop(self.df.index[idx], inplace=True)
+                        self.df.reset_index(drop=True, inplace=True)
+                    st.session_state['action'] = 'delete'
+            elif value is not None and value['action'] == 'save' and self.action_event:
+                st.session_state['action'] = 'save'
+        props = {
+            'buttons': {
+                'create': False,
+                'delete': False,
+                'save': False,
+            }
+        }
+        handle_event(run_component(props))
+        if st.session_state['action'] == 'save' and 'response' in st.session_state:
+            if st.session_state['response'] is not None:
+                self.df = st.session_state['response']
+            st.session_state['response'] = None
+        if st.session_state['action'] == 'create' and 'response' in st.session_state:
+            if st.session_state['response'] is not None:
+                self.df = st.session_state['response']
+        if st.session_state['action'] == 'delete' and 'response' in st.session_state:
+            if st.session_state['response'] is not None:
+                self.df = st.session_state['response']
+        response = agstyler.draw_grid(
+            self.df,
+            formatter=formatter,
+            fit_columns=True,
+            pagination_size=10,
+            selection="single",
+            use_checkbox=False
+        )
+        rows = response['selected_rows']
+        st.session_state['selected_rows'] = rows
+        if st.session_state['action'] == 'create' and self.action_event:
+            st.session_state['response'] = response['data']
+        elif st.session_state['action'] == 'delete' and self.action_event:
+            st.session_state['response'] = response['data']
+        elif st.session_state['action'] == 'save' and self.action_event:
+            data = response['data'].values.tolist()
+            rows = []
+            for row in data:
+                rows.append({'id': row[0], 'name': row[1], 'description': row[2]})
+            groups_json['groups'] = rows
+            with open(model.groups_file, "w") as f:
+                json.dump(groups_json, f, indent=2)