JohnTan38 committed on
Commit
e8a20da
1 Parent(s): 984fd33

Upload folder using huggingface_hub

Browse files
toolbar/config.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
class Settings:
    """Application-wide settings holder."""

    # Key for the Sparrow service; ships empty in this module.
    sparrow_key = ""


# Module-level instance of the settings.
settings = Settings()
toolbar/main.py ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from streamlit_option_menu import option_menu
3
+ from tools.utilities import load_css
4
+ import json
5
+
6
+ from views.dashboard import Dashboard
7
+ from views.data_annotation import DataAnnotation
8
+ from views.model_training import ModelTraining
9
+ from views.model_tuning import ModelTuning
10
+ from views.data_inference import DataInference
11
+ from views.setup import Setup
12
+ from views.data_review import DataReview
13
+ from views.about import About
14
+
15
+ import streamlit_javascript as st_js
16
+
17
# Set the browser-tab title/icon and the wide layout, then inject the app CSS.
st.set_page_config(page_title="Sparrow", page_icon="favicon.ico", layout="wide")

load_css()
24
+
25
+
26
class Model:
    """Static labels and icon names for the sidebar navigation menu.

    ``optionN`` is the label of a menu entry and ``iconN`` is the icon name
    stored alongside it; ``view`` passes both to ``option_menu``.
    """

    # Menu heading and the icon shown next to it.
    menuTitle, menuIcon = "Sparrow", "menu-up"

    # One (label, icon) pair per page.
    option1, icon1 = "Dashboard", "speedometer"
    option2, icon2 = "Data Annotation", "activity"
    option3, icon3 = "Model Training", "motherboard"
    option4, icon4 = "Model Tuning", "graph-up-arrow"
    option5, icon5 = "Inference", "journal-arrow-down"
    option6, icon6 = "Data Review", "droplet"
    option7, icon7 = "Setup", "clipboard-data"
    option8, icon8 = "About", "chat"
46
+
47
+
48
def _render_width_aware(page_cls):
    """Render ``page_cls`` once the browser's width metrics are in session state.

    When ``ui_width``, ``device_type`` and ``device_width`` are all present in
    ``st.session_state`` the page is rendered with them. Otherwise the widths
    are requested from the browser through ``st_javascript``; once both come
    back positive they are stored and the script is rerun so that the next
    pass takes the rendering branch.

    Args:
        page_cls: A view class exposing a nested ``Model`` class and a
            ``view(model, ui_width, device_type, device_width)`` method.
    """
    state = st.session_state
    if 'ui_width' in state and 'device_type' in state and 'device_width' in state:
        page_cls().view(page_cls.Model(), state['ui_width'], state['device_type'],
                        state['device_width'])
        return

    # Get UI width
    ui_width = st_js.st_javascript("window.innerWidth", key="ui_width_comp")
    device_width = st_js.st_javascript("window.screen.width", key="device_width_comp")

    # Only positive readings are stored; anything else leaves the state
    # untouched so the widths are requested again on the next run.
    if ui_width > 0 and device_width > 0:
        # Add 20% of current screen width to compensate for the sidebar
        state['ui_width'] = round(ui_width + (20 * ui_width / 100))
        state['device_type'] = 'desktop' if device_width > 768 else 'mobile'
        state['device_width'] = device_width

        st.experimental_rerun()


def view(model):
    """Draw the sidebar menu and render the page selected in it.

    The menu offers options 1, 2, 5, 6, 7 and 8 of ``model``; options 3 and 4
    (model training / tuning) are still dispatched here but are not listed in
    the menu. After the selected page is handled, the sidebar footer is drawn
    with ``logout_widget``.

    Args:
        model: Object providing ``menuTitle``, ``menuIcon``, ``option1..8``
            and ``icon1..8`` (see ``Model``).
    """
    with st.sidebar:
        menuItem = option_menu(model.menuTitle,
                               [model.option1, model.option2, model.option5, model.option6, model.option7, model.option8],
                               icons=[model.icon1, model.icon2, model.icon5, model.icon6, model.icon7, model.icon8],
                               menu_icon=model.menuIcon,
                               default_index=0,
                               styles={
                                   "container": {"padding": "5!important", "background-color": "#fafafa"},
                                   "icon": {"color": "black", "font-size": "25px"},
                                   "nav-link": {"font-size": "16px", "text-align": "left", "margin": "0px",
                                                "--hover-color": "#eee"},
                                   "nav-link-selected": {"background-color": "#037ffc"},
                               })

    # (menu label, renderer) pairs, checked in the same order as the original
    # chain of independent ``if`` statements.
    pages = (
        (model.option1, lambda: Dashboard().view(Dashboard.Model())),
        (model.option2, lambda: _render_width_aware(DataAnnotation)),
        (model.option3, lambda: ModelTraining().view(ModelTraining.Model())),
        (model.option4, lambda: ModelTuning().view(ModelTuning.Model())),
        (model.option5, lambda: _render_width_aware(DataInference)),
        (model.option6, lambda: _render_width_aware(DataReview)),
        (model.option7, lambda: Setup().view(Setup.Model())),
        (model.option8, lambda: About().view(About.Model())),
    )
    for label, render in pages:
        if menuItem == label:
            render()
            logout_widget()
159
+
160
+
161
def logout_widget():
    """Draw the sidebar footer: a separator, the app version and a visitor counter.

    The counter lives in ``docs/visitors.json`` under ``meta.visitors``. It is
    read, incremented and written back the first time this runs in a session;
    later runs reuse the value cached in ``st.session_state['visitors']``.
    """
    counter_file = "docs/visitors.json"

    with st.sidebar:
        st.markdown("---")
        st.write("Version:", "2.0.0")

        if 'visitors' in st.session_state:
            count = st.session_state['visitors']
        else:
            with open(counter_file, "r") as src:
                payload = json.load(src)

            count = payload["meta"]["visitors"] + 1
            payload["meta"]["visitors"] = count

            with open(counter_file, "w") as dst:
                json.dump(payload, dst)

            st.session_state['visitors'] = count

        st.write("Counter:", count)
185
+
186
+
187
+ view(Model())
toolbar/toolbar/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from pathlib import Path

import streamlit.components.v1 as components

# Serve the frontend (index.html) from the directory that holds this file.
# The original './toolbar' was resolved against the current working directory,
# so the component only loaded when the app was started from the app root.
_FRONTEND_DIR = Path(__file__).resolve().parent

component_toolbar_buttons = components.declare_component(
    name='component_toolbar_buttons',
    path=str(_FRONTEND_DIR)
)
toolbar/toolbar/index.html ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <html>
2
+ <head>
3
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/1.6.3/flowbite.min.css" rel="stylesheet" />
4
+ </head>
5
+
6
+ <!--
7
+ ----------------------------------------------------
8
+ Your custom static HTML goes in the body:
9
+ -->
10
+
11
+ <body>
12
+ <div class="inline-flex rounded-md shadow-sm" role="group">
13
+ <button id="create" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-l-lg hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
14
+ <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
15
+ <path fill-rule="evenodd" d="M12 3.75a.75.75 0 01.75.75v6.75h6.75a.75.75 0 010 1.5h-6.75v6.75a.75.75 0 01-1.5 0v-6.75H4.5a.75.75 0 010-1.5h6.75V4.5a.75.75 0 01.75-.75z" clip-rule="evenodd"></path>
16
+ </svg>
17
+ Create
18
+ </button>
19
+ <button id="delete" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border-t border-b border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
20
+ <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
21
+ <path d="M6.75 9.25a.75.75 0 000 1.5h6.5a.75.75 0 000-1.5h-6.5z"></path>
22
+ </svg>
23
+ Delete
24
+ </button>
25
+ <button id="save" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-r-md hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
26
+ <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
27
+ <path fill-rule="evenodd" d="M16.704 4.153a.75.75 0 01.143 1.052l-8 10.5a.75.75 0 01-1.127.075l-4.5-4.5a.75.75 0 011.06-1.06l3.894 3.893 7.48-9.817a.75.75 0 011.05-.143z" clip-rule="evenodd"></path>
28
+ </svg>
29
+ Save
30
+ </button>
31
+ </div>
32
+ </body>
33
+
34
+ <script type="text/javascript">
35
+ // ----------------------------------------------------
36
+ // Use these functions as is to perform required Streamlit
37
+ // component lifecycle actions:
38
+ //
39
+ // 1. Signal Streamlit client that component is ready
40
+ // 2. Signal Streamlit client to set visible height of the component
41
+ // (this is optional, in case Streamlit doesn't correctly auto-set it)
42
+ // 3. Pass values from component to Streamlit client
43
+ //
44
+
45
+ // Helper function to send type and data messages to Streamlit client
46
+
47
+ const SET_COMPONENT_VALUE = "streamlit:setComponentValue"
48
+ const RENDER = "streamlit:render"
49
+ const COMPONENT_READY = "streamlit:componentReady"
50
+ const SET_FRAME_HEIGHT = "streamlit:setFrameHeight"
51
+
52
// Post a message of the given `type` to the parent window. The fields of
// `data` are copied onto the envelope, so they sit next to
// `isStreamlitMessage` and `type`.
function _sendMessage(type, data) {
  const envelope = { isStreamlitMessage: true, type: type }
  const outboundData = Object.assign(envelope, data)

  // Trace component values on their way out.
  if (type == SET_COMPONENT_VALUE) {
    console.log("_sendMessage data: " + JSON.stringify(data))
    console.log("_sendMessage outboundData: " + JSON.stringify(outboundData))
  }

  window.parent.postMessage(outboundData, "*")
}
66
+
67
// Wire the component up: dispatch render messages to the `pipeline` handlers,
// announce readiness, and report the frame height once the page has loaded.
function initialize(pipeline) {
  // On a render message, pass its args (already deserialized) to every
  // handler in pipeline order; other message types are ignored.
  function onMessage(event) {
    if (event.data.type != RENDER) {
      return
    }
    pipeline.forEach(function (handler) {
      handler(event.data.args)
    })
  }
  window.addEventListener("message", onMessage)

  _sendMessage(COMPONENT_READY, { apiVersion: 1 })

  // The component is expected to be mounted in an iframe, so try to set the
  // iframe height automatically after load.
  window.addEventListener("load", function () {
    window.setTimeout(function () {
      setFrameHeight(document.documentElement.clientHeight)
    }, 0)
  })

  // If the automatic height turns out wrong, set it manually instead:
  //setFrameHeight(200)
}
93
+
94
// Send a set-frame-height message carrying `height`.
function setFrameHeight(height) {
  const payload = { height: height }
  _sendMessage(SET_FRAME_HEIGHT, payload)
}
97
+
98
+ // The `data` argument can be any JSON-serializable value.
99
// Send `data` to the host as the component's new value.
function notifyHost(data) {
  const messageType = SET_COMPONENT_VALUE
  _sendMessage(messageType, data)
}
102
+
103
+ // ----------------------------------------------------
104
+ // Your custom functionality for the component goes here:
105
+
106
// Click handler shared by the toolbar buttons: reports which button was
// pressed (by element id) together with the click time in milliseconds.
function call(button) {
  // Declared locally; the original assigned undeclared `timestamp` and
  // `action`, which created implicit globals.
  const timestamp = Date.now()
  const action = {
    "action": button.id,
    "timestamp": timestamp
  }
  notifyHost({
    value: action,
    dataType: "json",
  })
}
117
+
118
+ // ----------------------------------------------------
119
+ // Here you can customize a pipeline of handlers for
120
+ // inbound properties from the Streamlit client app
121
+
122
+ // Set initial value sent from Streamlit!
123
// Apply the button state sent from Streamlit. `props.buttons` maps a button's
// element id to the value assigned to its `disabled` property.
function initializeProps_Handler(props) {
  // Tolerate a render message without `buttons` so the remaining pipeline
  // handlers still run.
  const buttons = (props && props.buttons) || {}

  for (const key of Object.keys(buttons)) {
    // Declared locally; the original assigned an undeclared global `btn`.
    const btn = document.getElementById(key)
    if (btn === null) {
      // No element with this id in the toolbar: skip it instead of throwing.
      continue
    }
    btn.disabled = buttons[key]
  }
}
131
+ // Access values sent from Streamlit!
132
// Pipeline slot for reacting to values sent from Streamlit. Currently a
// no-op; the sample below shows the intended shape of an implementation.
function dataUpdate_Handler(props) {
  //   let msgLabel = document.getElementById("message_label")
  //   msgLabel.innerText = `Update [${props.counter}] at ${props.datetime}`
}
136
+ // Simply log received data dictionary
137
// Log the received props, serialized as JSON, to the console.
function log_Handler(props) {
  const serialized = JSON.stringify(props)
  console.log("Received from Streamlit: " + serialized)
}
140
+
141
+ let pipeline = [initializeProps_Handler, dataUpdate_Handler, log_Handler]
142
+
143
+ // ----------------------------------------------------
144
+ // Finally, initialize component passing in pipeline
145
+
146
+ initialize(pipeline)
147
+
148
+ </script>
149
+
150
+ </html>
toolbar/toolbar_main/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
from pathlib import Path

import streamlit.components.v1 as components

# Serve the frontend (index.html) from the directory that holds this file.
# The original './toolbar_main' was resolved against the current working
# directory, so the component only loaded when started from the app root.
_FRONTEND_DIR = Path(__file__).resolve().parent

component_toolbar_main = components.declare_component(
    name='component_toolbar_main',
    path=str(_FRONTEND_DIR)
)
toolbar/toolbar_main/index.html ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <html>
2
+ <head>
3
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/flowbite/1.6.3/flowbite.min.css" rel="stylesheet" />
4
+ </head>
5
+
6
+ <!--
7
+ ----------------------------------------------------
8
+ Your custom static HTML goes in the body:
9
+ -->
10
+
11
+ <body>
12
+ <div class="inline-flex rounded-md shadow-sm" role="group">
13
+ <button id="up" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-l-lg hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
14
+ <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
15
+ <path fill-rule="evenodd" d="M4.5 15.75l7.5-7.5 7.5 7.5" clip-rule="evenodd"></path>
16
+ </svg>
17
+ Up
18
+ </button>
19
+ <button id="down" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border-t border-b border-gray-200 hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
20
+ <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
21
+ <path d="M19.5 8.25l-7.5 7.5-7.5-7.5"></path>
22
+ </svg>
23
+ Down
24
+ </button>
25
+ <button id="save" type="button" onclick="call(this)" class="inline-flex items-center px-4 py-2 text-sm font-medium text-gray-900 bg-white border border-gray-200 rounded-r-md hover:bg-gray-100 hover:text-blue-700 focus:z-10 focus:ring-2 focus:ring-blue-700 focus:text-blue-700 dark:bg-gray-700 dark:border-gray-600 dark:text-white dark:hover:text-white dark:hover:bg-gray-600 dark:focus:ring-blue-500 dark:focus:text-white">
26
+ <svg aria-hidden="true" class="w-4 h-4 mr-2 fill-current" fill="currentColor" viewBox="0 0 20 20" xmlns="http://www.w3.org/2000/svg">
27
+ <path fill-rule="evenodd" d="M16.704 4.153a.75.75 0 01.143 1.052l-8 10.5a.75.75 0 01-1.127.075l-4.5-4.5a.75.75 0 011.06-1.06l3.894 3.893 7.48-9.817a.75.75 0 011.05-.143z" clip-rule="evenodd"></path>
28
+ </svg>
29
+ Save
30
+ </button>
31
+ </div>
32
+ </body>
33
+
34
+ <script type="text/javascript">
35
+ // ----------------------------------------------------
36
+ // Use these functions as is to perform required Streamlit
37
+ // component lifecycle actions:
38
+ //
39
+ // 1. Signal Streamlit client that component is ready
40
+ // 2. Signal Streamlit client to set visible height of the component
41
+ // (this is optional, in case Streamlit doesn't correctly auto-set it)
42
+ // 3. Pass values from component to Streamlit client
43
+ //
44
+
45
+ // Helper function to send type and data messages to Streamlit client
46
+
47
+ const SET_COMPONENT_VALUE = "streamlit:setComponentValue"
48
+ const RENDER = "streamlit:render"
49
+ const COMPONENT_READY = "streamlit:componentReady"
50
+ const SET_FRAME_HEIGHT = "streamlit:setFrameHeight"
51
+
52
// Post a message of the given `type` to the parent window. The fields of
// `data` are copied onto the envelope, so they sit next to
// `isStreamlitMessage` and `type`.
function _sendMessage(type, data) {
  const envelope = { isStreamlitMessage: true, type: type }
  const outboundData = Object.assign(envelope, data)

  // Trace component values on their way out.
  if (type == SET_COMPONENT_VALUE) {
    console.log("_sendMessage data: " + JSON.stringify(data))
    console.log("_sendMessage outboundData: " + JSON.stringify(outboundData))
  }

  window.parent.postMessage(outboundData, "*")
}
66
+
67
// Wire the component up: dispatch render messages to the `pipeline` handlers,
// announce readiness, and report the frame height once the page has loaded.
function initialize(pipeline) {
  // On a render message, pass its args (already deserialized) to every
  // handler in pipeline order; other message types are ignored.
  function onMessage(event) {
    if (event.data.type != RENDER) {
      return
    }
    pipeline.forEach(function (handler) {
      handler(event.data.args)
    })
  }
  window.addEventListener("message", onMessage)

  _sendMessage(COMPONENT_READY, { apiVersion: 1 })

  // The component is expected to be mounted in an iframe, so try to set the
  // iframe height automatically after load.
  window.addEventListener("load", function () {
    window.setTimeout(function () {
      setFrameHeight(document.documentElement.clientHeight)
    }, 0)
  })

  // If the automatic height turns out wrong, set it manually instead:
  //setFrameHeight(200)
}
93
+
94
// Send a set-frame-height message carrying `height`.
function setFrameHeight(height) {
  const payload = { height: height }
  _sendMessage(SET_FRAME_HEIGHT, payload)
}
97
+
98
+ // The `data` argument can be any JSON-serializable value.
99
// Send `data` to the host as the component's new value.
function notifyHost(data) {
  const messageType = SET_COMPONENT_VALUE
  _sendMessage(messageType, data)
}
102
+
103
+ // ----------------------------------------------------
104
+ // Your custom functionality for the component goes here:
105
+
106
// Click handler shared by the toolbar buttons: reports which button was
// pressed (by element id) together with the click time in milliseconds.
function call(button) {
  // Declared locally; the original assigned undeclared `timestamp` and
  // `action`, which created implicit globals.
  const timestamp = Date.now()
  const action = {
    "action": button.id,
    "timestamp": timestamp
  }
  notifyHost({
    value: action,
    dataType: "json",
  })
}
117
+
118
+ // ----------------------------------------------------
119
+ // Here you can customize a pipeline of handlers for
120
+ // inbound properties from the Streamlit client app
121
+
122
+ // Set initial value sent from Streamlit!
123
// Apply the button state sent from Streamlit. `props.buttons` maps a button's
// element id to an object whose `disabled` value is assigned to the button's
// `disabled` property and whose `rendered` value is assigned to its CSS
// `display` style.
function initializeProps_Handler(props) {
  // Tolerate a render message without `buttons` so the remaining pipeline
  // handlers still run.
  const buttons = (props && props.buttons) || {}

  for (const key of Object.keys(buttons)) {
    // Declared locally; the original assigned an undeclared global `btn`.
    const btn = document.getElementById(key)
    if (btn === null) {
      // No element with this id in the toolbar: skip it instead of throwing.
      continue
    }
    btn.disabled = buttons[key]['disabled']
    btn.style.display = buttons[key]['rendered']
  }
}
130
+ // Access values sent from Streamlit!
131
// Pipeline slot for reacting to values sent from Streamlit. Currently a
// no-op; the sample below shows the intended shape of an implementation.
function dataUpdate_Handler(props) {
  //   let msgLabel = document.getElementById("message_label")
  //   msgLabel.innerText = `Update [${props.counter}] at ${props.datetime}`
}
135
+ // Simply log received data dictionary
136
// Log the received props, serialized as JSON, to the console.
function log_Handler(props) {
  const serialized = JSON.stringify(props)
  console.log("Received from Streamlit: " + serialized)
}
139
+
140
+ let pipeline = [initializeProps_Handler, dataUpdate_Handler, log_Handler]
141
+
142
+ // ----------------------------------------------------
143
+ // Finally, initialize component passing in pipeline
144
+
145
+ initialize(pipeline)
146
+
147
+ </script>
148
+
149
+ </html>
toolbar/tools/agstyler.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # adjusted from: https://github.com/nryabykh/streamlit-aggrid-hints
2
+
3
+ from st_aggrid import AgGrid
4
+ from st_aggrid.grid_options_builder import GridOptionsBuilder
5
+ from st_aggrid.shared import GridUpdateMode, JsCode
6
+
7
+
8
def get_numeric_style_with_precision(precision: int) -> dict:
    """Return column options for a numeric column with the given precision.

    The result carries the ``numericColumn`` and ``customNumericFormat``
    column types plus a ``precision`` entry set to ``precision``.
    """
    style = {"type": ["numericColumn", "customNumericFormat"]}
    style["precision"] = precision
    return style


# Ready-made numeric styles for 0, 1 and 2 digits of precision.
PRECISION_ZERO, PRECISION_ONE, PRECISION_TWO = (
    get_numeric_style_with_precision(digits) for digits in range(3)
)
# Column option that pins a column to the left edge of the grid.
PINLEFT = {"pinned": "left"}
16
+
17
+
18
def draw_grid(
        df,
        formatter: dict = None,
        selection="multiple",
        use_checkbox=False,
        fit_columns=False,
        pagination_size=0,
        theme="streamlit",
        wrap_text: bool = False,
        auto_height: bool = False,
        grid_options: dict = None,
        key=None,
        css: dict = None
):
    """Build grid options and render ``df`` with ``AgGrid``.

    Args:
        df: Data handed to ``AgGrid`` unchanged.
        formatter: Mapping of column field name to a
            ``(header name, column options dict)`` pair; each entry is passed
            to ``configure_column``. ``None`` (the default) configures no
            columns.
        selection: Selection mode passed to ``configure_selection``.
        use_checkbox: Passed to ``configure_selection``.
        fit_columns: Passed to ``AgGrid`` as ``fit_columns_on_grid_load``.
        pagination_size: Page size; pagination is only configured when this
            is greater than zero.
        theme: Grid theme passed to ``AgGrid``.
        wrap_text: Default-column ``wrapText`` setting.
        auto_height: Default-column ``autoHeight`` setting.
        grid_options: Extra options passed to ``configure_grid_options``.
        key: Passed to ``AgGrid`` as ``key``.
        css: Passed to ``AgGrid`` as ``custom_css``.

    Returns:
        Whatever ``AgGrid`` returns.
    """
    gb = GridOptionsBuilder()
    gb.configure_default_column(
        filterable=True,
        groupable=False,
        editable=False,
        wrapText=wrap_text,
        autoHeight=auto_height
    )

    if grid_options is not None:
        gb.configure_grid_options(**grid_options)

    # ``formatter`` defaults to None; iterating it directly raised
    # AttributeError, so fall back to an empty mapping.
    for field, (header, column_options) in (formatter or {}).items():
        gb.configure_column(field, header_name=header, **column_options)

    gb.configure_selection(selection_mode=selection, use_checkbox=use_checkbox)

    if pagination_size > 0:
        gb.configure_pagination(enabled=True, paginationAutoPageSize=False, paginationPageSize=pagination_size)

    return AgGrid(
        df,
        gridOptions=gb.build(),
        update_mode=GridUpdateMode.SELECTION_CHANGED | GridUpdateMode.VALUE_CHANGED,
        allow_unsafe_jscode=True,
        fit_columns_on_grid_load=fit_columns,
        theme=theme,
        key=key,
        custom_css=css,
        enable_enterprise_modules=False
    )
64
+
65
+
66
def highlight(color, condition):
    """Return a ``JsCode`` function that colours a cell's background.

    The generated JavaScript function takes ``params`` and returns
    ``{'backgroundColor': color}`` when ``condition`` holds.

    Args:
        color: Colour value inserted into the script as a string literal.
        condition: JavaScript expression inserted verbatim as the ``if`` test.
    """
    js_source = f"""
        function(params) {{
            color = "{color}";
            if ({condition}) {{
                return {{
                    'backgroundColor': color
                }}
            }}
        }};
    """
    return JsCode(js_source)
toolbar/tools/data_review.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from natsort import natsorted
3
+ import json
4
+
5
+
6
def annotation_review(json_dir='../docs/json/'):
    """Print the name of every annotation file whose meta version is "v0.1".

    Args:
        json_dir: Directory holding the annotation ``.json`` files. Defaults
            to the location that was previously hard-coded here.

    Raises:
        KeyError: If a file has no ``meta`` / ``version`` entry.
    """
    # get list of files in json directory
    for file_name in get_processed_file_names(json_dir):
        # open json file
        with open(os.path.join(json_dir, file_name + '.json')) as json_file:
            json_file_data = json.load(json_file)

        version = json_file_data['meta']['version']
        if version == "v0.1":
            print(file_name + " is v0.1")
16
+
17
def get_processed_file_names(dir_name):
    """Return the naturally sorted names, without extension, of the JSON files in ``dir_name``.

    Only regular files are considered; hidden files (leading ``.``) and files
    that do not end in ``.json`` are left out.
    """
    stems = []
    for entry in os.listdir(dir_name):
        if entry.startswith('.') or not entry.endswith('.json'):
            continue
        if not os.path.isfile(os.path.join(dir_name, entry)):
            continue
        stems.append(os.path.splitext(entry)[0])
    return natsorted(stems)
23
+
24
def main():
    """Script entry point: run the annotation version review."""
    annotation_review()
26
+
27
+
28
+ if __name__ == '__main__':
29
+ main()
toolbar/tools/st_functions.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
# Icon glyphs keyed by the ``icon`` argument of ``st_button``:
# (suffix of the ``bi bi-<suffix>`` CSS class, SVG path data).
_ICON_GLYPHS = {
    'youtube': (
        'youtube',
        "M8.051 1.999h.089c.822.003 4.987.033 6.11.335a2.01 2.01 0 0 1 1.415 1.42c.101.38.172.883.22 1.402l.01.104.022.26.008.104c.065.914.073 1.77.074 1.957v.075c-.001.194-.01 1.108-.082 2.06l-.008.105-.009.104c-.05.572-.124 1.14-.235 1.558a2.007 2.007 0 0 1-1.415 1.42c-1.16.312-5.569.334-6.18.335h-.142c-.309 0-1.587-.006-2.927-.052l-.17-.006-.087-.004-.171-.007-.171-.007c-1.11-.049-2.167-.128-2.654-.26a2.007 2.007 0 0 1-1.415-1.419c-.111-.417-.185-.986-.235-1.558L.09 9.82l-.008-.104A31.4 31.4 0 0 1 0 7.68v-.123c.002-.215.01-.958.064-1.778l.007-.103.003-.052.008-.104.022-.26.01-.104c.048-.519.119-1.023.22-1.402a2.007 2.007 0 0 1 1.415-1.42c.487-.13 1.544-.21 2.654-.26l.17-.007.172-.006.086-.003.171-.007A99.788 99.788 0 0 1 7.858 2h.193zM6.4 5.209v4.818l4.157-2.408L6.4 5.209z",
    ),
    'twitter': (
        'twitter',
        "M5.026 15c6.038 0 9.341-5.003 9.341-9.334 0-.14 0-.282-.006-.422A6.685 6.685 0 0 0 16 3.542a6.658 6.658 0 0 1-1.889.518 3.301 3.301 0 0 0 1.447-1.817 6.533 6.533 0 0 1-2.087.793A3.286 3.286 0 0 0 7.875 6.03a9.325 9.325 0 0 1-6.767-3.429 3.289 3.289 0 0 0 1.018 4.382A3.323 3.323 0 0 1 .64 6.575v.045a3.288 3.288 0 0 0 2.632 3.218 3.203 3.203 0 0 1-.865.115 3.23 3.23 0 0 1-.614-.057 3.283 3.283 0 0 0 3.067 2.277A6.588 6.588 0 0 1 .78 13.58a6.32 6.32 0 0 1-.78-.045A9.344 9.344 0 0 0 5.026 15z",
    ),
    'linkedin': (
        'linkedin',
        "M0 1.146C0 .513.526 0 1.175 0h13.65C15.474 0 16 .513 16 1.146v13.708c0 .633-.526 1.146-1.175 1.146H1.175C.526 16 0 15.487 0 14.854V1.146zm4.943 12.248V6.169H2.542v7.225h2.401zm-1.2-8.212c.837 0 1.358-.554 1.358-1.248-.015-.709-.52-1.248-1.342-1.248-.822 0-1.359.54-1.359 1.248 0 .694.521 1.248 1.327 1.248h.016zm4.908 8.212V9.359c0-.216.016-.432.08-.586.173-.431.568-.878 1.232-.878.869 0 1.216.662 1.216 1.634v3.865h2.401V9.25c0-2.22-1.184-3.252-2.764-3.252-1.274 0-1.845.7-2.165 1.193v.025h-.016a5.54 5.54 0 0 1 .016-.025V6.169h-2.4c.03.678 0 7.225 0 7.225h2.4z",
    ),
    'medium': (
        'medium',
        "M9.025 8c0 2.485-2.02 4.5-4.513 4.5A4.506 4.506 0 0 1 0 8c0-2.486 2.02-4.5 4.512-4.5A4.506 4.506 0 0 1 9.025 8zm4.95 0c0 2.34-1.01 4.236-2.256 4.236-1.246 0-2.256-1.897-2.256-4.236 0-2.34 1.01-4.236 2.256-4.236 1.246 0 2.256 1.897 2.256 4.236zM16 8c0 2.096-.355 3.795-.794 3.795-.438 0-.793-1.7-.793-3.795 0-2.096.355-3.795.794-3.795.438 0 .793 1.699.793 3.795z",
    ),
    'newsletter': (
        'envelope',
        "M0 4a2 2 0 0 1 2-2h12a2 2 0 0 1 2 2v8a2 2 0 0 1-2 2H2a2 2 0 0 1-2-2V4Zm2-1a1 1 0 0 0-1 1v.217l7 4.2 7-4.2V4a1 1 0 0 0-1-1H2Zm13 2.383-4.708 2.825L15 11.105V5.383Zm-.034 6.876-5.64-3.471L8 9.583l-1.326-.795-5.64 3.47A1 1 0 0 0 2 13h12a1 1 0 0 0 .966-.741ZM1 11.105l4.708-2.897L1 5.383v5.722Z",
    ),
    'github': (
        'github',
        "M8 0C3.58 0 0 3.58 0 8c0 3.54 2.29 6.53 5.47 7.59.4.07.55-.17.55-.38 0-.19-.01-.82-.01-1.49-2.01.37-2.53-.49-2.69-.94-.09-.23-.48-.94-.82-1.13-.28-.15-.68-.52-.01-.53.63-.01 1.08.58 1.23.82.72 1.21 1.87.87 2.33.66.07-.52.28-.87.51-1.07-1.78-.2-3.64-.89-3.64-3.95 0-.87.31-1.59.82-2.15-.08-.2-.36-1.02.08-2.12 0 0 .67-.21 2.2.82.64-.18 1.32-.27 2-.27.68 0 1.36.09 2 .27 1.53-1.04 2.2-.82 2.2-.82.44 1.1.16 1.92.08 2.12.51.56.82 1.27.82 2.15 0 3.07-1.87 3.75-3.65 3.95.29.25.54.73.54 1.48 0 1.07-.01 1.93-.01 2.2 0 .21.15.46.55.38A8.012 8.012 0 0 0 16 8c0-4.42-3.58-8-8-8z",
    ),
}


def _button_html(icon, url, label, iconsize):
    """Build the HTML for one link button, with an icon when ``icon`` is known."""
    from html import escape  # local import: this module imports only streamlit

    lines = [
        '<p>',
        # The href is quoted and escaped so URLs containing spaces, quotes or
        # '&' cannot break out of the attribute.
        f'<a href="{escape(str(url), quote=True)}" class="btn btn-outline-primary btn-lg btn-block" '
        'type="button" aria-pressed="true">',
    ]

    glyph = _ICON_GLYPHS.get(icon)
    if glyph is not None:
        css_suffix, path_data = glyph
        lines += [
            f'<svg xmlns="http://www.w3.org/2000/svg" width="{iconsize}" height="{iconsize}" '
            f'fill="currentColor" class="bi bi-{css_suffix}" viewBox="0 0 16 16">',
            f'<path d="{path_data}"/>',
            '</svg>',
        ]

    lines += [f'{label}', '</a>', '</p>']
    # No blank lines: the pieces are emitted as one contiguous HTML fragment.
    return '\n'.join(lines)


def st_button(icon, url, label, iconsize):
    """Render a full-width link button, optionally preceded by an icon.

    Args:
        icon: One of ``'youtube'``, ``'twitter'``, ``'linkedin'``,
            ``'medium'``, ``'newsletter'``, ``'github'`` or ``''``. The empty
            string, and any unrecognised name, renders the button without an
            icon (an unrecognised name previously raised UnboundLocalError).
        url: Link target.
        label: Button text, inserted into the HTML as given.
        iconsize: Width and height of the icon.

    Returns:
        The result of ``st.markdown``.
    """
    button_code = _button_html(icon, url, label, iconsize)
    return st.markdown(button_code, unsafe_allow_html=True)
toolbar/tools/utilities.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
def load_css():
    """Inject ``tools/style.css`` and a Bootstrap 4.0.0 stylesheet link into the page."""
    with open("tools/style.css") as css_file:
        local_css = css_file.read()
    st.markdown('<style>{}</style>'.format(local_css), unsafe_allow_html=True)

    bootstrap_link = '<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/4.0.0/css/bootstrap.min.css" integrity="sha384-Gn5384xqQ1aoWXA+058RXPxPg6fy4IWvTNh0E263XmFcJlSAwiGgFAW/dAiS6JXm" crossorigin="anonymous">'
    st.markdown(bootstrap_link, unsafe_allow_html=True)
toolbar/views/about.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ from tools.st_functions import st_button
4
+
5
+
6
class About:
    """About page: GitHub star badge, portrait, headline, blurb and link buttons."""

    class Model:
        # Page title (not rendered by ``view`` at the moment).
        pageTitle = "About"

    def view(self, model):
        """Render the page; ``model`` is accepted but not read here."""
        star_badge = "[![Star](https://img.shields.io/github/stars/katanaml/sparrow.svg?logo=github&style=social)](https://github.com/katanaml/sparrow)"
        st.write(star_badge)

        # Put the portrait in the middle of three columns.
        _, centre, _ = st.columns(3)
        centre.image(Image.open('assets/ab.png'))

        headline = "<h1 style='text-align: center; color: black; font-weight: bold;'>Andrej Baranovskij, Founder Katana ML</h1>"
        st.markdown(headline, unsafe_allow_html=True)

        st.info(
            'Sparrow is a tool for data extraction from PDFs, images, and other documents. It is a part of Katana ML, '
            'a platform for data science and machine learning.')

        icon_size = 20

        # (icon, url, label) for each link button, in display order.
        links = (
            ('youtube', 'https://www.youtube.com/@AndrejBaranovskij', 'Andrej Baranovskij YouTube channel'),
            ('github', 'https://github.com/katanaml/sparrow', 'Sparrow GitHub'),
            ('twitter', 'https://twitter.com/andrejusb', 'Follow me on Twitter'),
            ('medium', 'https://andrejusb.medium.com', 'Read my Blogs on Medium'),
            ('linkedin', 'https://www.linkedin.com/in/andrej-baranovskij/', 'Follow me on LinkedIn'),
            ('', 'https://katanaml.io', 'Katana ML'),
        )
        for icon, url, label in links:
            st_button(icon, url, label, icon_size)
toolbar/views/dashboard.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import json
5
+ import altair as alt
6
+ from pathlib import Path
7
+ import requests
8
+
9
+
10
class Dashboard:
    """Dashboard page: headline metrics and charts for the Sparrow services.

    Inference, training and evaluation statistics are fetched from remote
    endpoints on every render; annotation progress is computed from the local
    annotation JSON files.

    Records returned by the statistics endpoints are read positionally. As
    used by this class: ``record[0]`` is charted as a duration, ``record[1]``
    is a word count (inference) or a dict holding ``'mean_accuracy'`` and
    ``'accuracies'`` (evaluation), ``record[3]`` is the model key and
    ``record[4]`` is a timestamp string whose first space-separated token is
    treated as the day.
    """

    class Model:
        pageTitle = "Dashboard"

        # Labels of the five headline metrics.
        wordsTitle = "Words"
        inferenceTimeTitle = "Inference Time"
        documentsTitle = "Documents"
        dailyInferenceTitle = "Top Daily Inference"
        accuracyTitle = "Mean Accuracy"

        # Markdown section headings.
        titleModelEval = "## Evaluation Accuracy"
        titleInferencePerformance = "## Inference Performance"
        titleDatasetInfo = "## Dataset Info"
        titleDataAnnotation = "## Data Annotation"
        titleTrainingPerformance = "## Training Performance"
        titleEvaluationPerformance = "## Evaluation Performance"

        # Local files used for annotation progress.
        status_file = "docs/status.json"
        annotation_files_dir = "docs/json"

    # Seconds to wait for a statistics endpoint; without a timeout a stalled
    # endpoint would block the page render indefinitely.
    _REQUEST_TIMEOUT = 30

    def view(self, model):
        """Render the whole dashboard.

        Args:
            model: Object exposing the ``Dashboard.Model`` attributes.
        """
        # st.title(model.pageTitle)

        inference = self._fetch_json(
            "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/statistics", [])
        training = self._fetch_json(
            "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/training", [])
        evaluation = self._fetch_json(
            "https://katanaml-org-sparrow-ml.hf.space/api-training/v1/sparrow-ml/statistics/evaluate", [])

        with st.container():
            col1, col2, col3, col4, col5 = st.columns(5)

            with col1:
                self._words_metric(model, inference)
            with col2:
                self._documents_metric(model, inference)
            with col3:
                self._daily_peak_metric(model, inference)
            with col4:
                self._inference_time_metric(model, inference)
            with col5:
                self._accuracy_metric(model, evaluation)

            st.markdown("---")

        with st.container():
            col1, col2 = st.columns(2)

            with col1:
                st.write(model.titleInferencePerformance)

                # Rounded durations per model, models in first-seen order.
                durations = {}
                for record in inference:
                    durations.setdefault(record[3], []).append(round(record[0]))
                st.line_chart(self._ragged_frame(durations))

            with col2:
                st.write(model.titleModelEval)

                # Only the first evaluation record of each model is charted.
                curves = {}
                for record in evaluation:
                    if record[3] not in curves:
                        curves[record[3]] = record[1]['accuracies']
                st.line_chart(self._ragged_frame(curves))

        st.markdown("---")

        with st.container():
            col1, col2, col3 = st.columns(3)

            with col1:
                with st.container():
                    st.write(model.titleDataAnnotation)

                    _, completed, in_progress = self.calculate_annotation_stats(model)
                    data = pd.DataFrame({"Status": ["Completed", "In Progress"],
                                         "Value": [completed, in_progress]})
                    self._bar_chart(data, "Status")

            with col2:
                with st.container():
                    st.write(model.titleDatasetInfo)

                    dataset_info = self._fetch_json(
                        "https://katanaml-org-sparrow-data.hf.space/api-dataset/v1/sparrow-data/dataset_info",
                        None)
                    splits = dataset_info['splits'] if dataset_info is not None else []
                    data = pd.DataFrame({"Dataset": [split['name'] for split in splits],
                                         "Value": [split['number_of_rows'] for split in splits]})
                    self._bar_chart(data, "Dataset")

            with col3:
                with st.container():
                    st.write(model.titleTrainingPerformance)
                    self._bar_chart(self._runs_frame(training), "Runs")

        st.markdown("---")

        with st.container():
            st.write(model.titleEvaluationPerformance)
            self._bar_chart(self._runs_frame(evaluation), "Runs")

    @classmethod
    def _fetch_json(cls, url, default):
        """Return the decoded JSON body of ``url``, or ``default`` on any failure."""
        try:
            response = requests.get(url, timeout=cls._REQUEST_TIMEOUT)
            if response.status_code == 200:
                return response.json()
        except (requests.RequestException, ValueError) as exc:
            # Network failure, timeout or an undecodable body: degrade to the default.
            print(f"Error: Unable to fetch data from the API ({exc})")
            return default

        print(f"Error: Unable to fetch data from the API (status code {response.status_code})")
        return default

    @staticmethod
    def _percent_delta(baseline, recent):
        """Return the signed percentage gap between a recent and an overall average.

        Positive when ``recent`` exceeds ``baseline`` (relative to ``recent``),
        negative otherwise (relative to ``baseline``). Returns ``0.0`` instead
        of dividing by zero.
        """
        if recent >= baseline:
            if not recent:
                return 0.0
            return round(100 - (baseline * 100) / recent, 2)
        if not baseline:
            return 0.0
        return round(100 - (recent * 100) / baseline, 2) * -1

    @staticmethod
    def _daily_counts(records):
        """Count records per day, keeping days in first-seen order."""
        counts = {}
        for record in records:
            day = record[4].split(" ")[0]
            counts[day] = counts.get(day, 0) + 1
        return counts

    @staticmethod
    def _ragged_frame(columns):
        """Build a DataFrame from lists that may differ in length (short ones are NaN-padded)."""
        return pd.DataFrame({name: pd.Series(values) for name, values in columns.items()})

    @staticmethod
    def _runs_frame(records):
        """Return a Runs/Value frame with one row per record, valued by the rounded ``record[0]``."""
        values = [round(record[0]) for record in records]
        return pd.DataFrame({"Runs": list(range(len(values))), "Value": values})

    @staticmethod
    def _bar_chart(data, category):
        """Draw a horizontal bar chart of ``Value`` per ``category``, longest bar first."""
        chart = alt.Chart(data).mark_bar().encode(
            x='Value:Q',
            y=alt.Y(f'{category}:N', sort='-x'),
            color=alt.Color(f'{category}:N', legend=None)
        )
        st.altair_chart(chart)

    def _words_metric(self, model, inference):
        """Show total words (in thousands) and the last-3 vs overall average delta."""
        words_count = 0
        delta_words = 0

        # NOTE(review): the total is only computed with more than 3 documents,
        # as in the original code; confirm that showing 0K below that is intended.
        if len(inference) > 3:
            per_document = [record[1] for record in inference]
            words_count = sum(per_document)

            avg_word_count = words_count / len(per_document)
            avg_word_last = sum(per_document[-3:]) / 3
            delta_words = self._percent_delta(avg_word_count, avg_word_last)

            words_count = words_count / 1000

        st.metric(label=model.wordsTitle, value=str(words_count) + 'K', delta=str(delta_words) + "%")

    def _documents_metric(self, model, inference):
        """Show the document count and the last-3-days vs overall daily average delta."""
        docs_count = len(inference)
        delta_docs = 0

        if docs_count > 3:
            per_day = list(self._daily_counts(inference).values())
            if len(per_day) > 3:
                avg_value = round(sum(per_day) / len(per_day), 2)
                avg_value_last = round(sum(per_day[-3:]) / 3, 2)
                delta_docs = self._percent_delta(avg_value, avg_value_last)

        st.metric(label=model.documentsTitle, value=docs_count, delta=str(delta_docs) + "%")

    def _daily_peak_metric(self, model, inference):
        """Show the busiest day's count and how far the daily average sits below it."""
        per_day = list(self._daily_counts(inference).values())
        max_value = max(per_day, default=0)
        avg_delta = 0

        # With no records there is neither an average nor a peak to divide by.
        if per_day:
            avg_value = round(sum(per_day) / len(per_day), 2)
            avg_delta = round(100 - ((avg_value * 100) / max_value), 2)

        st.metric(label=model.dailyInferenceTitle, value=max_value, delta=str(avg_delta) + "%")

    def _inference_time_metric(self, model, inference):
        """Show the mean inference time and the last-3 vs overall delta."""
        times = [record[0] for record in inference]
        inference_time_avg = round(sum(times) / len(times), 2) if times else 0

        delta_time = 0
        if len(times) > 3:
            avg_time_last = sum(times[-3:]) / 3
            delta_time = self._percent_delta(inference_time_avg, avg_time_last)

        st.metric(label=model.inferenceTimeTitle, value=str(inference_time_avg) + " s",
                  delta=str(delta_time) + "%", delta_color="inverse")

    def _accuracy_metric(self, model, evaluation):
        """Show the mean accuracy across models and the last-3-models vs overall delta."""
        # Only the first evaluation record of each model contributes.
        accuracy_by_model = {}
        for record in evaluation:
            if record[3] not in accuracy_by_model:
                accuracy_by_model[record[3]] = record[1]['mean_accuracy']

        accuracies = list(accuracy_by_model.values())
        avg_accuracy = round(sum(accuracies) / len(accuracies), 2) if accuracies else 0

        if len(accuracies) > 3:
            avg_accuracy_last = round(sum(accuracies[-3:]) / 3, 2)
        else:
            avg_accuracy_last = avg_accuracy

        delta_accuracy = self._percent_delta(avg_accuracy, avg_accuracy_last)

        st.metric(label=model.accuracyTitle, value=avg_accuracy, delta=str(delta_accuracy) + "%",
                  delta_color="inverse")

    def calculate_annotation_stats(self, model):
        """Count annotation files by status and persist the totals.

        A file whose ``meta.version`` is ``'v0.1'`` counts as in progress;
        any other version counts as completed. The totals are also written
        to ``model.status_file`` as JSON.

        Args:
            model: Object exposing ``annotation_files_dir`` and ``status_file``.

        Returns:
            Tuple ``(total, completed, in_progress)``.
        """
        completed = 0
        in_progress = 0
        data_dir_path = Path(model.annotation_files_dir)

        for file_name in data_dir_path.glob("*.json"):
            with open(file_name, "r") as f:
                data = json.load(f)
            if data['meta']['version'] == 'v0.1':
                in_progress += 1
            else:
                completed += 1
        total = completed + in_progress

        status_json = {
            "annotations": [
                {
                    "completed": completed,
                    "in_progress": in_progress,
                    "total": total
                }
            ]
        }

        with open(model.status_file, "w") as f:
            json.dump(status_json, f, indent=2)

        return total, completed, in_progress
toolbar/views/data_annotation.py ADDED
@@ -0,0 +1,692 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import streamlit_nested_layout
4
+ from streamlit_sparrow_labeling import st_sparrow_labeling
5
+ from streamlit_sparrow_labeling import DataProcessor
6
+ import json
7
+ import math
8
+ import os
9
+ from natsort import natsorted
10
+ from tools import agstyler
11
+ from tools.agstyler import PINLEFT
12
+ import pandas as pd
13
+ from toolbar_main import component_toolbar_main
14
+
15
+
16
+ class DataAnnotation:
17
class Model:
    # Static configuration and UI strings for the Data Annotation page.
    pageTitle = "Data Annotation"

    # Paths of the image and annotation file being edited; assigned by the view.
    img_file = None
    rects_file = None
    # JSON files from which the view reloads `labels` and `groups`.
    labels_file = "docs/labels.json"
    groups_file = "docs/groups.json"

    # "Assign Labels" checkbox (previously defined twice with the same value).
    assign_labels_text = "Assign Labels"
    assign_labels_help = "Check to enable editing of labels and values"
    text_caption_1 = "Check 'Assign Labels' to enable editing of labels and values, move and resize the boxes to annotate the document."
    text_caption_2 = "Add annotations by clicking and dragging on the document, when 'Assign Labels' is unchecked."

    # Default choices; the leading empty string is the "no selection" option.
    labels = ["", "invoice_no", "invoice_date", "seller", "client", "seller_tax_id", "client_tax_id", "iban", "item_desc",
              "item_qty", "item_net_price", "item_net_worth", "item_vat", "item_gross_worth", "total_net_worth", "total_vat",
              "total_gross_worth"]

    groups = ["", "items_row1", "items_row2", "items_row3", "items_row4", "items_row5", "items_row6", "items_row7",
              "items_row8", "items_row9", "items_row10", "summary"]

    selected_field = "Selected Field: "
    save_text = "Save"
    saved_text = "Saved!"

    # Sidebar headings and annotation selector.
    subheader_1 = "Select"
    subheader_2 = "Upload"
    annotation_text = "Annotation"
    no_annotation_file = "No annotation file selected"
    no_annotation_mapping = "Please annotate the document. Uncheck 'Assign Labels' and draw new annotations"

    download_text = "Download"
    download_hint = "Download the annotated structure in JSON format"

    annotation_selection_help = "Select an annotation file to load"
    upload_help = "Upload a file to annotate"
    upload_button_text = "Upload"
    upload_button_text_desc = "Choose a file"

    export_labels_text = "Export Labels"
    export_labels_help = "Create key-value pairs for the labels in JSON format"
    done_text = "Done"

    grouping_id = "ID"
    grouping_value = "Value"

    completed_text = "Completed"
    completed_help = "Check to mark the annotation as completed"

    # Validation messages.
    error_text = "Value is too long. Please shorten it."
    selection_must_be_continuous = "Please select continuous rows"
69
+
70
def view(self, model, ui_width, device_type, device_width):
    """Render the Data Annotation page.

    The sidebar holds the file selector, the "Completed" checkbox, the label
    export button and the upload form. The main area holds the annotation
    canvas and the Mapping / Grouping / Ordering tabs.

    Args:
        model: Page model; its ``labels``, ``groups``, ``img_file`` and
            ``rects_file`` attributes are overwritten here.
        ui_width: Forwarded to ``canvas_available_width``.
        device_type: Forwarded to ``canvas_available_width``.
        device_width: Forwarded to ``canvas_available_width``.
    """
    # Replace the class-level defaults with the label names stored on disk.
    with open(model.labels_file, "r") as f:
        labels_json = json.load(f)

    labels_list = labels_json["labels"]
    labels = ['']
    for label in labels_list:
        labels.append(label['name'])
    model.labels = labels

    # Same for the group names.
    with open(model.groups_file, "r") as f:
        groups_json = json.load(f)

    groups_list = groups_json["groups"]
    groups = ['']
    for group in groups_list:
        groups.append(group['name'])
    model.groups = groups

    with st.sidebar:
        st.markdown("---")
        st.subheader(model.subheader_1)

        # Placeholder so the selector can be re-rendered after an upload.
        placeholder_upload = st.empty()

        file_names = self.get_existing_file_names('docs/images/')

        if 'annotation_index' not in st.session_state:
            st.session_state['annotation_index'] = 0
            annotation_index = 0
        else:
            annotation_index = st.session_state['annotation_index']

        annotation_selection = placeholder_upload.selectbox(model.annotation_text, file_names,
                                                            index=annotation_index,
                                                            help=model.annotation_selection_help)

        # Index of what the user actually picked; compared with the stored
        # index further down to detect a change of file.
        annotation_index = self.get_annotation_index(annotation_selection, file_names)

        file_extension = self.get_file_extension(annotation_selection, 'docs/images/')
        model.img_file = f"docs/images/{annotation_selection}" + file_extension
        model.rects_file = f"docs/json/{annotation_selection}.json"

        # Filled in later, once the saved annotation state has been loaded.
        completed_check = st.empty()

        btn = st.button(model.export_labels_text)
        if btn:
            self.export_labels(model)
            st.write(model.done_text)

        st.subheader(model.subheader_2)

        with st.form("upload-form", clear_on_submit=True):
            uploaded_file = st.file_uploader(model.upload_button_text_desc, accept_multiple_files=False,
                                             type=['png', 'jpg', 'jpeg'],
                                             help=model.upload_help)
            submitted = st.form_submit_button(model.upload_button_text)

            if submitted and uploaded_file is not None:
                ret = self.upload_file(uploaded_file)

                # upload_file returns False only when it rejected the file.
                if ret is not False:
                    file_names = self.get_existing_file_names('docs/images/')

                    # The new file may have shifted the current selection's position.
                    annotation_index = self.get_annotation_index(annotation_selection, file_names)
                    annotation_selection = placeholder_upload.selectbox(model.annotation_text, file_names,
                                                                        index=annotation_index,
                                                                        help=model.annotation_selection_help)
                    st.session_state['annotation_index'] = annotation_index

    # st.title(model.pageTitle + " - " + annotation_selection)

    # NOTE(review): img_file was assigned a string above, so this branch looks
    # unreachable as written - confirm.
    if model.img_file is None:
        st.caption(model.no_annotation_file)
        return

    saved_state = self.fetch_annotations(model.rects_file)

    # annotation file has been changed
    if annotation_index != st.session_state['annotation_index']:
        annotation_v = saved_state['meta']['version']
        if annotation_v == "v0.1":
            st.session_state["annotation_done"] = False
        else:
            st.session_state["annotation_done"] = True
        # store the annotation file index
        st.session_state['annotation_index'] = annotation_index

    # first load
    if "annotation_done" not in st.session_state:
        annotation_v = saved_state['meta']['version']
        if annotation_v == "v0.1":
            st.session_state["annotation_done"] = False
        else:
            st.session_state["annotation_done"] = True

    with completed_check:
        # The checkbox is bound to session key "annotation_done"; its value is
        # written back into the annotation file as the version marker.
        annotation_done = st.checkbox(model.completed_text, help=model.completed_help, key="annotation_done")
        if annotation_done:
            saved_state['meta']['version'] = "v1.0"
        else:
            saved_state['meta']['version'] = "v0.1"

        with open(model.rects_file, "w") as f:
            json.dump(saved_state, f, indent=2)
        st.session_state[model.rects_file] = saved_state

    assign_labels = st.checkbox(model.assign_labels_text, True, help=model.assign_labels_help)
    mode = "transform" if assign_labels else "rect"

    docImg = Image.open(model.img_file)

    data_processor = DataProcessor()

    with st.container():
        doc_height = saved_state['meta']['image_size']['height']
        doc_width = saved_state['meta']['image_size']['width']
        canvas_width, number_of_columns = self.canvas_available_width(ui_width, doc_width, device_type,
                                                                      device_width)

        if number_of_columns > 1:
            # Two-column layout: canvas on the left, editing tabs on the right.
            col1, col2 = st.columns([number_of_columns, 10 - number_of_columns])
            with col1:
                result_rects = self.render_doc(model, docImg, saved_state, mode, canvas_width, doc_height, doc_width)
            with col2:
                tab = st.radio("Select", ["Mapping", "Grouping", "Ordering"], horizontal=True,
                               label_visibility="collapsed")
                if tab == "Mapping":
                    self.render_form(model, result_rects, data_processor, annotation_selection)
                elif tab == "Grouping":
                    self.group_annotations(model, result_rects)
                elif tab == "Ordering":
                    self.order_annotations(model, model.labels, model.groups, result_rects)
        else:
            # Single-column layout: canvas first, then Mapping / Grouping only.
            result_rects = self.render_doc(model, docImg, saved_state, mode, canvas_width, doc_height, doc_width)
            tab = st.radio("Select", ["Mapping", "Grouping"], horizontal=True, label_visibility="collapsed")
            if tab == "Mapping":
                self.render_form(model, result_rects, data_processor, annotation_selection)
            else:
                self.group_annotations(model, result_rects)
210
+
211
def render_doc(self, model, docImg, saved_state, mode, canvas_width, doc_height, doc_width):
    """Render the labeling canvas for the current document and return its result.

    Args:
        model: Page model; ``img_file`` is used in the component key and the
            two caption strings are shown below the canvas.
        docImg: Image used as the canvas background.
        saved_state: Annotation data passed as the initial rectangles.
        mode: Drawing mode passed to the component.
        canvas_width: Width available to the canvas.
        doc_height: Height of the document image.
        doc_width: Width of the document image.

    Returns:
        Whatever ``st_sparrow_labeling`` returns.
    """
    component_args = dict(
        fill_color="rgba(0, 151, 255, 0.3)",
        stroke_width=2,
        stroke_color="rgba(0, 50, 255, 0.7)",
        background_image=docImg,
        initial_rects=saved_state,
        height=1296,
        width=864,
        drawing_mode=mode,
        display_toolbar=True,
        update_streamlit=True,
        canvas_width=canvas_width,
        doc_height=doc_height,
        doc_width=doc_width,
        image_rescale=True,
        # Keyed by image path, so each document gets its own component key.
        key="doc_annotation" + model.img_file,
    )

    with st.container():
        annotated = st_sparrow_labeling(**component_args)

        for caption in (model.text_caption_1, model.text_caption_2):
            st.caption(caption)

    return annotated
238
+
239
def render_form(self, model, result_rects, data_processor, annotation_selection):
    """Render the Mapping tab: editable grid, Save button and download link.

    On Save, every value is checked against a 1000-character limit; if all
    pass, the annotations are written to ``model.rects_file``, cached in the
    session state and the script is rerun.

    Args:
        model: Page model (labels, texts and ``rects_file``).
        result_rects: Canvas result; nothing is rendered when it is ``None``.
        data_processor: Forwarded to ``render_form_view``.
        annotation_selection: Base name used for the downloaded JSON file.
    """
    with st.container():
        if result_rects is None:
            return

        with st.form(key="fields_form"):
            # Reserved above the grid so the Save button appears first.
            save_slot = st.empty()

            self.render_form_view(result_rects.rects_data['words'], model.labels, result_rects,
                                  data_processor)

            with save_slot:
                submit = st.form_submit_button(model.save_text, type="primary")
                if submit:
                    has_oversized_value = any(
                        len(word['value']) > 1000 for word in result_rects.rects_data['words'])
                    if has_oversized_value:
                        st.error(model.error_text)
                        return

                    with open(model.rects_file, "w") as out:
                        json.dump(result_rects.rects_data, out, indent=2)
                    st.session_state[model.rects_file] = result_rects.rects_data
                    # st.write(model.saved_text)
                    st.experimental_rerun()

        if len(result_rects.rects_data['words']) == 0:
            st.caption(model.no_annotation_mapping)
            return

        with open(model.rects_file, 'rb') as file:
            st.download_button(label=model.download_text,
                               data=file,
                               file_name=annotation_selection + ".json",
                               mime='application/json',
                               help=model.download_hint)
272
+
273
def render_form_view(self, words, labels, result_rects, data_processor):
    """Show the annotations in an editable grid and push the edits back.

    Args:
        words: Annotation entries, each with ``'value'`` and ``'label'`` keys.
        labels: Choices offered by the label cell editor.
        result_rects: Canvas result; ``current_rect_index`` picks the
            highlighted row and ``rects_data`` receives the edits.
        data_processor: Object whose ``update_rect_data`` is called once per
            entry with the grid's value and label.
    """
    grid_rows = []
    for idx, rect in enumerate(words):
        full_label = rect['label']
        # A label may carry a "group:" prefix; only the part after the first colon is shown.
        shown_label = full_label.split(":", 1)[1] if ":" in full_label else full_label
        grid_rows.append({'id': idx, 'value': rect['value'], 'label': shown_label})
    df = pd.DataFrame(grid_rows)

    label_column = {**PINLEFT,
                    'width': 80,
                    'editable': True,
                    'cellEditor': 'agSelectCellEditor',
                    'cellEditorParams': {
                        'values': labels
                    }}
    formatter = {
        'id': ('ID', {**PINLEFT, 'hide': True}),
        'value': ('Value', {**PINLEFT, 'editable': True}),
        'label': ('Label', label_column)
    }

    # Highlight the row matching the rectangle currently selected on the canvas.
    highlight_rule = 'data.id === ' + str(result_rects.current_rect_index)
    grid_options = {'rowClassRules': {'row-selected': highlight_rule}}

    green_light = "#abf7b1"
    css = {'.row-selected': {'background-color': f'{green_light} !important'}}

    response = agstyler.draw_grid(
        df,
        formatter=formatter,
        fit_columns=True,
        grid_options=grid_options,
        css=css
    )

    # Grid rows come back as [id, value, label] lists, in column order.
    edited = response['data'].values.tolist()

    for idx in range(len(words)):
        value = edited[idx][1]
        label = edited[idx][2]
        data_processor.update_rect_data(result_rects.rects_data, idx, value, label)
319
+
320
def canvas_available_width(self, ui_width, doc_width, device_type, device_width):
    """Pick the canvas width and the column weight for the page layout.

    On a desktop with ``ui_width`` above 700 the canvas takes 37, 49 or 60
    percent of ``ui_width``, depending on how wide the document is relative
    to it, and the column weight is 4, 5 or 6 respectively. Otherwise the
    weight is 1 and the width is ``device_width`` minus a 22% (desktop) or
    13% (mobile) margin; for any other device type ``ui_width`` is returned
    unchanged.

    Returns:
        Tuple ``(canvas_width, number_of_columns)``.
    """
    doc_share = (doc_width * 100) / ui_width
    if doc_share < 45:
        canvas_pct, column_weight = 37, 4
    elif doc_share < 55:
        canvas_pct, column_weight = 49, 5
    else:
        canvas_pct, column_weight = 60, 6

    if ui_width > 700 and device_type == "desktop":
        return math.floor(canvas_pct * ui_width / 100), column_weight

    # Narrow or non-desktop layout: full device width minus a per-device margin.
    margin_pct = {"desktop": 22, "mobile": 13}.get(device_type)
    if margin_pct is not None:
        ui_width = device_width - math.floor((device_width * margin_pct) / 100)
    return ui_width, 1
341
+
342
def fetch_annotations(self, rects_file):
    """Return the annotations for ``rects_file``, cached in the session state.

    Cached entries for other annotation files (session keys starting with
    ``docs/json/``) are dropped first, so at most one document stays cached.
    The file is read from disk only when it is not cached yet.

    Args:
        rects_file: Path of the annotation JSON file; also the session key.

    Returns:
        The parsed annotation data.
    """
    # Collect the stale keys before deleting: removing entries from a mapping
    # while iterating over it is unsafe.
    stale_keys = [key for key in st.session_state
                  if key.startswith("docs/json/") and key != rects_file]
    for key in stale_keys:
        del st.session_state[key]

    if rects_file not in st.session_state:
        with open(rects_file, "r") as f:
            saved_state = json.load(f)
        st.session_state[rects_file] = saved_state
    else:
        saved_state = st.session_state[rects_file]

    return saved_state
355
+
356
def upload_file(self, uploaded_file):
    """Store an uploaded image and create its empty annotation file.

    The image goes to ``docs/images/`` and a matching
    ``docs/json/<stem>.json`` is created with version ``v0.1`` and no words.

    Args:
        uploaded_file: Uploaded file object exposing ``name`` and
            ``getbuffer()``, or ``None``.

    Returns:
        ``False`` when the upload is rejected (name already taken or too
        long); ``None`` otherwise.
    """
    if uploaded_file is None:
        return None

    # The name comes from the client: keep only its last path component so
    # the file cannot be written outside docs/images/.
    safe_name = os.path.basename(uploaded_file.name)
    # Use the same stem rule as get_existing_file_names, so names with
    # several dots (e.g. "scan.v2.png") map to the JSON the page looks up.
    stem = os.path.splitext(safe_name)[0]
    image_path = os.path.join("docs/images/", safe_name)
    json_path = os.path.join("docs/json/", stem + ".json")

    # Also reject a stem that is already annotated (e.g. a.jpg when a.png
    # exists); otherwise the existing annotations would be overwritten.
    if os.path.exists(image_path) or os.path.exists(json_path):
        st.write("File already exists")
        return False

    if len(safe_name) > 100:
        st.write("File name too long")
        return False

    with open(image_path, "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Open the image only to read its size, and close the handle right away.
    with Image.open(image_path) as img_file:
        image_width, image_height = img_file.width, img_file.height

    annotations_json = {
        "meta": {
            "version": "v0.1",
            "split": "train",
            # Counted after the image was written, so the new file is included.
            "image_id": len(self.get_existing_file_names("docs/images/")),
            "image_size": {
                "width": image_width,
                "height": image_height
            }
        },
        "words": []
    }

    with open(json_path, "w") as f:
        json.dump(annotations_json, f, indent=2)

    st.success("File uploaded successfully")
    return None
389
+
390
def get_existing_file_names(self, dir_name):
    """Return the naturally sorted file names in ``dir_name`` without extensions.

    Entries whose name starts with a dot are skipped.
    """
    stems = []
    for entry in os.listdir(dir_name):
        if entry.startswith('.'):
            continue
        stem, _extension = os.path.splitext(entry)
        stems.append(stem)
    return natsorted(stems)
393
+
394
def get_file_extension(self, file_name, dir_name):
    """Return the extension (with leading dot) of the file in ``dir_name`` named ``file_name``.

    Entries whose name starts with a dot are ignored. Returns ``None`` when
    ``file_name`` is ``None`` or no entry has that base name.
    """
    visible_entries = (entry for entry in os.listdir(dir_name) if not entry.startswith('.'))
    for stem, extension in map(os.path.splitext, visible_entries):
        if file_name is not None and stem == file_name:
            return extension
    return None
400
+
401
def get_annotation_index(self, file, files_list):
    """Return the position of the first occurrence of ``file`` in ``files_list``.

    Raises:
        ValueError: If ``file`` is not in ``files_list``.
    """
    position = files_list.index(file)
    return position
403
+
404
+
405
def group_annotations(self, model, result_rects):
    """Render the Grouping tab and merge the selected rows into one annotation.

    On Save, the values of the selected rows are joined with spaces, their
    rectangles are replaced by one bounding rectangle stored on the first
    selected row, and the other selected rows are deleted. The result is
    written to ``model.rects_file``, cached in the session state and the
    script is rerun.

    Args:
        model: Page model (button text, error text and ``rects_file``).
        result_rects: Canvas result; nothing is rendered inside the form
            when it is ``None``.
    """
    with st.form(key="grouping_form"):
        if result_rects is not None:
            words = result_rects.rects_data['words']
            data = []
            for i, rect in enumerate(words):
                data.append({'id': i, 'value': rect['value']})
            df = pd.DataFrame(data)

            formatter = {
                'id': ('ID', {**PINLEFT, 'width': 50}),
                'value': ('Value', PINLEFT)
            }

            # Reserved above the grid so the Save button appears first.
            toolbar = st.empty()

            response = agstyler.draw_grid(
                df,
                formatter=formatter,
                fit_columns=True,
                selection='multiple',
                use_checkbox='True',
                pagination_size=40
            )

            rows = response['selected_rows']

            with toolbar:
                submit = st.form_submit_button(model.save_text, type="primary")
                if submit and len(rows) > 0:
                    # check if there are gaps in the selected rows
                    # NOTE(review): this assumes selected_rows is ordered by
                    # ascending id - confirm against the grid component.
                    if len(rows) > 1:
                        for i in range(len(rows) - 1):
                            if rows[i]['id'] + 1 != rows[i + 1]['id']:
                                st.error(model.selection_must_be_continuous)
                                return

                    words = result_rects.rects_data['words']
                    new_words_list = []
                    coords = []
                    for row in rows:
                        word_value = words[row['id']]['value']
                        rect = words[row['id']]['rect']
                        coords.append(rect)
                        new_words_list.append(word_value)
                    # convert array to string
                    new_word = " ".join(new_words_list)

                    # Bounding box covering every selected rectangle.
                    x1_min = min([coord['x1'] for coord in coords])
                    y1_min = min([coord['y1'] for coord in coords])
                    x2_max = max([coord['x2'] for coord in coords])
                    y2_max = max([coord['y2'] for coord in coords])

                    # The first selected row keeps the merged value and rectangle.
                    words[rows[0]['id']]['value'] = new_word
                    words[rows[0]['id']]['rect'] = {
                        "x1": x1_min,
                        "y1": y1_min,
                        "x2": x2_max,
                        "y2": y2_max
                    }

                    # loop array in reverse order and remove selected entries
                    # (the loop stops before the first selected row, which is
                    # kept; deleting from the end keeps lower indices valid)
                    i = 0
                    for row in rows[::-1]:
                        if i == len(rows) - 1:
                            break
                        del words[row['id']]
                        i += 1

                    result_rects.rects_data['words'] = words

                    with open(model.rects_file, "w") as f:
                        json.dump(result_rects.rects_data, f, indent=2)
                    st.session_state[model.rects_file] = result_rects.rects_data
                    st.experimental_rerun()
482
+
483
+
484
def order_annotations(self, model, labels, groups, result_rects):
    """Render the ordering grid for labelled words and apply toolbar actions.

    Every word in ``result_rects.rects_data['words']`` with a non-empty label
    is listed in a grid. The toolbar component can emit ``up``, ``down`` or
    ``save``:

    * ``up`` / ``down`` move the selected word by one position in the words
      list or, when a target row id is picked in the select box, all the way
      to that position;
    * ``save`` rewrites each word label as ``group:label`` from the grid data.

    After any change the annotation data is written to ``model.rects_file``,
    mirrored into ``st.session_state`` under the same key, and the script is
    rerun.

    Args:
        model: Object providing ``rects_file``, the path of the JSON file.
        labels: Values offered by the (non-editable) label column editor.
        groups: Values offered by the editable group column editor.
        result_rects: Object exposing ``rects_data`` (dict with a ``words``
            list) and ``current_rect_index``; nothing happens when it is None.
    """
    if result_rects is None:
        return

    self.action_event = None
    words = result_rects.rects_data['words']

    data = []
    idx_list = [""]
    for i, rect in enumerate(words):
        if rect['label'] != "":
            # "group:label" -> (group, label); a label without ":" has no group
            group, label = rect['label'].split(":", 1) if ":" in rect['label'] else (None, rect['label'])
            data.append({'id': i, 'value': rect['value'], 'label': label, 'group': group})
            idx_list.append(i)
    df = pd.DataFrame(data)

    formatter = {
        'id': ('ID', {**PINLEFT, 'width': 50}),
        'value': ('Value', {**PINLEFT}),
        'label': ('Label', {**PINLEFT,
                            'width': 80,
                            'editable': False,
                            'cellEditor': 'agSelectCellEditor',
                            'cellEditorParams': {
                                'values': labels
                            }}),
        'group': ('Group', {**PINLEFT,
                            'width': 80,
                            'editable': True,
                            'cellEditor': 'agSelectCellEditor',
                            'cellEditorParams': {
                                'values': groups
                            }})
    }

    # Highlight the grid row that belongs to the currently selected rectangle.
    go = {
        'rowClassRules': {
            'row-selected': 'data.id === ' + str(result_rects.current_rect_index)
        }
    }

    green_light = "#abf7b1"
    css = {
        '.row-selected': {
            'background-color': f'{green_light} !important'
        }
    }

    idx_option = st.selectbox('Select row to move into', idx_list)

    def handle_event(value):
        # The component keeps returning its last value on every rerun; the
        # timestamp tells a fresh click apart from a replayed one.
        if value is None:
            return
        if ('action_timestamp' not in st.session_state
                or st.session_state['action_timestamp'] != value['timestamp']):
            self.action_event = value['action']
            st.session_state['action_timestamp'] = value['timestamp']
        else:
            self.action_event = None

    props = {
        'buttons': {
            'up': {
                'disabled': False,
                'rendered': ''
            },
            'down': {
                'disabled': False,
                'rendered': ''
            },
            'save': {
                'disabled': False,
                'rendered': ''
                # 'rendered': 'none',
            }
        }
    }

    handle_event(component_toolbar_main(key='toolbar_main', **props))

    response = agstyler.draw_grid(
        df,
        formatter=formatter,
        fit_columns=True,
        grid_options=go,
        css=css
    )

    rows = response['selected_rows']
    if len(rows) == 0 and result_rects.current_rect_index > -1:
        # Nothing selected in the grid: fall back to the row of the current
        # rectangle, shaped like a grid selection entry.
        for i, row in enumerate(data):
            if row['id'] == result_rects.current_rect_index:
                rows = [
                    {
                        '_selectedRowNodeInfo': {
                            'nodeRowIndex': i
                        },
                        'id': row['id']
                    }
                ]
                break

    def save_and_rerun():
        result_rects.rects_data['words'] = words

        with open(model.rects_file, "w") as f:
            json.dump(result_rects.rects_data, f, indent=2)
        st.session_state[model.rects_file] = result_rects.rects_data
        st.experimental_rerun()

    def move_selected(step):
        """Move the selected word up (step=-1) or down (step=+1); True if moved."""
        if len(rows) == 0:
            return False
        grid_idx = rows[0]['_selectedRowNodeInfo']['nodeRowIndex']
        if step < 0 and grid_idx <= 0:
            return False
        if step > 0 and grid_idx >= len(df) - 1:
            return False

        row_id = rows[0]['id']
        if idx_option == "":
            target = row_id + step
        else:
            target = idx_option
            # The target must lie in the direction of travel. The previous
            # implementation kept swapping for up to 1000 steps otherwise,
            # wrapping around through negative indices or running off the
            # end of the list.
            if (target - row_id) * step <= 0:
                return False

        # Equivalent to the chain of adjacent swaps: the word lands on
        # `target`, everything in between shifts by one.
        words.insert(target, words.pop(row_id))
        return True

    action = str(self.action_event)
    if action == 'up':
        if move_selected(-1):
            save_and_rerun()
    elif action == 'down':
        if move_selected(1):
            save_and_rerun()
    elif action == 'save':
        for elem in response['data'].values.tolist():
            group = elem[3]
            # Rows without a group show up as the string "None"; a real None
            # is skipped as well so it is never saved as "None:label".
            if group is not None and group != "None":
                words[elem[0]]['label'] = f"{group}:{elem[2]}"
        save_and_rerun()
650
+
651
+
652
def export_labels(self, model):
    """Export the labelled words of every annotation file as key/value JSON.

    Each regular, non-hidden file in ``docs/json/`` is loaded as JSON and its
    ``words`` list is folded into a dictionary that is written, under the
    same file name, to ``docs/json/key/``:

    * a label without ``:`` becomes a top-level ``label -> value`` entry;
    * ``group:label`` with a group that does not contain ``row`` is stored as
      ``keys[group][label]``;
    * ``group:label`` with a group containing ``row`` is collected per group
      and appended to the ``items`` list, one dictionary per group.

    Files without any labelled word produce no output file.

    Args:
        model: Unused; kept for interface compatibility with callers.
    """
    path_from = os.path.join("docs/json/")
    path_to = os.path.join("docs/json/key/")

    files = [f for f in os.listdir(path_from) if not f.startswith('.')]
    for file in files:
        path = os.path.join(path_from, file)
        if not os.path.isfile(path):
            # skips sub-directories such as the output folder itself
            continue

        with open(path, "r", encoding="utf-8") as f:
            words = json.load(f)['words']

        keys = {}
        row_keys = {}

        for word in words:
            full_label = word['label']
            if full_label == '':
                continue

            if ':' not in full_label:
                keys[full_label] = word['value']
                continue

            group, label = full_label.split(':', 1)
            if 'row' not in group:
                keys.setdefault(group, {})[label] = word['value']
            else:
                # Reserve the "items" slot as soon as the first row word is
                # seen so it keeps its position in the exported dictionary.
                keys.setdefault("items", [])
                row_keys.setdefault(group, {})[label] = word['value']

        if row_keys:
            keys["items"].extend(row_keys.values())

        if keys:
            # The output folder is not guaranteed to exist on a fresh
            # checkout; writing used to fail with FileNotFoundError.
            os.makedirs(path_to, exist_ok=True)
            with open(os.path.join(path_to, file), "w", encoding="utf-8") as f:
                json.dump(keys, f, indent=2)
toolbar/views/data_inference.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ from PIL import Image
5
+ import math
6
+ from streamlit_sparrow_labeling import st_sparrow_labeling
7
+ import requests
8
+ from config import settings
9
+ import json
10
+
11
+
12
class DataInference:
    """Streamlit page that uploads a document image and runs remote inference on it."""

    class Model:
        """UI texts and session-backed state of the inference page."""

        # pageTitle = "Data Inference"
        subheader_2 = "Upload"
        initial_msg = "Please upload a file for inference"

        upload_help = "Upload a file to extract data from it"
        upload_button_text = "Upload"
        upload_button_text_desc = "Choose a file"

        extract_data = "Extract Data"

        model_in_use = "donut"

        img_file = None

        def set_image_file(self, img_file):
            """Store the path of the uploaded image in the session state."""
            st.session_state['img_file'] = img_file

        def get_image_file(self):
            """Return the stored image path, or None when nothing was uploaded."""
            if 'img_file' not in st.session_state:
                return None
            return st.session_state['img_file']

        data_result = None

        def set_data_result(self, data_result):
            """Store the raw inference response text in the session state."""
            st.session_state['data_result'] = data_result

        def get_data_result(self):
            """Return the stored inference response text, or None."""
            if 'data_result' not in st.session_state:
                return None
            return st.session_state['data_result']

    def view(self, model, ui_width, device_type, device_width):
        """Render the upload form in the sidebar and, once a file exists, document and results.

        Args:
            model: ``DataInference.Model`` instance holding texts and state.
            ui_width: Available UI width, forwarded to ``canvas_available_width``.
            device_type: ``"desktop"`` or ``"mobile"``.
            device_width: Full device width, used for the single-column layout.
        """
        # st.title(model.pageTitle)

        with st.sidebar:
            st.markdown("---")
            st.subheader(model.subheader_2)

            with st.form("upload-form", clear_on_submit=True):
                uploaded_file = st.file_uploader(model.upload_button_text_desc, accept_multiple_files=False,
                                                 type=['png', 'jpg', 'jpeg'],
                                                 help=model.upload_help)
                submitted = st.form_submit_button(model.upload_button_text)

                if submitted and uploaded_file is not None:
                    ret = self.upload_file(uploaded_file)

                    if ret is not False:
                        model.set_image_file(ret)
                        model.set_data_result(None)

        if model.get_image_file() is None:
            st.title(model.initial_msg)
            return

        doc_img = Image.open(model.get_image_file())
        doc_height = doc_img.height
        doc_width = doc_img.width

        canvas_width, number_of_columns = self.canvas_available_width(ui_width, doc_width, device_type,
                                                                      device_width)

        if number_of_columns > 1:
            col1, col2 = st.columns([number_of_columns, 10 - number_of_columns])
            with col1:
                self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
            with col2:
                self.render_results(model)
        else:
            self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
            self.render_results(model)

    def upload_file(self, uploaded_file):
        """Save an uploaded file under ``docs/inference/`` with a timestamp suffix.

        Args:
            uploaded_file: Object with a ``name`` attribute and ``getbuffer()``.

        Returns:
            The path of the stored file, or ``False`` when it was rejected.
        """
        timestamp = str(time.time()).replace(".", "")

        # The name comes from the client: keep only its last path component
        # so it cannot point outside the upload directory.
        file_name, file_extension = os.path.splitext(os.path.basename(uploaded_file.name))
        target_name = file_name + "_" + timestamp + file_extension
        target_path = os.path.join("docs/inference/", target_name)

        if os.path.exists(target_path):
            st.write("File already exists")
            return False

        if len(target_name) > 500:
            st.write("File name too long")
            return False

        os.makedirs("docs/inference/", exist_ok=True)
        with open(target_path, "wb") as f:
            f.write(uploaded_file.getbuffer())

        st.success("File uploaded successfully")

        return target_path

    def canvas_available_width(self, ui_width, doc_width, device_type, device_width):
        """Choose the canvas width and the column share used for the document.

        Returns:
            ``(canvas_width, number_of_columns)``. On a desktop wider than 700
            the canvas takes 37/49/60 percent of ``ui_width`` (4/5/6 columns
            out of 10) depending on how wide the document is relative to the
            UI; otherwise a single column based on ``device_width`` is used.
        """
        doc_width_pct = (doc_width * 100) / ui_width
        if doc_width_pct < 45:
            canvas_width_pct, columns = 37, 4
        elif doc_width_pct < 55:
            canvas_width_pct, columns = 49, 5
        else:
            canvas_width_pct, columns = 60, 6

        if ui_width > 700 and device_type == "desktop":
            return math.floor(canvas_width_pct * ui_width / 100), columns

        # Narrow or mobile layout: one column, minus a fixed margin share.
        if device_type == "desktop":
            ui_width = device_width - math.floor((device_width * 22) / 100)
        elif device_type == "mobile":
            ui_width = device_width - math.floor((device_width * 13) / 100)
        return ui_width, 1

    def render_doc(self, model, doc_img, canvas_width, doc_height, doc_width):
        """Show the document image in a labeling canvas without any rectangles."""
        height = 1296
        width = 864

        annotations_json = {
            "meta": {
                "version": "v0.1",
                "split": "train",
                "image_id": 0,
                "image_size": {
                    "width": doc_width,
                    "height": doc_height
                }
            },
            "words": []
        }

        st_sparrow_labeling(
            fill_color="rgba(0, 151, 255, 0.3)",
            stroke_width=2,
            stroke_color="rgba(0, 50, 255, 0.7)",
            background_image=doc_img,
            initial_rects=annotations_json,
            height=height,
            width=width,
            drawing_mode="transform",
            display_toolbar=False,
            update_streamlit=False,
            canvas_width=canvas_width,
            doc_height=doc_height,
            doc_width=doc_width,
            image_rescale=True,
            key="doc_annotation" + model.get_image_file()
        )

    @staticmethod
    def _json_path_for(image_path):
        """Return ``image_path`` with its extension replaced by ``.json``."""
        return os.path.splitext(image_path)[0] + ".json"

    def render_results(self, model):
        """Render the "Extract Data" form, call the inference API and show the result.

        On success the raw response text is kept in the session state and
        written next to the image as ``<name>.json``; on failure an error
        message is queued in the session state and shown after the rerun.
        """
        with st.form(key="results_form"):
            button_placeholder = st.empty()

            submit = button_placeholder.form_submit_button(model.extract_data, type="primary")
            if 'inference_error' in st.session_state:
                st.error(st.session_state.inference_error)
                del st.session_state.inference_error

            if not submit:
                if model.get_data_result() is not None:
                    st.markdown("---")
                    st.json(model.get_data_result())
                return

            button_placeholder.empty()

            api_url = "https://katanaml-org-sparrow-ml.hf.space/api-inference/v1/sparrow-ml/inference"
            file_path = model.get_image_file()

            data = {
                'image_url': '',
                'model_in_use': model.model_in_use,
                'sparrow_key': settings.sparrow_key
            }

            try:
                with open(file_path, "rb") as file:
                    # Prepare the payload
                    # NOTE(review): the MIME type is fixed to JPEG although
                    # PNG uploads are accepted - confirm the API ignores it.
                    files = {
                        'file': (file.name, file, 'image/jpeg')
                    }

                    with st.spinner("Extracting data from document..."):
                        response = requests.post(api_url, data=data, files=files, timeout=180)
            except requests.RequestException as err:
                # Timeouts and connection errors take the same path as a
                # non-200 answer instead of surfacing as a traceback.
                print('Request failed:', err)

                st.session_state["inference_error"] = "Error extracting data from document"
                st.experimental_rerun()

            if response.status_code != 200:
                print('Request failed with status code:', response.status_code)
                print('Response:', response.text)

                st.session_state["inference_error"] = "Error extracting data from document"
                st.experimental_rerun()

            model.set_data_result(response.text)

            # Display JSON data in Streamlit
            st.markdown("---")
            st.json(response.text)

            # Swap the real extension for .json. Replacing the literal ".jpg"
            # left .png/.jpeg paths unchanged, so the result was written over
            # the uploaded image itself.
            with open(self._json_path_for(file_path), "w") as f:
                json.dump(response.text, f, indent=2)

            st.experimental_rerun()
toolbar/views/data_review.py ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from natsort import natsorted
3
+ import os
4
+ from PIL import Image
5
+ import math
6
+ from streamlit_sparrow_labeling import st_sparrow_labeling
7
+ import json
8
+
9
+
10
class DataReview:
    """Streamlit page that shows a processed document next to its stored JSON result."""

    class Model:
        """UI texts and session-backed state of the review page."""

        # pageTitle = "Data Review"
        subheader_2 = "Select"
        subheader_3 = "Result"
        selection_text = "File to review"
        initial_msg = "Please select a file to review"

        img_file = None

        def set_image_file(self, img_file):
            """Store the path of the image under review in the session state."""
            st.session_state['img_file_review'] = img_file

        def get_image_file(self):
            """Return the stored image path, or None."""
            if 'img_file_review' not in st.session_state:
                return None
            return st.session_state['img_file_review']

        json_file = None

        def set_json_file(self, json_file):
            """Store the path of the JSON result under review in the session state."""
            st.session_state['json_file_review'] = json_file

        def get_json_file(self):
            """Return the stored JSON result path, or None."""
            if 'json_file_review' not in st.session_state:
                return None
            return st.session_state['json_file_review']

    def view(self, model, ui_width, device_type, device_width):
        """Render the file selector in the sidebar and the selected document with its result.

        Args:
            model: ``DataReview.Model`` instance holding texts and state.
            ui_width: Available UI width, forwarded to ``canvas_available_width``.
            device_type: ``"desktop"`` or ``"mobile"``.
            device_width: Full device width, used for the single-column layout.
        """
        # st.title(model.pageTitle)

        with st.sidebar:
            st.markdown("---")
            st.subheader(model.subheader_2)

            # get list of files in inference directory
            processed_file_names = self.get_processed_file_names('docs/inference/')

            if not processed_file_names:
                # Nothing has been processed yet: without this guard the
                # select box returns None and the path building below fails.
                model.set_image_file(None)
                model.set_json_file(None)
            else:
                selection_index = 0
                if 'selection_index' in st.session_state:
                    selection_index = st.session_state['selection_index']
                # A remembered index can be stale when files were removed.
                if not 0 <= selection_index < len(processed_file_names):
                    selection_index = 0

                selection = st.selectbox(model.selection_text, processed_file_names, index=selection_index)

                st.session_state['selection_index'] = self.get_selection_index(selection, processed_file_names)

                model.set_image_file(self._image_path_for("docs/inference/" + selection))
                model.set_json_file("docs/inference/" + selection + ".json")

        if model.get_image_file() is None:
            st.title(model.initial_msg)
            return

        doc_img = Image.open(model.get_image_file())
        doc_height = doc_img.height
        doc_width = doc_img.width

        canvas_width, number_of_columns = self.canvas_available_width(ui_width, doc_width, device_type,
                                                                      device_width)

        if number_of_columns > 1:
            col1, col2 = st.columns([number_of_columns, 10 - number_of_columns])
            with col1:
                self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
            with col2:
                self.render_results(model)
        else:
            self.render_doc(model, doc_img, canvas_width, doc_height, doc_width)
            self.render_results(model)

    @staticmethod
    def _image_path_for(stem_path):
        """Return the image file that belongs to ``stem_path`` (a path without extension).

        The first existing file among ``.jpg``, ``.jpeg`` and ``.png`` wins;
        when none exists the ``.jpg`` path is returned.
        """
        for extension in (".jpg", ".jpeg", ".png"):
            candidate = stem_path + extension
            if os.path.isfile(candidate):
                return candidate
        return stem_path + ".jpg"

    def get_processed_file_names(self, dir_name):
        """Return the naturally sorted names (without extension) of visible ``.json`` files in ``dir_name``."""
        # get ordered list of files without file extension, excluding hidden files, with JSON extension only
        file_names = [os.path.splitext(f)[0] for f in os.listdir(dir_name) if
                      os.path.isfile(os.path.join(dir_name, f)) and not f.startswith('.') and f.endswith('.json')]
        return natsorted(file_names)

    def get_selection_index(self, file, files_list):
        """Return the position of ``file`` in ``files_list``."""
        return files_list.index(file)

    def canvas_available_width(self, ui_width, doc_width, device_type, device_width):
        """Choose the canvas width and the column share used for the document.

        Returns:
            ``(canvas_width, number_of_columns)``. On a desktop wider than 700
            the canvas takes 37/49/60 percent of ``ui_width`` (4/5/6 columns
            out of 10) depending on how wide the document is relative to the
            UI; otherwise a single column based on ``device_width`` is used.
        """
        doc_width_pct = (doc_width * 100) / ui_width
        if doc_width_pct < 45:
            canvas_width_pct, columns = 37, 4
        elif doc_width_pct < 55:
            canvas_width_pct, columns = 49, 5
        else:
            canvas_width_pct, columns = 60, 6

        if ui_width > 700 and device_type == "desktop":
            return math.floor(canvas_width_pct * ui_width / 100), columns

        # Narrow or mobile layout: one column, minus a fixed margin share.
        if device_type == "desktop":
            ui_width = device_width - math.floor((device_width * 22) / 100)
        elif device_type == "mobile":
            ui_width = device_width - math.floor((device_width * 13) / 100)
        return ui_width, 1

    def render_doc(self, model, doc_img, canvas_width, doc_height, doc_width):
        """Show the document image in a labeling canvas without any rectangles."""
        height = 1296
        width = 864

        annotations_json = {
            "meta": {
                "version": "v0.1",
                "split": "train",
                "image_id": 0,
                "image_size": {
                    "width": doc_width,
                    "height": doc_height
                }
            },
            "words": []
        }

        st_sparrow_labeling(
            fill_color="rgba(0, 151, 255, 0.3)",
            stroke_width=2,
            stroke_color="rgba(0, 50, 255, 0.7)",
            background_image=doc_img,
            initial_rects=annotations_json,
            height=height,
            width=width,
            drawing_mode="transform",
            display_toolbar=False,
            update_streamlit=False,
            canvas_width=canvas_width,
            doc_height=doc_height,
            doc_width=doc_width,
            image_rescale=True,
            key="doc_annotation" + model.get_image_file()
        )

    def render_results(self, model):
        """Load the stored JSON result, if a file is selected, and display it."""
        json_file = model.get_json_file()
        if json_file is None:
            return

        with open(json_file) as f:
            data_json = json.load(f)
        st.subheader(model.subheader_3)
        st.markdown("---")
        st.json(data_json)
        st.markdown("---")
toolbar/views/model_training.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
class ModelTraining:
    """Placeholder page for model training; it only renders a title."""

    class Model:
        """Static texts of the page."""

        pageTitle = "Model Training"

    def view(self, model):
        """Render ``model.pageTitle`` as the page title."""
        title = model.pageTitle
        st.title(title)
toolbar/views/model_tuning.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+
3
+
4
class ModelTuning:
    """Placeholder page for model tuning; it only renders a title."""

    class Model:
        """Static texts of the page."""

        pageTitle = "Model Tuning"

    def view(self, model):
        """Render ``model.pageTitle`` as the page title."""
        title = model.pageTitle
        st.title(title)
toolbar/views/setup.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import json
3
+ import pandas as pd
4
+ from tools import agstyler
5
+ from tools.agstyler import PINLEFT
6
+ from toolbar import component_toolbar_buttons
7
+
8
+
9
class Setup:
    """Streamlit page for editing the label and group catalogues stored as JSON files."""

    class Model:
        """Page headers and the paths of the two catalogue files."""

        header1 = "Labels"
        header2 = "Groups"
        labels_file = "docs/labels.json"
        groups_file = "docs/groups.json"

    # Column order of the grid; also the layout of the rows written on save.
    _COLUMNS = ['id', 'name', 'description']

    def view(self, model):
        """Render the Labels/Groups switch and the editor of the chosen catalogue."""
        tab = st.radio("Select", ["Labels", "Groups"], horizontal=True, label_visibility="collapsed")

        if tab == "Labels":
            st.title(model.header1)
            self.setup_labels(model)
        elif tab == "Groups":
            st.title(model.header2)
            self.setup_groups(model)

    def setup_labels(self, model):
        """Edit the ``labels`` list stored in ``model.labels_file``."""
        self._setup_catalogue(model.labels_file, "labels", "Label", "toolbar_buttons_labels")

    def setup_groups(self, model):
        """Edit the ``groups`` list stored in ``model.groups_file``."""
        self._setup_catalogue(model.groups_file, "groups", "Group", "toolbar_buttons_groups")

    @staticmethod
    def _prepend_blank_row(df):
        """Insert, in place, an empty row with the next free id at the top of ``df``."""
        # An empty catalogue has no maximum id to continue from.
        next_id = 1 if df.empty else df['id'].max() + 1
        df.loc[-1] = [next_id, '', '']  # adding a row
        df.index = df.index + 1  # shifting index
        df.sort_index(inplace=True)

    def _setup_catalogue(self, file_path, root_key, name_title, toolbar_key):
        """Render an editable grid with create/delete/save buttons for one catalogue.

        Args:
            file_path: JSON file holding ``{root_key: [{id, name, description}, ...]}``.
            root_key: ``"labels"`` or ``"groups"``; also prefixes the session keys.
            name_title: Header of the ``name`` column.
            toolbar_key: Streamlit key of the toolbar component instance.
        """
        # Session keys are kept per catalogue. With shared keys a pending
        # grid of one tab was picked up by the other tab and could be saved
        # into the wrong file.
        action_key = f"setup_{root_key}_action"
        timestamp_key = f"setup_{root_key}_action_timestamp"
        response_key = f"setup_{root_key}_response"
        selected_key = f"setup_{root_key}_selected_rows"

        self.action_event = False
        if action_key not in st.session_state:
            st.session_state[action_key] = None

        with open(file_path, "r") as f:
            catalogue = json.load(f)

        data = [{'id': entry['id'], 'name': entry['name'], 'description': entry['description']}
                for entry in catalogue[root_key]]
        # Explicit columns keep the frame usable when the catalogue is empty.
        self.df = pd.DataFrame(data, columns=self._COLUMNS)

        formatter = {
            'id': ('ID', {'hide': True}),
            'name': (name_title, {**PINLEFT, 'editable': True}),
            'description': ('Description', {**PINLEFT, 'editable': True})
        }

        value = component_toolbar_buttons(key=toolbar_key, buttons={
            'create': False,
            'delete': False,
            'save': False,
        })

        if value is not None:
            # The component returns its last value on every rerun; only a new
            # timestamp marks a fresh click.
            fresh = (timestamp_key not in st.session_state
                     or st.session_state[timestamp_key] != value['timestamp'])
            self.action_event = fresh
            if fresh:
                st.session_state[timestamp_key] = value['timestamp']

        if value is not None and self.action_event:
            # NOTE(review): the flattened source does not show whether the
            # action assignments sat inside the inner `if` blocks; they are
            # placed at branch level here - confirm against the repository.
            previous = st.session_state[action_key]
            if value['action'] == 'create':
                if previous != 'delete':
                    self._prepend_blank_row(self.df)
                st.session_state[action_key] = 'create'
            elif value['action'] == 'delete':
                if previous != 'delete' and previous != 'create':
                    rows = st.session_state[selected_key]
                    if len(rows) > 0:
                        idx = rows[0]['_selectedRowNodeInfo']['nodeRowIndex']
                        self.df.drop(self.df.index[idx], inplace=True)
                        self.df.reset_index(drop=True, inplace=True)
                st.session_state[action_key] = 'delete'
            elif value['action'] == 'save':
                st.session_state[action_key] = 'save'

        action = st.session_state[action_key]

        # Continue from the grid content captured on an earlier run, if any;
        # a save consumes it.
        if action in ('save', 'create', 'delete') and response_key in st.session_state:
            if st.session_state[response_key] is not None:
                self.df = st.session_state[response_key]
                if action == 'save':
                    st.session_state[response_key] = None

        response = agstyler.draw_grid(
            self.df,
            formatter=formatter,
            fit_columns=True,
            pagination_size=10,
            selection="single",
            use_checkbox=False
        )

        st.session_state[selected_key] = response['selected_rows']

        if not self.action_event:
            return

        if action in ('create', 'delete'):
            st.session_state[response_key] = response['data']
        elif action == 'save':
            catalogue[root_key] = [{'id': row[0], 'name': row[1], 'description': row[2]}
                                   for row in response['data'].values.tolist()]
            with open(file_path, "w") as f:
                json.dump(catalogue, f, indent=2)