ignacioct committed on
Commit c5c9597
1 Parent(s): 156a7e8

pushing the application

Files changed (4)
  1. .gitignore +160 -0
  2. README.md +5 -5
  3. app.py +594 -0
  4. requirements.txt +72 -0
.gitignore ADDED
@@ -0,0 +1,160 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/#use-with-ide
+ .pdm.toml
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
README.md CHANGED
@@ -1,10 +1,10 @@
  ---
- title: Benchmark Annotation Argilla Dashboard
- emoji: 🏢
- colorFrom: gray
- colorTo: green
+ title: SomosNLPDashboard
+ emoji: 🌖
+ colorFrom: purple
+ colorTo: yellow
  sdk: gradio
- sdk_version: 4.20.0
+ sdk_version: 4.19.2
  app_file: app.py
  pinned: false
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,594 @@
+ """
+ Dashboard to visualize the progress of the SomosNLP project.
+ by Argilla.
+
+ This dashboard shows the progress of the SomosNLP project, including the number of annotated and pending records, the top annotators, and the remaining records to be annotated.
+ The data is fetched from the source datasets and updated every 5 minutes.
+ Due to Gradio's limitation on what can be passed as input to its graph methods, the data is fetched outside of the graph methods and stored in global variables. Therefore,
+ a function for each graph-dataset pair is needed. Moreover, to avoid circular imports, all the functions must be
+ in the same Python file. This behavior is not ideal, and could be improved if input parameters could be passed to graph functions in Gradio.
+ """
+
+ import datetime
+ import os
+ from typing import Dict, List, Tuple
+ from uuid import UUID
+
+ import altair as alt
+ from apscheduler.schedulers.background import BackgroundScheduler
+ import argilla as rg
+ from argilla.feedback import FeedbackDataset
+ from argilla.client.feedback.dataset.remote.dataset import RemoteFeedbackDataset
+ import gradio as gr
+ import pandas as pd
+
+
+ def get_source_datasets() -> Tuple[
+     FeedbackDataset | RemoteFeedbackDataset,
+     FeedbackDataset | RemoteFeedbackDataset,
+     FeedbackDataset | RemoteFeedbackDataset,
+ ]:
+     """
+     This function returns the source datasets to be shown in the visualization. The dataset names
+     and the workspace name are obtained from the environment variables.
+
+     Returns:
+         A tuple with the three source datasets.
+     """
+
+     return (
+         rg.FeedbackDataset.from_argilla(
+             os.getenv("SOURCE_DATASET_1"), workspace=os.getenv("SOURCE_WORKSPACE")
+         ),
+         rg.FeedbackDataset.from_argilla(
+             os.getenv("SOURCE_DATASET_2"), workspace=os.getenv("SOURCE_WORKSPACE")
+         ),
+         rg.FeedbackDataset.from_argilla(
+             os.getenv("SOURCE_DATASET_3"), workspace=os.getenv("SOURCE_WORKSPACE")
+         ),
+     )
+
+
+ def get_user_annotations_dictionary(
+     datasets: List[FeedbackDataset | RemoteFeedbackDataset],
+ ) -> Dict[str, int]:
+     """
+     This function returns a dictionary with the username as the key and the number of annotations as the value.
+     Annotations from all datasets are merged into the same dictionary.
+
+     Args:
+         datasets: A list with the datasets used to obtain the annotations and the annotators.
+     Returns:
+         A dictionary with the username as the key and the number of annotations as the value.
+     """
+     output = {}
+     for dataset in datasets:
+         for record in dataset:
+             for response in record.responses:
+                 if str(response.user_id) not in output.keys():
+                     output[str(response.user_id)] = 1
+                 else:
+                     output[str(response.user_id)] += 1
+
+     # Rename the keys from user ids to usernames
+     for key in list(output.keys()):
+         output[rg.User.from_id(UUID(key)).username] = output.pop(key)
+
+     return output
+
+
+ def donut_chart_1() -> alt.Chart:
+     """
+     This function returns a donut chart with the number of annotated and pending records for the first dataset.
+
+     Returns:
+         An altair chart with the donut chart.
+     """
+
+     annotated_records = len(dataset1.filter_by(response_status=["submitted"]))
+     pending_records = len(dataset1) - annotated_records
+
+     source = pd.DataFrame(
+         {
+             "values": [annotated_records, pending_records],
+             "category": ["Annotated", "Pending"],  # Add a new column for categories
+         }
+     )
+
+     base = alt.Chart(source).encode(
+         theta=alt.Theta("values:Q", stack=True),
+         radius=alt.Radius(
+             "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
+         ),
+         color=alt.Color("category:N", legend=alt.Legend(title="Category")),
+     )
+
+     c1 = base.mark_arc(innerRadius=20, stroke="#fff")
+
+     c2 = base.mark_text(radiusOffset=10).encode(text="values:Q")
+
+     chart = c1 + c2
+
+     return chart
+
+
+ def donut_chart_2() -> alt.Chart:
+     """
+     This function returns a donut chart with the number of annotated and pending records for the second dataset.
+
+     Returns:
+         An altair chart with the donut chart.
+     """
+
+     annotated_records = len(dataset2.filter_by(response_status=["submitted"]))
+     pending_records = len(dataset2) - annotated_records
+
+     source = pd.DataFrame(
+         {
+             "values": [annotated_records, pending_records],
+             "category": ["Annotated", "Pending"],  # Add a new column for categories
+         }
+     )
+
+     base = alt.Chart(source).encode(
+         theta=alt.Theta("values:Q", stack=True),
+         radius=alt.Radius(
+             "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
+         ),
+         color=alt.Color("category:N", legend=alt.Legend(title="Category")),
+     )
+
+     c1 = base.mark_arc(innerRadius=20, stroke="#fff")
+
+     c2 = base.mark_text(radiusOffset=10).encode(text="values:Q")
+
+     chart = c1 + c2
+
+     return chart
+
+
+ def donut_chart_3() -> alt.Chart:
+     """
+     This function returns a donut chart with the number of annotated and pending records for the third dataset.
+
+     Returns:
+         An altair chart with the donut chart.
+     """
+
+     annotated_records = len(dataset3.filter_by(response_status=["submitted"]))
+     pending_records = len(dataset3) - annotated_records
+
+     source = pd.DataFrame(
+         {
+             "values": [annotated_records, pending_records],
+             "category": ["Annotated", "Pending"],  # Add a new column for categories
+         }
+     )
+
+     base = alt.Chart(source).encode(
+         theta=alt.Theta("values:Q", stack=True),
+         radius=alt.Radius(
+             "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
+         ),
+         color=alt.Color("category:N", legend=alt.Legend(title="Category")),
+     )
+
+     c1 = base.mark_arc(innerRadius=20, stroke="#fff")
+
+     c2 = base.mark_text(radiusOffset=10).encode(text="values:Q")
+
+     chart = c1 + c2
+
+     return chart
+
+
+ def kpi_chart_submitted_1() -> alt.Chart:
+     """
+     This function returns a KPI chart with the total number of records that have been annotated, for the first dataset.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     total = len(dataset1.filter_by(response_status=["submitted"]))
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame({"Category": ["Total completed"], "Value": [total]})
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Total completed", width=250, height=200)
+     )
+
+     return chart
+
+
+ def kpi_chart_submitted_2() -> alt.Chart:
+     """
+     This function returns a KPI chart with the total number of records that have been annotated, for the second dataset.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     total = len(dataset2.filter_by(response_status=["submitted"]))
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame({"Category": ["Total completed"], "Value": [total]})
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Total completed", width=250, height=200)
+     )
+
+     return chart
+
+
+ def kpi_chart_submitted_3() -> alt.Chart:
+     """
+     This function returns a KPI chart with the total number of records that have been annotated, for the third dataset.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     total = len(dataset3.filter_by(response_status=["submitted"]))
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame({"Category": ["Total completed"], "Value": [total]})
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Total completed", width=250, height=200)
+     )
+
+     return chart
+
+
+ def kpi_chart_remaining_1() -> alt.Chart:
+     """
+     This function returns a KPI chart with the remaining number of records to be annotated, for the first dataset.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     annotated_records = len(dataset1.filter_by(response_status=["submitted"]))
+     pending_records = len(dataset1) - annotated_records
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame({"Category": ["Total remaining"], "Value": [pending_records]})
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Total remaining", width=250, height=200)
+     )
+
+     return chart
+
+
+ def kpi_chart_remaining_2() -> alt.Chart:
+     """
+     This function returns a KPI chart with the remaining number of records to be annotated, for the second dataset.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     annotated_records = len(dataset2.filter_by(response_status=["submitted"]))
+     pending_records = len(dataset2) - annotated_records
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame({"Category": ["Total remaining"], "Value": [pending_records]})
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Total remaining", width=250, height=200)
+     )
+
+     return chart
+
+
+ def kpi_chart_remaining_3() -> alt.Chart:
+     """
+     This function returns a KPI chart with the remaining number of records to be annotated, for the third dataset.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     annotated_records = len(dataset3.filter_by(response_status=["submitted"]))
+     pending_records = len(dataset3) - annotated_records
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame({"Category": ["Total remaining"], "Value": [pending_records]})
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Total remaining", width=250, height=200)
+     )
+
+     return chart
+
+
+ def render_hub_user_link(hub_id: str) -> str:
+     """
+     This function formats the username with a link to the user's profile in the Hugging Face Hub.
+
+     Args:
+         hub_id: The user's id in the Hugging Face Hub.
+     Returns:
+         A string with the username formatted as a link to the user's profile in the Hugging Face Hub.
+     """
+     link = f"https://huggingface.co/{hub_id}"
+     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{hub_id}</a>'
+
+
+ def kpi_chart_annotators() -> alt.Chart:
+     """
+     This function returns a KPI chart with the total number of annotators.
+
+     Returns:
+         An altair chart with the KPI chart.
+     """
+
+     # Obtain the total number of annotators
+     total_annotators = len(user_ids_annotations)
+
+     # Create a single-value DataFrame for the KPI text
+     data = pd.DataFrame(
+         {"Category": ["Total Contributors"], "Value": [total_annotators]}
+     )
+
+     # Create Altair chart
+     chart = (
+         alt.Chart(data)
+         .mark_text(fontSize=100, align="center", baseline="middle", color="steelblue")
+         .encode(text="Value:N")
+         .properties(title="Number of Contributors", width=250, height=200)
+     )
+
+     return chart
+
+
+ def obtain_top_users(user_ids_annotations: Dict[str, int]) -> pd.DataFrame:
+     """
+     This function returns the top 50 users with the most annotations. The usernames are formatted as links to the user's profile in the Hugging Face Hub.
+
+     Args:
+         user_ids_annotations: A dictionary with the user ids as the key and the number of annotations as the value.
+     Returns:
+         A pandas dataframe with the top 50 users with the most annotations.
+     """
+
+     dataframe = pd.DataFrame(
+         user_ids_annotations.items(), columns=["Name", "Submitted Responses"]
+     )
+     dataframe["Name"] = dataframe["Name"].apply(render_hub_user_link)
+     dataframe = dataframe.sort_values(by="Submitted Responses", ascending=False)
+     return dataframe.head(50)
+
+
+ def get_top() -> pd.DataFrame:
+     """
+     This function returns the top users with the most annotations. The usernames are formatted as links to the user's profile in the Hugging Face Hub.
+
+     Returns:
+         A pandas dataframe with the top users with the most annotations.
+     """
+     return obtain_top_users(user_ids_annotations)
+
+
+ def fetch_data() -> None:
+     """
+     This function fetches the data from the source datasets and updates the global variables.
+     """
+
+     print(f"Starting to fetch data: {datetime.datetime.now()}")
+
+     # Load the datasets as global variables to be able to use them in all Gradio graph methods,
+     # as they usually do not allow arguments.
+     global dataset1, dataset2, dataset3, user_ids_annotations
+     dataset1, dataset2, dataset3 = get_source_datasets()
+     user_ids_annotations = get_user_annotations_dictionary(
+         [dataset1, dataset2, dataset3]
+     )
+
+     # Print the current date and time
+     print(f"Data fetched: {datetime.datetime.now()}")
+
+
+ def main() -> None:
+
+     # Set the update intervals
+     update_interval = 300  # seconds
+     update_interval_charts = 30  # seconds
+
+     # Connect to the space with rg.init()
+     rg.init(
+         api_url=os.getenv("ARGILLA_API_URL"),
+         api_key=os.getenv("ARGILLA_API_KEY"),
+         extra_headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
+     )
+
+     # Initial data fetch
+     fetch_data()
+
+     scheduler = BackgroundScheduler()
+     scheduler.add_job(
+         func=fetch_data, trigger="interval", seconds=update_interval, max_instances=1
+     )
+     scheduler.start()
+
+     # Avoid the orange border on Gradio elements that are in constant loading
+     css = """
+     .generating {
+         border: none;
+     }
+     """
+
+     with gr.Blocks(css=css, title="LLM Benchmark en Español Dashboard") as demo:
+
+         # JS code to force the light theme
+         demo.load(
+             None,
+             None,
+             js="""
+             () => {
+                 const params = new URLSearchParams(window.location.search);
+                 if (!params.has('__theme')) {
+                     params.set('__theme', 'light');
+                     window.location.search = params.toString();
+                 }
+             }""",
+         )
+
+         gr.Markdown(
+             """
+             # 🗣️ SomosNLP Progress Dashboard
+             """
+         )
+
+         gr.Markdown(
+             f"""
+             ## 🚀 Progress in dataset {os.getenv("SOURCE_DATASET_1")}
+             """
+         )
+         with gr.Row():
+
+             plot = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_submitted_1,
+                 inputs=[],
+                 outputs=[plot],
+                 every=update_interval_charts,
+             )
+
+             plot = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_remaining_1,
+                 inputs=[],
+                 outputs=[plot],
+                 every=update_interval_charts,
+             )
+
+             # donut_chart_plotted_1 = gr.Plot(label="Plot")
+             # demo.load(
+             #     donut_chart_1,
+             #     inputs=[],
+             #     outputs=[donut_chart_plotted_1],
+             # )
+
+         gr.Markdown(
+             f"""
+             ## 🚀 Progress in dataset {os.getenv("SOURCE_DATASET_2")}
+             """
+         )
+         with gr.Row():
+
+             plot = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_submitted_2,
+                 inputs=[],
+                 outputs=[plot],
+                 every=update_interval_charts,
+             )
+
+             plot = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_remaining_2,
+                 inputs=[],
+                 outputs=[plot],
+                 every=update_interval_charts,
+             )
+
+             # donut_chart_plotted_2 = gr.Plot(label="Plot")
+             # demo.load(
+             #     donut_chart_2,
+             #     inputs=[],
+             #     outputs=[donut_chart_plotted_2],
+             # )
+
+         gr.Markdown(
+             f"""
+             ## 🚀 Progress in dataset {os.getenv("SOURCE_DATASET_3")}
+             """
+         )
+         with gr.Row():
+
+             plot = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_submitted_3,
+                 inputs=[],
+                 outputs=[plot],
+                 every=update_interval_charts,
+             )
+
+             plot = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_remaining_3,
+                 inputs=[],
+                 outputs=[plot],
+                 every=update_interval_charts,
+             )
+
+             # donut_chart_plotted_3 = gr.Plot(label="Plot")
+             # demo.load(
+             #     donut_chart_3,
+             #     inputs=[],
+             #     outputs=[donut_chart_plotted_3],
+             # )
+
+         gr.Markdown(
+             """
+             ## 👾 Contributors Hall of Fame
+             The number of all contributors and the top contributors:
+             """
+         )
+
+         with gr.Row():
+
+             plot2 = gr.Plot(label="Plot")
+             demo.load(
+                 kpi_chart_annotators,
+                 inputs=[],
+                 outputs=[plot2],
+                 every=update_interval_charts,
+             )
+
+             top_df_plot = gr.Dataframe(
+                 headers=["Name", "Submitted Responses"],
+                 datatype=[
+                     "markdown",
+                     "number",
+                 ],
+                 row_count=50,
+                 col_count=(2, "fixed"),
+                 interactive=False,
+             )
+
+         demo.load(get_top, None, [top_df_plot], every=update_interval_charts)
+
+     # Launch the Gradio interface
+     demo.launch(share=True, debug=True)
+
+
+ if __name__ == "__main__":
+     main()
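A note on the structure above: the module docstring explains that one chart function exists per graph-dataset pair because Gradio's graph methods are called without arguments. Since demo.load() only needs a zero-argument callable, a closure factory is one possible way to fold the near-identical donut_chart_1/2/3 functions into one. This is a sketch under that assumption, reusing the filter_by and Altair logic from the commit; make_donut_chart is a hypothetical helper, not code from the repository:

import altair as alt
import pandas as pd


def make_donut_chart(dataset):
    """Return a zero-argument chart function bound to `dataset`."""

    def donut_chart() -> alt.Chart:
        # Same counting logic as donut_chart_1/2/3 in app.py
        annotated = len(dataset.filter_by(response_status=["submitted"]))
        pending = len(dataset) - annotated
        source = pd.DataFrame(
            {"values": [annotated, pending], "category": ["Annotated", "Pending"]}
        )
        base = alt.Chart(source).encode(
            theta=alt.Theta("values:Q", stack=True),
            radius=alt.Radius(
                "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
            ),
            color=alt.Color("category:N", legend=alt.Legend(title="Category")),
        )
        arcs = base.mark_arc(innerRadius=20, stroke="#fff")
        labels = base.mark_text(radiusOffset=10).encode(text="values:Q")
        return arcs + labels

    return donut_chart


# Hypothetical usage, mirroring the demo.load(...) calls in app.py:
# demo.load(make_donut_chart(dataset1), inputs=[], outputs=[plot], every=30)

functools.partial over a single parameterized chart function would achieve the same effect.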
requirements.txt ADDED
@@ -0,0 +1,72 @@
+ aiofiles==23.2.1
+ altair==5.2.0
+ annotated-types==0.6.0
+ anyio==4.2.0
+ apscheduler==3.10.4
+ argilla==1.23.0
+ attrs==23.2.0
+ backoff==2.2.1
+ certifi==2024.2.2
+ charset-normalizer==3.3.2
+ click==8.1.7
+ colorama==0.4.6
+ contourpy==1.2.0
+ cycler==0.12.1
+ Deprecated==1.2.14
+ exceptiongroup==1.2.0
+ fastapi==0.109.2
+ ffmpy==0.3.1
+ filelock==3.13.1
+ fonttools==4.48.1
+ fsspec==2024.2.0
+ gradio==4.17.0
+ gradio_client==0.9.0
+ h11==0.14.0
+ httpcore==1.0.2
+ httpx==0.26.0
+ huggingface-hub==0.20.3
+ idna==3.6
+ importlib-resources==6.1.1
+ Jinja2==3.1.3
+ jsonschema==4.21.1
+ jsonschema-specifications==2023.12.1
+ kiwisolver==1.4.5
+ markdown-it-py==3.0.0
+ MarkupSafe==2.1.5
+ matplotlib==3.8.2
+ mdurl==0.1.2
+ monotonic==1.6
+ numpy==1.23.5
+ orjson==3.9.13
+ packaging==23.2
+ pandas==1.5.3
+ pillow==10.2.0
+ pydantic==2.6.1
+ pydantic_core==2.16.2
+ pydub==0.25.1
+ Pygments==2.17.2
+ pyparsing==3.1.1
+ python-dateutil==2.8.2
+ python-multipart==0.0.7
+ pytz==2024.1
+ PyYAML==6.0.1
+ referencing==0.33.0
+ requests==2.31.0
+ rich==13.7.0
+ rpds-py==0.17.1
+ ruff==0.2.1
+ semantic-version==2.10.0
+ shellingham==1.5.4
+ six==1.16.0
+ sniffio==1.3.0
+ starlette==0.36.3
+ tomlkit==0.12.0
+ toolz==0.12.1
+ tqdm==4.66.1
+ typer==0.9.0
+ typing_extensions==4.9.0
+ urllib3==2.2.0
+ uvicorn==0.27.0.post1
+ vega-datasets==0.9.0
+ websockets==11.0.3
+ wrapt==1.14.1
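For completeness, app.py reads all of its configuration from environment variables (Space secrets). A minimal local-run sketch follows; every value below is a placeholder, not one taken from this commit:

import os

# Placeholders: substitute the Space's actual secrets and dataset names.
os.environ["ARGILLA_API_URL"] = "https://<argilla-instance>"
os.environ["ARGILLA_API_KEY"] = "<api-key>"
os.environ["HF_TOKEN"] = "<hf-token>"
os.environ["SOURCE_WORKSPACE"] = "<workspace>"
os.environ["SOURCE_DATASET_1"] = "<dataset-1>"
os.environ["SOURCE_DATASET_2"] = "<dataset-2>"
os.environ["SOURCE_DATASET_3"] = "<dataset-3>"

from app import main

main()  # connects to Argilla, starts the background fetcher, launches Gradio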