ignacioct commited on
Commit
b6d9f6b
β€’
1 Parent(s): ac15c95

annotations among languages ready

Browse files
Files changed (4) hide show
  1. .gitignore +80 -0
  2. README.md +3 -5
  3. app.py +248 -0
  4. requirements.txt +72 -0
.gitignore ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Distribution / packaging
7
+ .Python
8
+ build/
9
+ develop-eggs/
10
+ dist/
11
+ downloads/
12
+ eggs/
13
+ .eggs/
14
+ lib/
15
+ lib64/
16
+ parts/
17
+ sdist/
18
+ var/
19
+ wheels/
20
+ share/python-wheels/
21
+ *.egg-info/
22
+ .installed.cfg
23
+ *.egg
24
+ MANIFEST
25
+
26
+ # IDEs and editors
27
+ .idea/
28
+ .vscode/
29
+ *.sublime-project
30
+ *.sublime-workspace
31
+
32
+ # Installer logs
33
+ pip-log.txt
34
+ pip-delete-this-directory.txt
35
+
36
+ # Unit test / coverage reports
37
+ htmlcov/
38
+ .tox/
39
+ .nox/
40
+ .coverage
41
+ .coverage.*
42
+ .cache
43
+ nosetests.xml
44
+ coverage.xml
45
+ *.cover
46
+ *.py,cover
47
+ .hypothesis/
48
+ .pytest_cache/
49
+ cover/
50
+
51
+ # Sphinx documentation
52
+ docs/_build/
53
+
54
+ # Jupyter Notebook
55
+ .ipynb_checkpoints
56
+
57
+ # pyenv
58
+ # For a library or package, you might want to ignore these files since the code is
59
+ # intended to run in multiple environments; otherwise, check them in:
60
+ .python-version
61
+
62
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
63
+ __pypackages__/
64
+
65
+ # Environments
66
+ .env
67
+ .venv
68
+ env/
69
+ venv/
70
+ ENV/
71
+ env.bak/
72
+ venv.bak/
73
+
74
+ # mkdocs documentation
75
+ /site
76
+
77
+ # Other
78
+ *.log
79
+ *.swp
80
+ .DS_Store
README.md CHANGED
@@ -1,13 +1,11 @@
1
  ---
2
- title: PromptTranslationMultilingualDashboard
3
- emoji: πŸ†
4
  colorFrom: green
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.22.0
8
  app_file: app.py
9
- pinned: false
10
  license: apache-2.0
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Multilingual Dashboard - Multilingual Prompt Evaluation Project Dashboard
3
+ emoji: 🌎
4
  colorFrom: green
5
  colorTo: green
6
  sdk: gradio
7
  sdk_version: 4.22.0
8
  app_file: app.py
9
+ pinned: true
10
  license: apache-2.0
11
  ---
 
 
app.py ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import os
3
+ from typing import Dict, Tuple
4
+ from uuid import UUID
5
+
6
+ import altair as alt
7
+ import argilla as rg
8
+ from argilla.feedback import FeedbackDataset
9
+ from argilla.client.feedback.dataset.remote.dataset import RemoteFeedbackDataset
10
+ import gradio as gr
11
+ import pandas as pd
12
+
13
# Translation of legends and titles
ANNOTATED = "Annotated"
NUMBER_ANNOTATED = "Total Annotations"
PENDING = "Pending Annotations"

NUMBER_ANNOTATORS = "Number of Annotators"
NAME = "Username"
NUMBER_ANNOTATIONS = "Number of Annotations"

CATEGORY = "Category"

# Languages the dashboard can display; fetch_data looks up
# <LANGUAGE>_API_URL / _API_KEY / _DATASET / _WORKSPACE env vars per entry.
SUPPORTED_LANGUAGES = [
    "Spanish",
]
27
+
28
+
29
def get_user_annotations_dictionary(
    dataset: FeedbackDataset | RemoteFeedbackDataset,
) -> Dict[str, int]:
    """
    Return a mapping from annotator username to their number of annotations.

    Args:
        dataset: The (remote) feedback dataset to be analyzed.

    Returns:
        A dictionary with the username as the key and the number of
        annotations as the value.
    """
    # Count responses per user id (stringified UUID), avoiding the
    # double-lookup `if key in d / d[key] += 1` pattern.
    counts_by_id: Dict[str, int] = {}
    for record in dataset:
        for response in record.responses:
            user_id = str(response.user_id)
            counts_by_id[user_id] = counts_by_id.get(user_id, 0) + 1

    # Re-key from user id to username. Building a fresh dict is safer than
    # popping keys while iterating a snapshot of the original.
    # NOTE(review): rg.User.from_id presumably performs one API call per
    # distinct annotator — confirm acceptable for large annotator counts.
    return {
        rg.User.from_id(UUID(user_id)).username: count
        for user_id, count in counts_by_id.items()
    }
53
+
54
+
55
def fetch_data() -> Tuple[Dict[str, int], Dict[str, dict]]:
    """
    Fetch annotation progress from every configured Argilla space.

    For each language in SUPPORTED_LANGUAGES, the connection settings are read
    from environment variables following this pattern:
        - SPANISH_API_URL
        - SPANISH_API_KEY
        - SPANISH_DATASET
        - SPANISH_WORKSPACE
    Languages with incomplete configuration are skipped with a message.

    Side effects:
        (Re)populates the module-level `annotations` and `annotators`
        dictionaries, which the chart functions read.

    Returns:
        Tuple[Dict[str, int], Dict[str, dict]]: The total amount of
        annotations per language, and the per-annotator counts per language.
    """

    print(f"Starting to fetch data: {datetime.datetime.now()}")

    # Gather connection settings per language from the environment.
    environment_variables_languages = {}

    for language in SUPPORTED_LANGUAGES:

        print("Fetching data for: ", language)

        prefix = language.upper()
        settings = {
            "api_url": os.getenv(f"{prefix}_API_URL"),
            "api_key": os.getenv(f"{prefix}_API_KEY"),
            "dataset_name": os.getenv(f"{prefix}_DATASET"),
            "workspace_name": os.getenv(f"{prefix}_WORKSPACE"),
        }

        # Validate ALL four variables (the original only checked the URL,
        # which deferred the failure to an opaque connection error later).
        if not all(settings.values()):
            print(f"Missing environment variables for {language}")
            continue

        environment_variables_languages[language] = settings

    # The chart functions read these module-level dicts, so they are rebuilt
    # globally on every fetch.
    global annotations, annotators
    annotations = {}
    annotators = {}

    # Connect to each space and obtain the total annotations and annotators.
    for language, environment_variables in environment_variables_languages.items():
        rg.init(
            api_url=environment_variables["api_url"],
            api_key=environment_variables["api_key"],
        )

        dataset = rg.FeedbackDataset.from_argilla(
            environment_variables["dataset_name"],
            workspace=environment_variables["workspace_name"],
        )

        # Only submitted responses count as finished annotations.
        target_dataset = dataset.filter_by(response_status=["submitted"])

        annotations[language.lower()] = len(target_dataset)
        annotators[language.lower()] = {
            "annotators": get_user_annotations_dictionary(target_dataset)
        }

    print(f"Data fetched: {datetime.datetime.now()}")

    return annotations, annotators
121
+
122
+
123
def kpi_chart_total_annotations() -> alt.Chart:
    """
    Return a KPI chart with the total number of annotations across languages.

    Reads the module-level `annotations` dict populated by fetch_data().

    Returns:
        An altair chart rendering the total as a single large number.
    """

    # Sum the per-language counts gathered by fetch_data().
    total_annotations = sum(annotations.values())

    data = pd.DataFrame({"Category": [NUMBER_ANNOTATED], "Value": [total_annotations]})

    # A single large centered text mark acts as the KPI "big number".
    chart = (
        alt.Chart(data)
        .mark_text(fontSize=100, align="center", baseline="middle", color="#e68b39")
        .encode(text="Value:N")
        .properties(title=NUMBER_ANNOTATED, width=250, height=200)
    )

    return chart
146
+
147
+
148
def donut_chart_total() -> alt.Chart:
    """
    Return a donut chart with the total annotation progress per language.

    Reads the module-level `annotations` dict populated by fetch_data().

    Returns:
        An altair chart with the donut chart.
    """

    # One slice per language, sized by its number of submitted annotations.
    source = pd.DataFrame(
        {
            "values": list(annotations.values()),
            "category": list(annotations.keys()),
        }
    )

    base = alt.Chart(source).encode(
        theta=alt.Theta("values:Q", stack=True),
        radius=alt.Radius(
            "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
        ),
        color=alt.Color(
            field="category",
            type="nominal",
            legend=alt.Legend(title=CATEGORY),
        ),
    )

    # Layer the donut arcs with a text label on each slice.
    c1 = base.mark_arc(innerRadius=20, stroke="#fff")
    c2 = base.mark_text(radiusOffset=20).encode(text="values:Q")

    chart = c1 + c2

    return chart
188
+
189
+
190
def main() -> None:
    """Fetch the annotation data and launch the Gradio dashboard."""

    # Populate the global annotation/annotator dicts before building the UI.
    fetch_data()

    # To avoid the orange border for the Gradio elements that are in constant loading
    css = """
    .generating {
        border: none;
    }
    """

    with gr.Blocks(css=css) as demo:
        gr.Markdown(
            """
            # 🌍 Translation Efforts Dashboard - Multilingual Prompt Evaluation Project
            You can check out the progress done in each language for the Multilingual Prompt Evaluation Project in this dashboard. If you want to add a new language to this dashboard, please open an issue and we will contact you to obtain the necessary API KEYs and URLs include your language in this dashboard.

            ## How to participate
            Participating is easy. Go to the [annotation space](https://somosnlp-dibt-prompt-translation-for-es.hf.space/), log in or create a Hugging Face account, and you can start working.
            """
        )

        # Plain string (was an f-string with no placeholders — ruff F541).
        gr.Markdown(
            """
            ## 🚀 Annotations among Languages
            Here you can see the progress of the annotations among the different languages.
            """
        )

        with gr.Row():

            # Charts are rendered via demo.load so they refresh on page load.
            kpi_chart_annotations = gr.Plot(label="Plot")
            demo.load(
                kpi_chart_total_annotations,
                inputs=[],
                outputs=[kpi_chart_annotations],
            )

            donut_languages = gr.Plot(label="Plot")
            demo.load(
                donut_chart_total,
                inputs=[],
                outputs=[donut_languages],
            )

        gr.Markdown(
            """
            ## 👾 Hall of Fame
            Check out the users with more contributions among the different translation efforts.

            """
        )

        # Launch the Gradio interface
        demo.launch()
245
+
246
+
247
# Script entry point: fetch the data and launch the dashboard.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.2.0
3
+ annotated-types==0.6.0
4
+ anyio==4.2.0
5
+ apscheduler==3.10.4
6
+ argilla==1.23.0
7
+ attrs==23.2.0
8
+ backoff==2.2.1
9
+ certifi==2024.2.2
10
+ charset-normalizer==3.3.2
11
+ click==8.1.7
12
+ colorama==0.4.6
13
+ contourpy==1.2.0
14
+ cycler==0.12.1
15
+ Deprecated==1.2.14
16
+ exceptiongroup==1.2.0
17
+ fastapi==0.109.2
18
+ ffmpy==0.3.1
19
+ filelock==3.13.1
20
+ fonttools==4.48.1
21
+ fsspec==2024.2.0
22
+ gradio==4.17.0
23
+ gradio_client==0.9.0
24
+ h11==0.14.0
25
+ httpcore==1.0.2
26
+ httpx==0.26.0
27
+ huggingface-hub==0.20.3
28
+ idna==3.6
29
+ importlib-resources==6.1.1
30
+ Jinja2==3.1.3
31
+ jsonschema==4.21.1
32
+ jsonschema-specifications==2023.12.1
33
+ kiwisolver==1.4.5
34
+ markdown-it-py==3.0.0
35
+ MarkupSafe==2.1.5
36
+ matplotlib==3.8.2
37
+ mdurl==0.1.2
38
+ monotonic==1.6
39
+ numpy==1.23.5
40
+ orjson==3.9.13
41
+ packaging==23.2
42
+ pandas==1.5.3
43
+ pillow==10.2.0
44
+ pydantic==2.6.1
45
+ pydantic_core==2.16.2
46
+ pydub==0.25.1
47
+ Pygments==2.17.2
48
+ pyparsing==3.1.1
49
+ python-dateutil==2.8.2
50
+ python-multipart==0.0.7
51
+ pytz==2024.1
52
+ PyYAML==6.0.1
53
+ referencing==0.33.0
54
+ requests==2.31.0
55
+ rich==13.7.0
56
+ rpds-py==0.17.1
57
+ ruff==0.2.1
58
+ semantic-version==2.10.0
59
+ shellingham==1.5.4
60
+ six==1.16.0
61
+ sniffio==1.3.0
62
+ starlette==0.36.3
63
+ tomlkit==0.12.0
64
+ toolz==0.12.1
65
+ tqdm==4.66.1
66
+ typer==0.9.0
67
+ typing_extensions==4.9.0
68
+ urllib3==2.2.0
69
+ uvicorn==0.27.0.post1
70
+ vega-datasets==0.9.0
71
+ websockets==11.0.3
72
+ wrapt==1.14.1