ignacioct committed on
Commit
58d8c29
1 Parent(s): 63501f6

Add application file

Browse files
Files changed (1) hide show
  1. app.py +217 -0
app.py ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Dict, Tuple
3
+ from uuid import UUID
4
+
5
+ import altair as alt
6
+ import argilla as rg
7
+ from argilla.feedback import FeedbackDataset
8
+ from argilla.client.feedback.dataset.remote.dataset import RemoteFeedbackDataset
9
+ import gradio as gr
10
+ import pandas as pd
11
+
12
+
13
def obtain_source_target_datasets() -> (
    Tuple[
        FeedbackDataset | RemoteFeedbackDataset, FeedbackDataset | RemoteFeedbackDataset
    ]
):
    """
    Load the two Argilla datasets that feed the dashboard.

    Dataset names and workspaces are read from the environment variables
    SOURCE_DATASET / SOURCE_WORKSPACE and RESULTS_DATASET / RESULTS_WORKSPACE.

    Returns:
        A ``(source, target)`` tuple. The source dataset is narrowed to the
        records whose response status is 'pending'; the target dataset is
        returned unfiltered.
    """

    # Public dataset, reduced to the records still in 'pending' status.
    pending_source = rg.FeedbackDataset.from_argilla(
        os.getenv("SOURCE_DATASET"),
        workspace=os.getenv("SOURCE_WORKSPACE"),
    ).filter_by(response_status=["pending"])

    # Results dataset from the private workspace; callers read the users'
    # responses from it.
    results = rg.FeedbackDataset.from_argilla(
        os.getenv("RESULTS_DATASET"),
        workspace=os.getenv("RESULTS_WORKSPACE"),
    )

    return pending_source, results
38
+
39
+
40
def get_user_annotations_dictionary(
    dataset: FeedbackDataset | RemoteFeedbackDataset,
) -> Dict[str, int]:
    """
    Count how many responses each user has contributed to ``dataset``.

    Every response of every record is visited and tallied by its ``user_id``;
    the ids are then resolved to usernames with ``rg.User.from_id``.

    Args:
        dataset: The dataset to be analyzed.
    Returns:
        A dictionary with the username as the key and the number of responses
        by that user as the value, in the order users were first seen.
    """
    responses_per_user_id: Dict[str, int] = {}
    for record in dataset:
        for response in record.responses:
            # NOTE(review): a response with no user_id would turn into the
            # string "None" and make UUID("None") raise ValueError below, so
            # it is skipped. Confirm whether such responses can occur here.
            if response.user_id is None:
                continue
            user_id = str(response.user_id)
            responses_per_user_id[user_id] = responses_per_user_id.get(user_id, 0) + 1

    # Build a separate dictionary keyed by username instead of renaming keys
    # in place, so a username can never overwrite an id that is still waiting
    # to be resolved.
    output: Dict[str, int] = {}
    for user_id, count in responses_per_user_id.items():
        username = rg.User.from_id(UUID(user_id)).username
        output[username] = output.get(username, 0) + count

    return output
64
+
65
+
66
def donut_chart() -> alt.Chart:
    """
    Build a donut chart with the number of annotated and pending records.

    The "Annotated" slice is the size of the source dataset returned by
    ``obtain_source_target_datasets``; the "Pending" slice is the
    TARGET_RECORDS environment variable minus that count, never below zero.

    Returns:
        An altair chart with the donut chart (arc layer plus value labels).

    Raises:
        ValueError: If TARGET_RECORDS is unset or is not an integer.
    """

    # Validate the configuration before doing any remote work, so a missing
    # variable is reported by name instead of as an opaque int(None) TypeError.
    raw_target = os.getenv("TARGET_RECORDS")
    if raw_target is None:
        raise ValueError(
            "The TARGET_RECORDS environment variable must be set to an integer."
        )
    target_records = int(raw_target)

    # NOTE(review): the source dataset comes back filtered by response status
    # 'pending', yet its length is reported as "Annotated" here. Confirm the
    # intended meaning of TARGET_RECORDS before swapping the two values.
    source_dataset, _ = obtain_source_target_datasets()
    annotated_records = len(source_dataset)
    # Clamp at zero: once the count exceeds the target, a negative slice
    # would be handed to the theta encoding.
    pending_records = max(target_records - annotated_records, 0)

    source = pd.DataFrame(
        {
            "values": [annotated_records, pending_records],
            "category": ["Annotated", "Pending"],  # Legend label per slice
        }
    )

    base = alt.Chart(source).encode(
        theta=alt.Theta("values:Q", stack=True),
        radius=alt.Radius(
            "values", scale=alt.Scale(type="sqrt", zero=True, rangeMin=20)
        ),
        color=alt.Color("category:N", legend=alt.Legend(title="Category")),
    )

    # Arc layer draws the donut; text layer prints each slice's value.
    arcs = base.mark_arc(innerRadius=20, stroke="#fff")
    labels = base.mark_text(radiusOffset=10).encode(text="values:Q")

    return arcs + labels
100
+
101
+
102
def kpi_chart() -> alt.Chart:
    """
    Build a single-number KPI chart showing the total amount of annotators.

    The number is the count of distinct users returned by
    ``get_user_annotations_dictionary`` for the target dataset.

    Returns:
        An altair chart that renders the annotator count as large text.
    """

    # Only the target dataset (second element) is needed for this KPI.
    datasets = obtain_source_target_datasets()
    annotations_by_user = get_user_annotations_dictionary(datasets[1])

    # One-row frame: altair needs tabular data even for a single figure.
    kpi_frame = pd.DataFrame(
        {"Category": ["Total Annotators"], "Value": [len(annotations_by_user)]}
    )

    text_mark = alt.Chart(kpi_frame).mark_text(
        fontSize=100, align="center", baseline="middle", color="steelblue"
    )
    encoded = text_mark.encode(text="Value:N")
    return encoded.properties(title="Number of Annotators", width=250, height=200)
127
+
128
+
129
def obtain_top_5_users(user_ids_annotations: Dict[str, int]) -> pd.DataFrame:
    """
    Rank users by number of annotations and keep the five highest.

    Args:
        user_ids_annotations: A dictionary whose keys identify the users (they
            become the "Name" column) and whose values are their annotation
            counts (the "Annotated Records" column).

    Returns:
        A pandas dataframe with at most five rows, sorted by
        "Annotated Records" in descending order. Fewer rows are returned when
        the dictionary has fewer than five entries.
    """

    column_names = ["Name", "Annotated Records"]
    ranking = pd.DataFrame(user_ids_annotations.items(), columns=column_names)
    return ranking.sort_values(by="Annotated Records", ascending=False).head(5)
145
+
146
+
147
def main() -> None:
    """
    Connect to Argilla, build the Gradio dashboard and launch it.

    Connection settings are read from the ARGILLA_API_URL, ARGILLA_API_KEY and
    HF_TOKEN environment variables. The two plots are registered with
    ``demo.load`` and therefore recomputed on every page load; the top-5 table
    is computed once, before the interface is built.
    """

    # Connect to the space with rg.init()
    rg.init(
        api_url=os.getenv("ARGILLA_API_URL"),
        api_key=os.getenv("ARGILLA_API_KEY"),
        extra_headers={"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"},
    )

    # Only the target dataset is used here; donut_chart loads the source
    # dataset on its own.
    _, target_dataset = obtain_source_target_datasets()
    user_ids_annotations = get_user_annotations_dictionary(target_dataset)

    # NOTE(review): this table is a snapshot taken at startup, while the plots
    # below refresh on each page load. Confirm whether it should refresh too.
    top5_dataframe = obtain_top_5_users(user_ids_annotations)

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            # 🗣️ The Prompt Collective Dashboard

            This Gradio dashboard shows the progress of the first "Data is Better Together" initiative to understand and collect good quality and diverse prompt for the OSS AI community.
            If you want to contribute to OSS AI, join [the Prompt Collective HF Space](https://huggingface.co/spaces/DIBT/prompt-collective).
            """
        )
        gr.Markdown(
            """
            ## 🚀 Contributors Progress

            How many records have been submitted, how many are still pending?
            """
        )
        plot = gr.Plot(label="Plot")
        demo.load(
            donut_chart,
            inputs=[],
            outputs=[plot],
        )

        gr.Markdown(
            """
            ## 👾 Contributors Hall of Fame
            The number of all annotators and the top 5 users with the most responses are:
            """
        )

        with gr.Row():

            plot2 = gr.Plot(label="Plot")
            demo.load(
                kpi_chart,
                inputs=[],
                outputs=[plot2],
            )

            # No trailing comma after the call: it would wrap the component
            # in a throwaway one-element tuple.
            gr.Dataframe(
                value=top5_dataframe,
                headers=["Name", "Annotated Records"],
                datatype=[
                    "str",
                    "number",
                ],
                row_count=5,
                col_count=(2, "fixed"),
                interactive=False,
            )

    # Launch the Gradio interface
    demo.launch()


# Start the dashboard only when this file is executed as a script.
if __name__ == "__main__":
    main()