not-lain committed on
Commit
f04e8bb
·
verified ·
1 Parent(s): 50e0fff

Upload folder using huggingface_hub

Browse files
Files changed (9) hide show
  1. .gitattributes +35 -35
  2. .gitignore +10 -10
  3. .python-version +1 -1
  4. app.py +256 -287
  5. data_to_parquet.py +52 -0
  6. example.json +81 -81
  7. pyproject.toml +12 -12
  8. requirements.txt +99 -99
  9. uv.lock +0 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,10 +1,10 @@
1
- # Python-generated files
2
- __pycache__/
3
- *.py[oc]
4
- build/
5
- dist/
6
- wheels/
7
- *.egg-info
8
-
9
- # Virtual environments
10
- .venv
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
.python-version CHANGED
@@ -1 +1 @@
1
- 3.11
 
1
+ 3.11
app.py CHANGED
@@ -1,287 +1,256 @@
1
- import os
2
- from datetime import datetime
3
- import random
4
-
5
- import pandas as pd
6
- from huggingface_hub import HfApi, hf_hub_download, Repository
7
- from huggingface_hub.repocard import metadata_load
8
-
9
- import gradio as gr
10
- from datasets import load_dataset, Dataset
11
- from huggingface_hub import whoami
12
-
13
- EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
14
- EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
15
- EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.7
16
-
17
- ds = load_dataset(EXAM_DATASET_ID, split="train")
18
-
19
- DATASET_REPO_URL = "https://huggingface.co/datasets/agents-course/certificates"
20
- CERTIFIED_USERS_FILENAME = "certified_students.csv"
21
- CERTIFIED_USERS_DIR = "certificates"
22
- repo = Repository(
23
- local_dir=CERTIFIED_USERS_DIR, clone_from=DATASET_REPO_URL, use_auth_token=os.getenv("HF_TOKEN")
24
- )
25
-
26
- # Convert dataset to a list of dicts and randomly sort
27
- quiz_data = ds.to_pandas().to_dict("records")
28
- random.shuffle(quiz_data)
29
-
30
- # Limit to max questions if specified
31
- if EXAM_MAX_QUESTIONS:
32
- quiz_data = quiz_data[: int(EXAM_MAX_QUESTIONS)]
33
-
34
-
35
- def on_user_logged_in(token: gr.OAuthToken | None):
36
- """
37
- If the user has a valid token, show Start button.
38
- Otherwise, keep the login button visible.
39
- """
40
- if token is not None:
41
- return [
42
- gr.update(visible=False), # login button visibility
43
- gr.update(visible=True), # start button visibility
44
- gr.update(visible=False), # next button visibility
45
- gr.update(visible=False), # submit button visibility
46
- "", # question text
47
- [], # radio choices (empty list = no choices)
48
- "Click 'Start' to begin the quiz", # status message
49
- 0, # question_idx
50
- [], # user_answers
51
- "", # final_markdown content
52
- token, # user token
53
- ]
54
- else:
55
- return [
56
- gr.update(visible=True), # login button visibility
57
- gr.update(visible=False), # start button visibility
58
- gr.update(visible=False), # next button visibility
59
- gr.update(visible=False), # submit button visibility
60
- "", # question text
61
- [], # radio choices
62
- "", # status message
63
- 0, # question_idx
64
- [], # user_answers
65
- "", # final_markdown content
66
- None, # no token
67
- ]
68
-
69
- def add_certified_user(hf_username, pass_percentage, submission_time):
70
- """
71
- Add the certified user to the database
72
- """
73
- print("ADD CERTIFIED USER")
74
- repo.git_pull()
75
- history = pd.read_csv(os.path.join(CERTIFIED_USERS_DIR, CERTIFIED_USERS_FILENAME))
76
-
77
- # Check if this hf_username is already in our dataset:
78
- check = history.loc[history['hf_username'] == hf_username]
79
- if not check.empty:
80
- history = history.drop(labels=check.index[0], axis=0)
81
-
82
- new_row = pd.DataFrame({'hf_username': hf_username, 'pass_percentage': pass_percentage, 'datetime': submission_time}, index=[0])
83
- history = pd.concat([new_row, history[:]]).reset_index(drop=True)
84
-
85
- history.to_csv(os.path.join(CERTIFIED_USERS_DIR, CERTIFIED_USERS_FILENAME), index=False)
86
- repo.push_to_hub(commit_message="Update certified users list")
87
-
88
- def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
89
- """
90
- Create a new dataset from user_answers and push it to the Hub.
91
- Calculates grade and checks against passing threshold.
92
- """
93
- if token is None:
94
- gr.Warning("Please log in to Hugging Face before pushing!")
95
- return
96
-
97
- # Calculate grade
98
- correct_count = sum(1 for answer in user_answers if answer["is_correct"])
99
- total_questions = len(user_answers)
100
- grade = correct_count / total_questions if total_questions > 0 else 0
101
-
102
- if grade < float(EXAM_PASSING_SCORE):
103
- gr.Warning(
104
- f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
105
- )
106
- return f"You scored {grade:.1%}. Please try again to achieve at least {float(EXAM_PASSING_SCORE):.1%}"
107
-
108
- gr.Info("Submitting answers to the Hub. Please wait...", duration=2)
109
-
110
- user_info = whoami(token=token.token)
111
- repo_id = f"{EXAM_DATASET_ID}_student_responses"
112
- submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
113
-
114
- new_ds = Dataset.from_list(user_answers)
115
- new_ds = new_ds.map(
116
- lambda x: {
117
- "username": user_info["name"],
118
- "datetime": submission_time,
119
- "grade": grade,
120
- }
121
- )
122
- new_ds.push_to_hub(repo_id=repo_id, split=user_info["name"])
123
-
124
- # I'm adding a csv version
125
- # The idea, if the user passed, we create a simple row in a csv
126
- print("ADD CERTIFIED USER")
127
- # Add this user to our database
128
- add_certified_user(user_info["name"], grade, submission_time)
129
-
130
- return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
131
-
132
-
133
-
134
-
135
- def handle_quiz(question_idx, user_answers, selected_answer, is_start):
136
- """
137
- Handle quiz state transitions and store answers
138
- """
139
- if not is_start and question_idx < len(quiz_data):
140
- current_q = quiz_data[question_idx]
141
- correct_reference = current_q["correct_answer"]
142
- correct_reference = f"answer_{correct_reference}".lower()
143
- is_correct = selected_answer == current_q[correct_reference]
144
- user_answers.append(
145
- {
146
- "question": current_q["question"],
147
- "selected_answer": selected_answer,
148
- "correct_answer": current_q[correct_reference],
149
- "is_correct": is_correct,
150
- "correct_reference": correct_reference,
151
- }
152
- )
153
- question_idx += 1
154
-
155
- if question_idx >= len(quiz_data):
156
- correct_count = sum(1 for answer in user_answers if answer["is_correct"])
157
- grade = correct_count / len(user_answers)
158
- results_text = (
159
- f"**Quiz Complete!**\n\n"
160
- f"Your score: {grade:.1%}\n"
161
- f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
162
- )
163
- return [
164
- "", # question_text
165
- gr.update(choices=[], visible=False), # hide radio choices
166
- f"{'✅ Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
167
- question_idx,
168
- user_answers,
169
- gr.update(visible=False), # start button visibility
170
- gr.update(visible=False), # next button visibility
171
- gr.update(visible=True), # submit button visibility
172
- results_text, # final results text
173
- ]
174
-
175
- # Show next question
176
- q = quiz_data[question_idx]
177
- return [
178
- f"## Question {question_idx + 1} \n### {q['question']}", # question text
179
- gr.update( # properly update radio choices
180
- choices=[q["answer_a"], q["answer_b"], q["answer_c"], q["answer_d"]],
181
- value=None,
182
- visible=True,
183
- ),
184
- "Select an answer and click 'Next' to continue.",
185
- question_idx,
186
- user_answers,
187
- gr.update(visible=False), # start button visibility
188
- gr.update(visible=True), # next button visibility
189
- gr.update(visible=False), # submit button visibility
190
- "", # clear final markdown
191
- ]
192
-
193
-
194
- def success_message(response):
195
- # response is whatever push_results_to_hub returned
196
- return f"{response}\n\n**Success!**"
197
-
198
-
199
- with gr.Blocks() as demo:
200
- demo.title = f"Dataset Quiz for {EXAM_DATASET_ID}"
201
-
202
- # State variables
203
- question_idx = gr.State(value=0)
204
- user_answers = gr.State(value=[])
205
- user_token = gr.State(value=None)
206
-
207
- with gr.Row(variant="compact"):
208
- gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
209
-
210
- with gr.Row(variant="compact"):
211
- gr.Markdown(
212
- "Log in first, then click 'Start' to begin. Answer each question, click 'Next', and finally click 'Submit' to publish your results to the Hugging Face Hub."
213
- )
214
-
215
- with gr.Row(variant="panel"):
216
- question_text = gr.Markdown("")
217
- radio_choices = gr.Radio(
218
- choices=[], label="Your Answer", scale=1.5, visible=False
219
- )
220
-
221
- with gr.Row(variant="compact"):
222
- status_text = gr.Markdown("")
223
- final_markdown = gr.Markdown("")
224
-
225
- with gr.Row(variant="compact"):
226
- login_btn = gr.LoginButton(visible=True)
227
- start_btn = gr.Button("Start ⏭️", visible=True)
228
- next_btn = gr.Button("Next ⏭️", visible=False)
229
- submit_btn = gr.Button("Submit ✅", visible=False)
230
-
231
- # Wire up the event handlers
232
- login_btn.click(
233
- fn=on_user_logged_in,
234
- inputs=None,
235
- outputs=[
236
- login_btn,
237
- start_btn,
238
- next_btn,
239
- submit_btn,
240
- question_text,
241
- radio_choices,
242
- status_text,
243
- question_idx,
244
- user_answers,
245
- final_markdown,
246
- user_token,
247
- ],
248
- )
249
-
250
- start_btn.click(
251
- fn=handle_quiz,
252
- inputs=[question_idx, user_answers, gr.State(""), gr.State(True)],
253
- outputs=[
254
- question_text,
255
- radio_choices,
256
- status_text,
257
- question_idx,
258
- user_answers,
259
- start_btn,
260
- next_btn,
261
- submit_btn,
262
- final_markdown,
263
- ],
264
- )
265
-
266
- next_btn.click(
267
- fn=handle_quiz,
268
- inputs=[question_idx, user_answers, radio_choices, gr.State(False)],
269
- outputs=[
270
- question_text,
271
- radio_choices,
272
- status_text,
273
- question_idx,
274
- user_answers,
275
- start_btn,
276
- next_btn,
277
- submit_btn,
278
- final_markdown,
279
- ],
280
- )
281
-
282
- submit_btn.click(fn=push_results_to_hub, inputs=[user_answers])
283
-
284
- if __name__ == "__main__":
285
- # Note: If testing locally, you'll need to run `huggingface-cli login` or set HF_TOKEN
286
- # environment variable for the login to work locally.
287
- demo.launch()
 
1
import os
import random

from huggingface_hub import HfApi, whoami

import gradio as gr
from datasets import load_dataset

from data_to_parquet import to_parquet

# Quiz configuration, overridable via environment variables.
# NOTE(review): values read from os.getenv are strings (e.g. "10", "0.7");
# the code converts them with int()/float() at each point of use.
EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.7

# Question bank, loaded once at startup from the Hub dataset.
ds = load_dataset(EXAM_DATASET_ID, split="train")

# Authenticated client used to upload per-student result parquet files.
upload_api = HfApi(token=os.getenv("HF_TOKEN"))
# Convert dataset to a list of dicts and randomly sort
quiz_data = ds.to_pandas().to_dict("records")
random.shuffle(quiz_data)

# Limit to max questions if specified
if EXAM_MAX_QUESTIONS:
    quiz_data = quiz_data[: int(EXAM_MAX_QUESTIONS)]
25
+
26
+
27
def on_user_logged_in(token: gr.OAuthToken | None):
    """
    Reset the quiz UI after a login attempt.

    When *token* is present, hide the login button and reveal Start;
    otherwise keep only the login button visible. In both cases the quiz
    state (question index, collected answers, markdown panes) is cleared.

    Returns a positional list matching the ``outputs`` wired to
    ``login_btn.click``: [login_btn, start_btn, next_btn, submit_btn,
    question_text, radio_choices, status_text, question_idx, user_answers,
    final_markdown, user_token].
    """
    logged_in = token is not None
    return [
        gr.update(visible=not logged_in),  # login button: hide once logged in
        gr.update(visible=logged_in),  # start button: show once logged in
        gr.update(visible=False),  # next button stays hidden until Start
        gr.update(visible=False),  # submit button stays hidden until the end
        "",  # question text cleared
        [],  # radio choices cleared (empty list = no choices)
        "Click 'Start' to begin the quiz" if logged_in else "",  # status message
        0,  # question_idx reset
        [],  # user_answers reset
        "",  # final_markdown cleared
        token if logged_in else None,  # user token (None when not logged in)
    ]
60
+
61
+
62
def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
    """
    Grade the collected answers and, if the user passed, upload the result
    to the students-data dataset on the Hub.

    Args:
        user_answers: list of dicts as built by ``handle_quiz`` — each has
            at least an ``is_correct`` boolean.
        token: OAuth token injected by Gradio's login flow; ``None`` when
            the user is not logged in.

    Side effects: Gradio toast notifications (Warning/Info/Success) and a
    parquet upload via ``to_parquet``. Returns ``None`` in every path.
    """
    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return

    # Calculate grade as the fraction of correct answers (0 when no answers
    # were recorded, avoiding a ZeroDivisionError).
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0

    # EXAM_PASSING_SCORE may be a string from the environment — convert here.
    if grade < float(EXAM_PASSING_SCORE):
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
        )
        return  # do not continue

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    # Resolve the logged-in user's Hub username from their OAuth token.
    user_info = whoami(token=token.token)
    # TODO:
    # check if username already has "username.parquet" in the dataset and download that (or read values directly from dataset viewer if possible)
    # instead of replacing the values check if the new score is better than the old one
    to_parquet(
        upload_api,  # api
        "agents-course/students-data",  # repo_id
        user_info["name"],  # username
        grade,  # unit1 score
        0.0,  # unit2 score
        0.0,  # unit3 score
        0.0,  # unit4 score
        0,  # already certified or not
    )

    gr.Success(
        f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
    )
102
+
103
+
104
def handle_quiz(question_idx, user_answers, selected_answer, is_start):
    """
    Handle quiz state transitions and store answers.

    Called both by Start (is_start=True, nothing to record) and by Next
    (is_start=False, records *selected_answer* for the current question
    and advances). Returns a positional list matching the ``outputs``
    wired in the UI: [question_text, radio_choices update, status_text,
    question_idx, user_answers, start_btn, next_btn, submit_btn,
    final_markdown] — the order is the contract with Gradio.
    """
    # Record the answer for the question just shown (skip on Start).
    if not is_start and question_idx < len(quiz_data):
        current_q = quiz_data[question_idx]
        # "correct_answer" holds a letter like "B"; map it to the column
        # name of the matching answer text, e.g. "answer_b".
        correct_reference = current_q["correct_answer"]
        correct_reference = f"answer_{correct_reference}".lower()
        is_correct = selected_answer == current_q[correct_reference]
        user_answers.append(
            {
                "question": current_q["question"],
                "selected_answer": selected_answer,
                "correct_answer": current_q[correct_reference],
                "is_correct": is_correct,
                "correct_reference": correct_reference,
            }
        )
        question_idx += 1

    # All questions answered: show the final grade and the Submit button.
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers)
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
        )
        return [
            "",  # question_text
            gr.update(choices=[], visible=False),  # hide radio choices
            f"{'✅ Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
            question_idx,
            user_answers,
            gr.update(visible=False),  # start button visibility
            gr.update(visible=False),  # next button visibility
            gr.update(visible=True),  # submit button visibility
            results_text,  # final results text
        ]

    # Show next question
    q = quiz_data[question_idx]
    return [
        f"## Question {question_idx + 1} \n### {q['question']}",  # question text
        gr.update(  # properly update radio choices
            choices=[q["answer_a"], q["answer_b"], q["answer_c"], q["answer_d"]],
            value=None,
            visible=True,
        ),
        "Select an answer and click 'Next' to continue.",
        question_idx,
        user_answers,
        gr.update(visible=False),  # start button visibility
        gr.update(visible=True),  # next button visibility
        gr.update(visible=False),  # submit button visibility
        "",  # clear final markdown
    ]
161
+
162
+
163
def success_message(response):
    """Append a bold success banner to *response* (the message returned by
    push_results_to_hub) for display in a Markdown pane."""
    banner = "**Success!**"
    return f"{response}\n\n{banner}"
166
+
167
+
168
with gr.Blocks() as demo:
    demo.title = f"Dataset Quiz for {EXAM_DATASET_ID}"

    # State variables — per-session quiz progress held by Gradio.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])
    user_token = gr.State(value=None)

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")

    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Answer each question, click 'Next', and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    # Question pane: markdown prompt plus the multiple-choice radio group.
    with gr.Row(variant="panel"):
        question_text = gr.Markdown("")
        radio_choices = gr.Radio(
            choices=[], label="Your Answer", scale=1.5, visible=False
        )

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")
        final_markdown = gr.Markdown("")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton(visible=True)
        start_btn = gr.Button("Start ⏭️", visible=True)
        next_btn = gr.Button("Next ⏭️", visible=False)
        submit_btn = gr.Button("Submit ✅", visible=False)

    # Wire up the event handlers.
    # NOTE(review): the OAuth token parameter of each handler is injected by
    # Gradio (gr.OAuthToken annotation), so it is not listed in `inputs`.
    # Output order below must match each handler's positional return list.
    login_btn.click(
        fn=on_user_logged_in,
        inputs=None,
        outputs=[
            login_btn,
            start_btn,
            next_btn,
            submit_btn,
            question_text,
            radio_choices,
            status_text,
            question_idx,
            user_answers,
            final_markdown,
            user_token,
        ],
    )

    # Start: same handler as Next, with is_start=True and no selected answer.
    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, gr.State(""), gr.State(True)],
        outputs=[
            question_text,
            radio_choices,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
        ],
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, radio_choices, gr.State(False)],
        outputs=[
            question_text,
            radio_choices,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
        ],
    )

    submit_btn.click(fn=push_results_to_hub, inputs=[user_answers])

if __name__ == "__main__":
    # Note: If testing locally, you'll need to run `huggingface-cli login` or set HF_TOKEN
    # environment variable for the login to work locally.
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data_to_parquet.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import tempfile

import pyarrow as pa
import pyarrow.parquet as pq
5
+
6
+
7
# current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
# Maps each column name to a `datasets` feature spec; embedded in the
# parquet metadata so the Hub dataset viewer / datasets library can
# reconstruct typed columns. unit1-unit4 are stored as percentages.
schema = {
    "username": {"_type": "Value", "dtype": "string"},
    "unit1": {"_type": "Value", "dtype": "float64"},
    "unit2": {"_type": "Value", "dtype": "float64"},
    "unit3": {"_type": "Value", "dtype": "float64"},
    "unit4": {"_type": "Value", "dtype": "float64"},
    "certified": {"_type": "Value", "dtype": "int64"},
}
16
+
17
+
18
def to_parquet(
    api,
    repo: str,
    username: str = "",
    unit1: float = 0.0,
    unit2: float = 0.0,
    unit3: float = 0.0,
    unit4: float = 0.0,
    certified: int = 0,
):
    """
    Serialize one student's scores as Parquet and upload to a Hub dataset repo.

    Each student gets their own ``<username>.parquet`` file in the repo, so a
    re-submission simply overwrites the previous upload.

    Args:
        api: authenticated client exposing ``upload_file`` (a
            ``huggingface_hub.HfApi`` instance in the caller).
        repo: dataset repo id to upload into (must already exist).
        username: Hub username; also used as the parquet file name.
        unit1-unit4: per-unit grades as fractions in [0, 1]; stored as
            percentages (0-100).
        certified: 1 once the student is certified, else 0.
    """
    # Scores arrive as fractions (e.g. 0.85) and are stored as percentages.
    # float() keeps the column float64 even when an int 0 is passed
    # (equivalent to the old `x * 100 if x != 0 else 0.0`).
    data = {
        "username": username,
        "unit1": float(unit1) * 100,
        "unit2": float(unit2) * 100,
        "unit3": float(unit3) * 100,
        "unit4": float(unit4) * 100,
        "certified": certified,
    }
    # Export data to Arrow format (single-row table).
    table = pa.Table.from_pylist([data])
    # Add metadata (used by datasets library / Hub dataset viewer)
    table = table.replace_schema_metadata(
        {"huggingface": json.dumps({"info": {"features": schema}})}
    )
    # Write inside a TemporaryDirectory so the local file is always cleaned
    # up, even if the upload raises. (The previous NamedTemporaryFile with
    # delete=False leaked a temp file per call and is not portable to
    # Windows, where a file open by name cannot be rewritten.)
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, f"{username}.parquet")
        pq.write_table(table, local_path)

        api.upload_file(
            repo_id=repo,  # manually created repo
            repo_type="dataset",
            path_in_repo=f"{username}.parquet",  # each user will have their own parquet
            path_or_fileobj=local_path,
        )
example.json CHANGED
@@ -1,82 +1,82 @@
1
- [
2
- {
3
- "question": "Which of the following best describes a Large Language Model (LLM)?",
4
- "answer_a": "A model specializing in language recognition",
5
- "answer_b": "A massive neural network that understands and generates human language",
6
- "answer_c": "A model exclusively used for language data tasks like summarization or classification",
7
- "answer_d": "A rule-based chatbot used for conversations",
8
- "correct_answer": "B"
9
- },
10
- {
11
- "question": "LLMs are typically:",
12
- "answer_a": "Pre-trained on small, curated datasets",
13
- "answer_b": "Trained on large text corpora to capture linguistic patterns",
14
- "answer_c": "Trained purely on translation tasks",
15
- "answer_d": "Designed to function solely with GPU resources",
16
- "correct_answer": "B"
17
- },
18
- {
19
- "question": "Which of the following is a common architecture for LLMs?",
20
- "answer_a": "Convolutional Neural Networks (CNNs)",
21
- "answer_b": "Transformer",
22
- "answer_c": "Recurrent Neural Networks (RNNs) with LSTM",
23
- "answer_d": "Support Vector Machines",
24
- "correct_answer": "B"
25
- },
26
- {
27
- "question": "What does it mean when we say LLMs are \"autoregressive\"?",
28
- "answer_a": "They regress to the mean to reduce variance",
29
- "answer_b": "They generate text by predicting the next token based on previous tokens",
30
- "answer_c": "They can only handle labeled data",
31
- "answer_d": "They can output text only after the entire input is known at once",
32
- "correct_answer": "B"
33
- },
34
- {
35
- "question": "Which of these is NOT a common use of LLMs?",
36
- "answer_a": "Summarizing content",
37
- "answer_b": "Generating code",
38
- "answer_c": "Playing strategy games like chess or Go",
39
- "answer_d": "Conversational AI",
40
- "correct_answer": "C"
41
- },
42
- {
43
- "question": "Which of the following best describes a \"special token\"?",
44
- "answer_a": "A token that makes the model forget all context",
45
- "answer_b": "A model signature required for API calls",
46
- "answer_c": "A token that helps segment or structure the conversation in the model",
47
- "answer_d": "A token that always represents the end of text",
48
- "correct_answer": "C"
49
- },
50
- {
51
- "question": "What is the primary goal of a \"chat template\"?",
52
- "answer_a": "To force the model into a single-turn conversation",
53
- "answer_b": "To structure interactions and define roles in a conversation",
54
- "answer_c": "To replace the need for system messages",
55
- "answer_d": "To store prompts into the model's weights permanently",
56
- "correct_answer": "B"
57
- },
58
- {
59
- "question": "How do tokenizers handle text for modern NLP models?",
60
- "answer_a": "By splitting text into individual words only",
61
- "answer_b": "By breaking words into subword units and assigning numerical IDs",
62
- "answer_c": "By storing text directly without transformation",
63
- "answer_d": "By removing all punctuation automatically",
64
- "correct_answer": "B"
65
- },
66
- {
67
- "question": "Which role in a conversation sets the overall behavior for a model?",
68
- "answer_a": "user",
69
- "answer_b": "system",
70
- "answer_c": "assistant",
71
- "answer_d": "developer",
72
- "correct_answer": "B"
73
- },
74
- {
75
- "question": "Which statement is TRUE about tool usage in chat templates?",
76
- "answer_a": "Tools cannot be used within the conversation context.",
77
- "answer_b": "Tools are used only for logging messages.",
78
- "answer_c": "Tools allow the assistant to offload tasks like web search or calculations.",
79
- "answer_d": "Tools are unsupported in all modern LLMs.",
80
- "correct_answer": "C"
81
- }
82
  ]
 
1
+ [
2
+ {
3
+ "question": "Which of the following best describes a Large Language Model (LLM)?",
4
+ "answer_a": "A model specializing in language recognition",
5
+ "answer_b": "A massive neural network that understands and generates human language",
6
+ "answer_c": "A model exclusively used for language data tasks like summarization or classification",
7
+ "answer_d": "A rule-based chatbot used for conversations",
8
+ "correct_answer": "B"
9
+ },
10
+ {
11
+ "question": "LLMs are typically:",
12
+ "answer_a": "Pre-trained on small, curated datasets",
13
+ "answer_b": "Trained on large text corpora to capture linguistic patterns",
14
+ "answer_c": "Trained purely on translation tasks",
15
+ "answer_d": "Designed to function solely with GPU resources",
16
+ "correct_answer": "B"
17
+ },
18
+ {
19
+ "question": "Which of the following is a common architecture for LLMs?",
20
+ "answer_a": "Convolutional Neural Networks (CNNs)",
21
+ "answer_b": "Transformer",
22
+ "answer_c": "Recurrent Neural Networks (RNNs) with LSTM",
23
+ "answer_d": "Support Vector Machines",
24
+ "correct_answer": "B"
25
+ },
26
+ {
27
+ "question": "What does it mean when we say LLMs are \"autoregressive\"?",
28
+ "answer_a": "They regress to the mean to reduce variance",
29
+ "answer_b": "They generate text by predicting the next token based on previous tokens",
30
+ "answer_c": "They can only handle labeled data",
31
+ "answer_d": "They can output text only after the entire input is known at once",
32
+ "correct_answer": "B"
33
+ },
34
+ {
35
+ "question": "Which of these is NOT a common use of LLMs?",
36
+ "answer_a": "Summarizing content",
37
+ "answer_b": "Generating code",
38
+ "answer_c": "Playing strategy games like chess or Go",
39
+ "answer_d": "Conversational AI",
40
+ "correct_answer": "C"
41
+ },
42
+ {
43
+ "question": "Which of the following best describes a \"special token\"?",
44
+ "answer_a": "A token that makes the model forget all context",
45
+ "answer_b": "A model signature required for API calls",
46
+ "answer_c": "A token that helps segment or structure the conversation in the model",
47
+ "answer_d": "A token that always represents the end of text",
48
+ "correct_answer": "C"
49
+ },
50
+ {
51
+ "question": "What is the primary goal of a \"chat template\"?",
52
+ "answer_a": "To force the model into a single-turn conversation",
53
+ "answer_b": "To structure interactions and define roles in a conversation",
54
+ "answer_c": "To replace the need for system messages",
55
+ "answer_d": "To store prompts into the model's weights permanently",
56
+ "correct_answer": "B"
57
+ },
58
+ {
59
+ "question": "How do tokenizers handle text for modern NLP models?",
60
+ "answer_a": "By splitting text into individual words only",
61
+ "answer_b": "By breaking words into subword units and assigning numerical IDs",
62
+ "answer_c": "By storing text directly without transformation",
63
+ "answer_d": "By removing all punctuation automatically",
64
+ "correct_answer": "B"
65
+ },
66
+ {
67
+ "question": "Which role in a conversation sets the overall behavior for a model?",
68
+ "answer_a": "user",
69
+ "answer_b": "system",
70
+ "answer_c": "assistant",
71
+ "answer_d": "developer",
72
+ "correct_answer": "B"
73
+ },
74
+ {
75
+ "question": "Which statement is TRUE about tool usage in chat templates?",
76
+ "answer_a": "Tools cannot be used within the conversation context.",
77
+ "answer_b": "Tools are used only for logging messages.",
78
+ "answer_c": "Tools allow the assistant to offload tasks like web search or calculations.",
79
+ "answer_d": "Tools are unsupported in all modern LLMs.",
80
+ "correct_answer": "C"
81
+ }
82
  ]
pyproject.toml CHANGED
@@ -1,12 +1,12 @@
1
- [project]
2
- name = "quiz-app"
3
- version = "0.1.0"
4
- description = "Add your description here"
5
- readme = "README.md"
6
- requires-python = ">=3.11"
7
- dependencies = [
8
- "datasets>=3.2.0",
9
- "gradio[oauth]==5.15.0",
10
- "huggingface-hub>=0.27.1",
11
- "ipykernel>=6.29.5",
12
- ]
 
1
+ [project]
2
+ name = "quiz-app"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "datasets>=3.2.0",
9
+ "gradio[oauth]==5.15.0",
10
+ "huggingface-hub>=0.27.1",
11
+ "ipykernel>=6.29.5",
12
+ ]
requirements.txt CHANGED
@@ -1,99 +1,99 @@
1
- # This file was autogenerated by uv via the following command:
2
- # uv export --format requirements-txt --no-hashes
3
- aiofiles==23.2.1
4
- aiohappyeyeballs==2.4.4
5
- aiohttp==3.11.11
6
- aiosignal==1.3.2
7
- annotated-types==0.7.0
8
- anyio==4.8.0
9
- appnope==0.1.4 ; platform_system == 'Darwin'
10
- asttokens==3.0.0
11
- attrs==24.3.0
12
- audioop-lts==0.2.1 ; python_full_version >= '3.13'
13
- authlib==1.4.0
14
- certifi==2024.12.14
15
- cffi==1.17.1 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
16
- charset-normalizer==3.4.1
17
- click==8.1.8 ; sys_platform != 'emscripten'
18
- colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows'
19
- comm==0.2.2
20
- cryptography==44.0.0
21
- datasets==3.2.0
22
- debugpy==1.8.12
23
- decorator==5.1.1
24
- dill==0.3.8
25
- executing==2.2.0
26
- fastapi==0.115.7
27
- ffmpy==0.5.0
28
- filelock==3.17.0
29
- frozenlist==1.5.0
30
- fsspec==2024.9.0
31
- gradio==5.15.0
32
- gradio-client==1.7.0
33
- h11==0.14.0
34
- httpcore==1.0.7
35
- httpx==0.28.1
36
- huggingface-hub==0.28.1
37
- idna==3.10
38
- ipykernel==6.29.5
39
- ipython==8.31.0
40
- itsdangerous==2.2.0
41
- jedi==0.19.2
42
- jinja2==3.1.5
43
- jupyter-client==8.6.3
44
- jupyter-core==5.7.2
45
- markdown-it-py==3.0.0 ; sys_platform != 'emscripten'
46
- markupsafe==2.1.5
47
- matplotlib-inline==0.1.7
48
- mdurl==0.1.2 ; sys_platform != 'emscripten'
49
- multidict==6.1.0
50
- multiprocess==0.70.16
51
- nest-asyncio==1.6.0
52
- numpy==2.2.2
53
- orjson==3.10.15
54
- packaging==24.2
55
- pandas==2.2.3
56
- parso==0.8.4
57
- pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
58
- pillow==11.1.0
59
- platformdirs==4.3.6
60
- prompt-toolkit==3.0.50
61
- propcache==0.2.1
62
- psutil==6.1.1
63
- ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
64
- pure-eval==0.2.3
65
- pyarrow==19.0.0
66
- pycparser==2.22 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
67
- pydantic==2.10.6
68
- pydantic-core==2.27.2
69
- pydub==0.25.1
70
- pygments==2.19.1
71
- python-dateutil==2.9.0.post0
72
- python-multipart==0.0.20
73
- pytz==2024.2
74
- pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
75
- pyyaml==6.0.2
76
- pyzmq==26.2.0
77
- requests==2.32.3
78
- rich==13.9.4 ; sys_platform != 'emscripten'
79
- ruff==0.9.3 ; sys_platform != 'emscripten'
80
- safehttpx==0.1.6
81
- semantic-version==2.10.0
82
- shellingham==1.5.4 ; sys_platform != 'emscripten'
83
- six==1.17.0
84
- sniffio==1.3.1
85
- stack-data==0.6.3
86
- starlette==0.45.2
87
- tomlkit==0.13.2
88
- tornado==6.4.2
89
- tqdm==4.67.1
90
- traitlets==5.14.3
91
- typer==0.15.1 ; sys_platform != 'emscripten'
92
- typing-extensions==4.12.2
93
- tzdata==2025.1
94
- urllib3==2.3.0
95
- uvicorn==0.34.0 ; sys_platform != 'emscripten'
96
- wcwidth==0.2.13
97
- websockets==14.2
98
- xxhash==3.5.0
99
- yarl==1.18.3
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv export --format requirements-txt --no-hashes
3
+ aiofiles==23.2.1
4
+ aiohappyeyeballs==2.4.4
5
+ aiohttp==3.11.11
6
+ aiosignal==1.3.2
7
+ annotated-types==0.7.0
8
+ anyio==4.8.0
9
+ appnope==0.1.4 ; platform_system == 'Darwin'
10
+ asttokens==3.0.0
11
+ attrs==24.3.0
12
+ audioop-lts==0.2.1 ; python_full_version >= '3.13'
13
+ authlib==1.4.0
14
+ certifi==2024.12.14
15
+ cffi==1.17.1 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
16
+ charset-normalizer==3.4.1
17
+ click==8.1.8 ; sys_platform != 'emscripten'
18
+ colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows'
19
+ comm==0.2.2
20
+ cryptography==44.0.0
21
+ datasets==3.2.0
22
+ debugpy==1.8.12
23
+ decorator==5.1.1
24
+ dill==0.3.8
25
+ executing==2.2.0
26
+ fastapi==0.115.7
27
+ ffmpy==0.5.0
28
+ filelock==3.17.0
29
+ frozenlist==1.5.0
30
+ fsspec==2024.9.0
31
+ gradio==5.15.0
32
+ gradio-client==1.7.0
33
+ h11==0.14.0
34
+ httpcore==1.0.7
35
+ httpx==0.28.1
36
+ huggingface-hub==0.28.1
37
+ idna==3.10
38
+ ipykernel==6.29.5
39
+ ipython==8.31.0
40
+ itsdangerous==2.2.0
41
+ jedi==0.19.2
42
+ jinja2==3.1.5
43
+ jupyter-client==8.6.3
44
+ jupyter-core==5.7.2
45
+ markdown-it-py==3.0.0 ; sys_platform != 'emscripten'
46
+ markupsafe==2.1.5
47
+ matplotlib-inline==0.1.7
48
+ mdurl==0.1.2 ; sys_platform != 'emscripten'
49
+ multidict==6.1.0
50
+ multiprocess==0.70.16
51
+ nest-asyncio==1.6.0
52
+ numpy==2.2.2
53
+ orjson==3.10.15
54
+ packaging==24.2
55
+ pandas==2.2.3
56
+ parso==0.8.4
57
+ pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
58
+ pillow==11.1.0
59
+ platformdirs==4.3.6
60
+ prompt-toolkit==3.0.50
61
+ propcache==0.2.1
62
+ psutil==6.1.1
63
+ ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
64
+ pure-eval==0.2.3
65
+ pyarrow==19.0.0
66
+ pycparser==2.22 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
67
+ pydantic==2.10.6
68
+ pydantic-core==2.27.2
69
+ pydub==0.25.1
70
+ pygments==2.19.1
71
+ python-dateutil==2.9.0.post0
72
+ python-multipart==0.0.20
73
+ pytz==2024.2
74
+ pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
75
+ pyyaml==6.0.2
76
+ pyzmq==26.2.0
77
+ requests==2.32.3
78
+ rich==13.9.4 ; sys_platform != 'emscripten'
79
+ ruff==0.9.3 ; sys_platform != 'emscripten'
80
+ safehttpx==0.1.6
81
+ semantic-version==2.10.0
82
+ shellingham==1.5.4 ; sys_platform != 'emscripten'
83
+ six==1.17.0
84
+ sniffio==1.3.1
85
+ stack-data==0.6.3
86
+ starlette==0.45.2
87
+ tomlkit==0.13.2
88
+ tornado==6.4.2
89
+ tqdm==4.67.1
90
+ traitlets==5.14.3
91
+ typer==0.15.1 ; sys_platform != 'emscripten'
92
+ typing-extensions==4.12.2
93
+ tzdata==2025.1
94
+ urllib3==2.3.0
95
+ uvicorn==0.34.0 ; sys_platform != 'emscripten'
96
+ wcwidth==0.2.13
97
+ websockets==14.2
98
+ xxhash==3.5.0
99
+ yarl==1.18.3
uv.lock CHANGED
The diff for this file is too large to render. See raw diff