not-lain committed on
Commit
f04e8bb
·
verified ·
1 Parent(s): 50e0fff

Upload folder using huggingface_hub

Browse files
Files changed (9) hide show
  1. .gitattributes +35 -35
  2. .gitignore +10 -10
  3. .python-version +1 -1
  4. app.py +256 -287
  5. data_to_parquet.py +52 -0
  6. example.json +81 -81
  7. pyproject.toml +12 -12
  8. requirements.txt +99 -99
  9. uv.lock +0 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -1,10 +1,10 @@
1
- # Python-generated files
2
- __pycache__/
3
- *.py[oc]
4
- build/
5
- dist/
6
- wheels/
7
- *.egg-info
8
-
9
- # Virtual environments
10
- .venv
 
1
+ # Python-generated files
2
+ __pycache__/
3
+ *.py[oc]
4
+ build/
5
+ dist/
6
+ wheels/
7
+ *.egg-info
8
+
9
+ # Virtual environments
10
+ .venv
.python-version CHANGED
@@ -1 +1 @@
1
- 3.11
 
1
+ 3.11
app.py CHANGED
@@ -1,287 +1,256 @@
1
- import os
2
- from datetime import datetime
3
- import random
4
-
5
- import pandas as pd
6
- from huggingface_hub import HfApi, hf_hub_download, Repository
7
- from huggingface_hub.repocard import metadata_load
8
-
9
- import gradio as gr
10
- from datasets import load_dataset, Dataset
11
- from huggingface_hub import whoami
12
-
13
- EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
14
- EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
15
- EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.7
16
-
17
- ds = load_dataset(EXAM_DATASET_ID, split="train")
18
-
19
- DATASET_REPO_URL = "https://huggingface.co/datasets/agents-course/certificates"
20
- CERTIFIED_USERS_FILENAME = "certified_students.csv"
21
- CERTIFIED_USERS_DIR = "certificates"
22
- repo = Repository(
23
- local_dir=CERTIFIED_USERS_DIR, clone_from=DATASET_REPO_URL, use_auth_token=os.getenv("HF_TOKEN")
24
- )
25
-
26
- # Convert dataset to a list of dicts and randomly sort
27
- quiz_data = ds.to_pandas().to_dict("records")
28
- random.shuffle(quiz_data)
29
-
30
- # Limit to max questions if specified
31
- if EXAM_MAX_QUESTIONS:
32
- quiz_data = quiz_data[: int(EXAM_MAX_QUESTIONS)]
33
-
34
-
35
- def on_user_logged_in(token: gr.OAuthToken | None):
36
- """
37
- If the user has a valid token, show Start button.
38
- Otherwise, keep the login button visible.
39
- """
40
- if token is not None:
41
- return [
42
- gr.update(visible=False), # login button visibility
43
- gr.update(visible=True), # start button visibility
44
- gr.update(visible=False), # next button visibility
45
- gr.update(visible=False), # submit button visibility
46
- "", # question text
47
- [], # radio choices (empty list = no choices)
48
- "Click 'Start' to begin the quiz", # status message
49
- 0, # question_idx
50
- [], # user_answers
51
- "", # final_markdown content
52
- token, # user token
53
- ]
54
- else:
55
- return [
56
- gr.update(visible=True), # login button visibility
57
- gr.update(visible=False), # start button visibility
58
- gr.update(visible=False), # next button visibility
59
- gr.update(visible=False), # submit button visibility
60
- "", # question text
61
- [], # radio choices
62
- "", # status message
63
- 0, # question_idx
64
- [], # user_answers
65
- "", # final_markdown content
66
- None, # no token
67
- ]
68
-
69
- def add_certified_user(hf_username, pass_percentage, submission_time):
70
- """
71
- Add the certified user to the database
72
- """
73
- print("ADD CERTIFIED USER")
74
- repo.git_pull()
75
- history = pd.read_csv(os.path.join(CERTIFIED_USERS_DIR, CERTIFIED_USERS_FILENAME))
76
-
77
- # Check if this hf_username is already in our dataset:
78
- check = history.loc[history['hf_username'] == hf_username]
79
- if not check.empty:
80
- history = history.drop(labels=check.index[0], axis=0)
81
-
82
- new_row = pd.DataFrame({'hf_username': hf_username, 'pass_percentage': pass_percentage, 'datetime': submission_time}, index=[0])
83
- history = pd.concat([new_row, history[:]]).reset_index(drop=True)
84
-
85
- history.to_csv(os.path.join(CERTIFIED_USERS_DIR, CERTIFIED_USERS_FILENAME), index=False)
86
- repo.push_to_hub(commit_message="Update certified users list")
87
-
88
- def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
89
- """
90
- Create a new dataset from user_answers and push it to the Hub.
91
- Calculates grade and checks against passing threshold.
92
- """
93
- if token is None:
94
- gr.Warning("Please log in to Hugging Face before pushing!")
95
- return
96
-
97
- # Calculate grade
98
- correct_count = sum(1 for answer in user_answers if answer["is_correct"])
99
- total_questions = len(user_answers)
100
- grade = correct_count / total_questions if total_questions > 0 else 0
101
-
102
- if grade < float(EXAM_PASSING_SCORE):
103
- gr.Warning(
104
- f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
105
- )
106
- return f"You scored {grade:.1%}. Please try again to achieve at least {float(EXAM_PASSING_SCORE):.1%}"
107
-
108
- gr.Info("Submitting answers to the Hub. Please wait...", duration=2)
109
-
110
- user_info = whoami(token=token.token)
111
- repo_id = f"{EXAM_DATASET_ID}_student_responses"
112
- submission_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
113
-
114
- new_ds = Dataset.from_list(user_answers)
115
- new_ds = new_ds.map(
116
- lambda x: {
117
- "username": user_info["name"],
118
- "datetime": submission_time,
119
- "grade": grade,
120
- }
121
- )
122
- new_ds.push_to_hub(repo_id=repo_id, split=user_info["name"])
123
-
124
- # I'm adding a csv version
125
- # The idea, if the user passed, we create a simple row in a csv
126
- print("ADD CERTIFIED USER")
127
- # Add this user to our database
128
- add_certified_user(user_info["name"], grade, submission_time)
129
-
130
- return f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
131
-
132
-
133
-
134
-
135
- def handle_quiz(question_idx, user_answers, selected_answer, is_start):
136
- """
137
- Handle quiz state transitions and store answers
138
- """
139
- if not is_start and question_idx < len(quiz_data):
140
- current_q = quiz_data[question_idx]
141
- correct_reference = current_q["correct_answer"]
142
- correct_reference = f"answer_{correct_reference}".lower()
143
- is_correct = selected_answer == current_q[correct_reference]
144
- user_answers.append(
145
- {
146
- "question": current_q["question"],
147
- "selected_answer": selected_answer,
148
- "correct_answer": current_q[correct_reference],
149
- "is_correct": is_correct,
150
- "correct_reference": correct_reference,
151
- }
152
- )
153
- question_idx += 1
154
-
155
- if question_idx >= len(quiz_data):
156
- correct_count = sum(1 for answer in user_answers if answer["is_correct"])
157
- grade = correct_count / len(user_answers)
158
- results_text = (
159
- f"**Quiz Complete!**\n\n"
160
- f"Your score: {grade:.1%}\n"
161
- f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
162
- )
163
- return [
164
- "", # question_text
165
- gr.update(choices=[], visible=False), # hide radio choices
166
- f"{'✅ Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
167
- question_idx,
168
- user_answers,
169
- gr.update(visible=False), # start button visibility
170
- gr.update(visible=False), # next button visibility
171
- gr.update(visible=True), # submit button visibility
172
- results_text, # final results text
173
- ]
174
-
175
- # Show next question
176
- q = quiz_data[question_idx]
177
- return [
178
- f"## Question {question_idx + 1} \n### {q['question']}", # question text
179
- gr.update( # properly update radio choices
180
- choices=[q["answer_a"], q["answer_b"], q["answer_c"], q["answer_d"]],
181
- value=None,
182
- visible=True,
183
- ),
184
- "Select an answer and click 'Next' to continue.",
185
- question_idx,
186
- user_answers,
187
- gr.update(visible=False), # start button visibility
188
- gr.update(visible=True), # next button visibility
189
- gr.update(visible=False), # submit button visibility
190
- "", # clear final markdown
191
- ]
192
-
193
-
194
- def success_message(response):
195
- # response is whatever push_results_to_hub returned
196
- return f"{response}\n\n**Success!**"
197
-
198
-
199
- with gr.Blocks() as demo:
200
- demo.title = f"Dataset Quiz for {EXAM_DATASET_ID}"
201
-
202
- # State variables
203
- question_idx = gr.State(value=0)
204
- user_answers = gr.State(value=[])
205
- user_token = gr.State(value=None)
206
-
207
- with gr.Row(variant="compact"):
208
- gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")
209
-
210
- with gr.Row(variant="compact"):
211
- gr.Markdown(
212
- "Log in first, then click 'Start' to begin. Answer each question, click 'Next', and finally click 'Submit' to publish your results to the Hugging Face Hub."
213
- )
214
-
215
- with gr.Row(variant="panel"):
216
- question_text = gr.Markdown("")
217
- radio_choices = gr.Radio(
218
- choices=[], label="Your Answer", scale=1.5, visible=False
219
- )
220
-
221
- with gr.Row(variant="compact"):
222
- status_text = gr.Markdown("")
223
- final_markdown = gr.Markdown("")
224
-
225
- with gr.Row(variant="compact"):
226
- login_btn = gr.LoginButton(visible=True)
227
- start_btn = gr.Button("Start ⏭️", visible=True)
228
- next_btn = gr.Button("Next ⏭️", visible=False)
229
- submit_btn = gr.Button("Submit ✅", visible=False)
230
-
231
- # Wire up the event handlers
232
- login_btn.click(
233
- fn=on_user_logged_in,
234
- inputs=None,
235
- outputs=[
236
- login_btn,
237
- start_btn,
238
- next_btn,
239
- submit_btn,
240
- question_text,
241
- radio_choices,
242
- status_text,
243
- question_idx,
244
- user_answers,
245
- final_markdown,
246
- user_token,
247
- ],
248
- )
249
-
250
- start_btn.click(
251
- fn=handle_quiz,
252
- inputs=[question_idx, user_answers, gr.State(""), gr.State(True)],
253
- outputs=[
254
- question_text,
255
- radio_choices,
256
- status_text,
257
- question_idx,
258
- user_answers,
259
- start_btn,
260
- next_btn,
261
- submit_btn,
262
- final_markdown,
263
- ],
264
- )
265
-
266
- next_btn.click(
267
- fn=handle_quiz,
268
- inputs=[question_idx, user_answers, radio_choices, gr.State(False)],
269
- outputs=[
270
- question_text,
271
- radio_choices,
272
- status_text,
273
- question_idx,
274
- user_answers,
275
- start_btn,
276
- next_btn,
277
- submit_btn,
278
- final_markdown,
279
- ],
280
- )
281
-
282
- submit_btn.click(fn=push_results_to_hub, inputs=[user_answers])
283
-
284
- if __name__ == "__main__":
285
- # Note: If testing locally, you'll need to run `huggingface-cli login` or set HF_TOKEN
286
- # environment variable for the login to work locally.
287
- demo.launch()
 
1
import os
import random

from huggingface_hub import HfApi, whoami

import gradio as gr
from datasets import load_dataset

from data_to_parquet import to_parquet

# Quiz configuration, overridable via environment variables.
# NOTE(review): values read from os.getenv are strings (e.g. "10", "0.7");
# the code converts them with int()/float() at each point of use.
EXAM_DATASET_ID = os.getenv("EXAM_DATASET_ID") or "agents-course/unit_1_quiz"
EXAM_MAX_QUESTIONS = os.getenv("EXAM_MAX_QUESTIONS") or 10
EXAM_PASSING_SCORE = os.getenv("EXAM_PASSING_SCORE") or 0.7

# Question bank, loaded once at startup from the Hub dataset.
ds = load_dataset(EXAM_DATASET_ID, split="train")

# Authenticated client used to upload per-student result parquet files.
upload_api = HfApi(token=os.getenv("HF_TOKEN"))
# Convert dataset to a list of dicts and randomly sort
quiz_data = ds.to_pandas().to_dict("records")
random.shuffle(quiz_data)

# Limit to max questions if specified
if EXAM_MAX_QUESTIONS:
    quiz_data = quiz_data[: int(EXAM_MAX_QUESTIONS)]
25
+
26
+
27
def on_user_logged_in(token: gr.OAuthToken | None):
    """
    Reset the quiz UI after a login attempt.

    When *token* is present, hide the login button and reveal Start;
    otherwise keep only the login button visible. In both cases the quiz
    state (question index, collected answers, markdown panes) is cleared.

    Returns a positional list matching the ``outputs`` wired to
    ``login_btn.click``: [login_btn, start_btn, next_btn, submit_btn,
    question_text, radio_choices, status_text, question_idx, user_answers,
    final_markdown, user_token].
    """
    logged_in = token is not None
    return [
        gr.update(visible=not logged_in),  # login button: hide once logged in
        gr.update(visible=logged_in),  # start button: show once logged in
        gr.update(visible=False),  # next button stays hidden until Start
        gr.update(visible=False),  # submit button stays hidden until the end
        "",  # question text cleared
        [],  # radio choices cleared (empty list = no choices)
        "Click 'Start' to begin the quiz" if logged_in else "",  # status message
        0,  # question_idx reset
        [],  # user_answers reset
        "",  # final_markdown cleared
        token if logged_in else None,  # user token (None when not logged in)
    ]
60
+
61
+
62
def push_results_to_hub(user_answers, token: gr.OAuthToken | None):
    """
    Grade the collected answers and, if the user passed, upload the result
    to the students-data dataset on the Hub.

    Args:
        user_answers: list of dicts as built by ``handle_quiz`` — each has
            at least an ``is_correct`` boolean.
        token: OAuth token injected by Gradio's login flow; ``None`` when
            the user is not logged in.

    Side effects: Gradio toast notifications (Warning/Info/Success) and a
    parquet upload via ``to_parquet``. Returns ``None`` in every path.
    """
    if token is None:
        gr.Warning("Please log in to Hugging Face before pushing!")
        return

    # Calculate grade as the fraction of correct answers (0 when no answers
    # were recorded, avoiding a ZeroDivisionError).
    correct_count = sum(1 for answer in user_answers if answer["is_correct"])
    total_questions = len(user_answers)
    grade = correct_count / total_questions if total_questions > 0 else 0

    # EXAM_PASSING_SCORE may be a string from the environment — convert here.
    if grade < float(EXAM_PASSING_SCORE):
        gr.Warning(
            f"Score {grade:.1%} below passing threshold of {float(EXAM_PASSING_SCORE):.1%}"
        )
        return  # do not continue

    gr.Info("Submitting answers to the Hub. Please wait...", duration=2)

    # Resolve the logged-in user's Hub username from their OAuth token.
    user_info = whoami(token=token.token)
    # TODO:
    # check if username already has "username.parquet" in the dataset and download that (or read values directly from dataset viewer if possible)
    # instead of replacing the values check if the new score is better than the old one
    to_parquet(
        upload_api,  # api
        "agents-course/students-data",  # repo_id
        user_info["name"],  # username
        grade,  # unit1 score
        0.0,  # unit2 score
        0.0,  # unit3 score
        0.0,  # unit4 score
        0,  # already certified or not
    )

    gr.Success(
        f"Your responses have been submitted to the Hub! Final grade: {grade:.1%}"
    )
102
+
103
+
104
def handle_quiz(question_idx, user_answers, selected_answer, is_start):
    """
    Handle quiz state transitions and store answers.

    Called both by Start (is_start=True, nothing to record) and by Next
    (is_start=False, records *selected_answer* for the current question
    and advances). Returns a positional list matching the ``outputs``
    wired in the UI: [question_text, radio_choices update, status_text,
    question_idx, user_answers, start_btn, next_btn, submit_btn,
    final_markdown] — the order is the contract with Gradio.
    """
    # Record the answer for the question just shown (skip on Start).
    if not is_start and question_idx < len(quiz_data):
        current_q = quiz_data[question_idx]
        # "correct_answer" holds a letter like "B"; map it to the column
        # name of the matching answer text, e.g. "answer_b".
        correct_reference = current_q["correct_answer"]
        correct_reference = f"answer_{correct_reference}".lower()
        is_correct = selected_answer == current_q[correct_reference]
        user_answers.append(
            {
                "question": current_q["question"],
                "selected_answer": selected_answer,
                "correct_answer": current_q[correct_reference],
                "is_correct": is_correct,
                "correct_reference": correct_reference,
            }
        )
        question_idx += 1

    # All questions answered: show the final grade and the Submit button.
    if question_idx >= len(quiz_data):
        correct_count = sum(1 for answer in user_answers if answer["is_correct"])
        grade = correct_count / len(user_answers)
        results_text = (
            f"**Quiz Complete!**\n\n"
            f"Your score: {grade:.1%}\n"
            f"Passing score: {float(EXAM_PASSING_SCORE):.1%}\n\n"
        )
        return [
            "",  # question_text
            gr.update(choices=[], visible=False),  # hide radio choices
            f"{'✅ Passed!' if grade >= float(EXAM_PASSING_SCORE) else '❌ Did not pass'}",
            question_idx,
            user_answers,
            gr.update(visible=False),  # start button visibility
            gr.update(visible=False),  # next button visibility
            gr.update(visible=True),  # submit button visibility
            results_text,  # final results text
        ]

    # Show next question
    q = quiz_data[question_idx]
    return [
        f"## Question {question_idx + 1} \n### {q['question']}",  # question text
        gr.update(  # properly update radio choices
            choices=[q["answer_a"], q["answer_b"], q["answer_c"], q["answer_d"]],
            value=None,
            visible=True,
        ),
        "Select an answer and click 'Next' to continue.",
        question_idx,
        user_answers,
        gr.update(visible=False),  # start button visibility
        gr.update(visible=True),  # next button visibility
        gr.update(visible=False),  # submit button visibility
        "",  # clear final markdown
    ]
161
+
162
+
163
def success_message(response):
    """Append a bold success banner to *response* (the message returned by
    push_results_to_hub) for display in a Markdown pane."""
    banner = "**Success!**"
    return f"{response}\n\n{banner}"
166
+
167
+
168
with gr.Blocks() as demo:
    demo.title = f"Dataset Quiz for {EXAM_DATASET_ID}"

    # State variables — per-session quiz progress held by Gradio.
    question_idx = gr.State(value=0)
    user_answers = gr.State(value=[])
    user_token = gr.State(value=None)

    with gr.Row(variant="compact"):
        gr.Markdown(f"## Welcome to the {EXAM_DATASET_ID} Quiz")

    with gr.Row(variant="compact"):
        gr.Markdown(
            "Log in first, then click 'Start' to begin. Answer each question, click 'Next', and finally click 'Submit' to publish your results to the Hugging Face Hub."
        )

    # Question pane: markdown prompt plus the multiple-choice radio group.
    with gr.Row(variant="panel"):
        question_text = gr.Markdown("")
        radio_choices = gr.Radio(
            choices=[], label="Your Answer", scale=1.5, visible=False
        )

    with gr.Row(variant="compact"):
        status_text = gr.Markdown("")
        final_markdown = gr.Markdown("")

    with gr.Row(variant="compact"):
        login_btn = gr.LoginButton(visible=True)
        start_btn = gr.Button("Start ⏭️", visible=True)
        next_btn = gr.Button("Next ⏭️", visible=False)
        submit_btn = gr.Button("Submit ✅", visible=False)

    # Wire up the event handlers.
    # NOTE(review): the OAuth token parameter of each handler is injected by
    # Gradio (gr.OAuthToken annotation), so it is not listed in `inputs`.
    # Output order below must match each handler's positional return list.
    login_btn.click(
        fn=on_user_logged_in,
        inputs=None,
        outputs=[
            login_btn,
            start_btn,
            next_btn,
            submit_btn,
            question_text,
            radio_choices,
            status_text,
            question_idx,
            user_answers,
            final_markdown,
            user_token,
        ],
    )

    # Start: same handler as Next, with is_start=True and no selected answer.
    start_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, gr.State(""), gr.State(True)],
        outputs=[
            question_text,
            radio_choices,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
        ],
    )

    next_btn.click(
        fn=handle_quiz,
        inputs=[question_idx, user_answers, radio_choices, gr.State(False)],
        outputs=[
            question_text,
            radio_choices,
            status_text,
            question_idx,
            user_answers,
            start_btn,
            next_btn,
            submit_btn,
            final_markdown,
        ],
    )

    submit_btn.click(fn=push_results_to_hub, inputs=[user_answers])

if __name__ == "__main__":
    # Note: If testing locally, you'll need to run `huggingface-cli login` or set HF_TOKEN
    # environment variable for the login to work locally.
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
data_to_parquet.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import json
import os
import tempfile

import pyarrow as pa
import pyarrow.parquet as pq
5
+
6
+
7
# current schema (refer to https://huggingface.co/spaces/phxia/dataset-builder/blob/main/dataset_uploader.py#L153 for more info)
# Maps each column name to a `datasets` feature spec; embedded in the
# parquet metadata so the Hub dataset viewer / datasets library can
# reconstruct typed columns. unit1-unit4 are stored as percentages.
schema = {
    "username": {"_type": "Value", "dtype": "string"},
    "unit1": {"_type": "Value", "dtype": "float64"},
    "unit2": {"_type": "Value", "dtype": "float64"},
    "unit3": {"_type": "Value", "dtype": "float64"},
    "unit4": {"_type": "Value", "dtype": "float64"},
    "certified": {"_type": "Value", "dtype": "int64"},
}
16
+
17
+
18
def to_parquet(
    api,
    repo: str,
    username: str = "",
    unit1: float = 0.0,
    unit2: float = 0.0,
    unit3: float = 0.0,
    unit4: float = 0.0,
    certified: int = 0,
):
    """
    Serialize one student's scores as Parquet and upload to a Hub dataset repo.

    Each student gets their own ``<username>.parquet`` file in the repo, so a
    re-submission simply overwrites the previous upload.

    Args:
        api: authenticated client exposing ``upload_file`` (a
            ``huggingface_hub.HfApi`` instance in the caller).
        repo: dataset repo id to upload into (must already exist).
        username: Hub username; also used as the parquet file name.
        unit1-unit4: per-unit grades as fractions in [0, 1]; stored as
            percentages (0-100).
        certified: 1 once the student is certified, else 0.
    """
    # Scores arrive as fractions (e.g. 0.85) and are stored as percentages.
    # float() keeps the column float64 even when an int 0 is passed
    # (equivalent to the old `x * 100 if x != 0 else 0.0`).
    data = {
        "username": username,
        "unit1": float(unit1) * 100,
        "unit2": float(unit2) * 100,
        "unit3": float(unit3) * 100,
        "unit4": float(unit4) * 100,
        "certified": certified,
    }
    # Export data to Arrow format (single-row table).
    table = pa.Table.from_pylist([data])
    # Add metadata (used by datasets library / Hub dataset viewer)
    table = table.replace_schema_metadata(
        {"huggingface": json.dumps({"info": {"features": schema}})}
    )
    # Write inside a TemporaryDirectory so the local file is always cleaned
    # up, even if the upload raises. (The previous NamedTemporaryFile with
    # delete=False leaked a temp file per call and is not portable to
    # Windows, where a file open by name cannot be rewritten.)
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, f"{username}.parquet")
        pq.write_table(table, local_path)

        api.upload_file(
            repo_id=repo,  # manually created repo
            repo_type="dataset",
            path_in_repo=f"{username}.parquet",  # each user will have their own parquet
            path_or_fileobj=local_path,
        )
example.json CHANGED
@@ -1,82 +1,82 @@
1
- [
2
- {
3
- "question": "Which of the following best describes a Large Language Model (LLM)?",
4
- "answer_a": "A model specializing in language recognition",
5
- "answer_b": "A massive neural network that understands and generates human language",
6
- "answer_c": "A model exclusively used for language data tasks like summarization or classification",
7
- "answer_d": "A rule-based chatbot used for conversations",
8
- "correct_answer": "B"
9
- },
10
- {
11
- "question": "LLMs are typically:",
12
- "answer_a": "Pre-trained on small, curated datasets",
13
- "answer_b": "Trained on large text corpora to capture linguistic patterns",
14
- "answer_c": "Trained purely on translation tasks",
15
- "answer_d": "Designed to function solely with GPU resources",
16
- "correct_answer": "B"
17
- },
18
- {
19
- "question": "Which of the following is a common architecture for LLMs?",
20
- "answer_a": "Convolutional Neural Networks (CNNs)",
21
- "answer_b": "Transformer",
22
- "answer_c": "Recurrent Neural Networks (RNNs) with LSTM",
23
- "answer_d": "Support Vector Machines",
24
- "correct_answer": "B"
25
- },
26
- {
27
- "question": "What does it mean when we say LLMs are \"autoregressive\"?",
28
- "answer_a": "They regress to the mean to reduce variance",
29
- "answer_b": "They generate text by predicting the next token based on previous tokens",
30
- "answer_c": "They can only handle labeled data",
31
- "answer_d": "They can output text only after the entire input is known at once",
32
- "correct_answer": "B"
33
- },
34
- {
35
- "question": "Which of these is NOT a common use of LLMs?",
36
- "answer_a": "Summarizing content",
37
- "answer_b": "Generating code",
38
- "answer_c": "Playing strategy games like chess or Go",
39
- "answer_d": "Conversational AI",
40
- "correct_answer": "C"
41
- },
42
- {
43
- "question": "Which of the following best describes a \"special token\"?",
44
- "answer_a": "A token that makes the model forget all context",
45
- "answer_b": "A model signature required for API calls",
46
- "answer_c": "A token that helps segment or structure the conversation in the model",
47
- "answer_d": "A token that always represents the end of text",
48
- "correct_answer": "C"
49
- },
50
- {
51
- "question": "What is the primary goal of a \"chat template\"?",
52
- "answer_a": "To force the model into a single-turn conversation",
53
- "answer_b": "To structure interactions and define roles in a conversation",
54
- "answer_c": "To replace the need for system messages",
55
- "answer_d": "To store prompts into the model's weights permanently",
56
- "correct_answer": "B"
57
- },
58
- {
59
- "question": "How do tokenizers handle text for modern NLP models?",
60
- "answer_a": "By splitting text into individual words only",
61
- "answer_b": "By breaking words into subword units and assigning numerical IDs",
62
- "answer_c": "By storing text directly without transformation",
63
- "answer_d": "By removing all punctuation automatically",
64
- "correct_answer": "B"
65
- },
66
- {
67
- "question": "Which role in a conversation sets the overall behavior for a model?",
68
- "answer_a": "user",
69
- "answer_b": "system",
70
- "answer_c": "assistant",
71
- "answer_d": "developer",
72
- "correct_answer": "B"
73
- },
74
- {
75
- "question": "Which statement is TRUE about tool usage in chat templates?",
76
- "answer_a": "Tools cannot be used within the conversation context.",
77
- "answer_b": "Tools are used only for logging messages.",
78
- "answer_c": "Tools allow the assistant to offload tasks like web search or calculations.",
79
- "answer_d": "Tools are unsupported in all modern LLMs.",
80
- "correct_answer": "C"
81
- }
82
  ]
 
1
+ [
2
+ {
3
+ "question": "Which of the following best describes a Large Language Model (LLM)?",
4
+ "answer_a": "A model specializing in language recognition",
5
+ "answer_b": "A massive neural network that understands and generates human language",
6
+ "answer_c": "A model exclusively used for language data tasks like summarization or classification",
7
+ "answer_d": "A rule-based chatbot used for conversations",
8
+ "correct_answer": "B"
9
+ },
10
+ {
11
+ "question": "LLMs are typically:",
12
+ "answer_a": "Pre-trained on small, curated datasets",
13
+ "answer_b": "Trained on large text corpora to capture linguistic patterns",
14
+ "answer_c": "Trained purely on translation tasks",
15
+ "answer_d": "Designed to function solely with GPU resources",
16
+ "correct_answer": "B"
17
+ },
18
+ {
19
+ "question": "Which of the following is a common architecture for LLMs?",
20
+ "answer_a": "Convolutional Neural Networks (CNNs)",
21
+ "answer_b": "Transformer",
22
+ "answer_c": "Recurrent Neural Networks (RNNs) with LSTM",
23
+ "answer_d": "Support Vector Machines",
24
+ "correct_answer": "B"
25
+ },
26
+ {
27
+ "question": "What does it mean when we say LLMs are \"autoregressive\"?",
28
+ "answer_a": "They regress to the mean to reduce variance",
29
+ "answer_b": "They generate text by predicting the next token based on previous tokens",
30
+ "answer_c": "They can only handle labeled data",
31
+ "answer_d": "They can output text only after the entire input is known at once",
32
+ "correct_answer": "B"
33
+ },
34
+ {
35
+ "question": "Which of these is NOT a common use of LLMs?",
36
+ "answer_a": "Summarizing content",
37
+ "answer_b": "Generating code",
38
+ "answer_c": "Playing strategy games like chess or Go",
39
+ "answer_d": "Conversational AI",
40
+ "correct_answer": "C"
41
+ },
42
+ {
43
+ "question": "Which of the following best describes a \"special token\"?",
44
+ "answer_a": "A token that makes the model forget all context",
45
+ "answer_b": "A model signature required for API calls",
46
+ "answer_c": "A token that helps segment or structure the conversation in the model",
47
+ "answer_d": "A token that always represents the end of text",
48
+ "correct_answer": "C"
49
+ },
50
+ {
51
+ "question": "What is the primary goal of a \"chat template\"?",
52
+ "answer_a": "To force the model into a single-turn conversation",
53
+ "answer_b": "To structure interactions and define roles in a conversation",
54
+ "answer_c": "To replace the need for system messages",
55
+ "answer_d": "To store prompts into the model's weights permanently",
56
+ "correct_answer": "B"
57
+ },
58
+ {
59
+ "question": "How do tokenizers handle text for modern NLP models?",
60
+ "answer_a": "By splitting text into individual words only",
61
+ "answer_b": "By breaking words into subword units and assigning numerical IDs",
62
+ "answer_c": "By storing text directly without transformation",
63
+ "answer_d": "By removing all punctuation automatically",
64
+ "correct_answer": "B"
65
+ },
66
+ {
67
+ "question": "Which role in a conversation sets the overall behavior for a model?",
68
+ "answer_a": "user",
69
+ "answer_b": "system",
70
+ "answer_c": "assistant",
71
+ "answer_d": "developer",
72
+ "correct_answer": "B"
73
+ },
74
+ {
75
+ "question": "Which statement is TRUE about tool usage in chat templates?",
76
+ "answer_a": "Tools cannot be used within the conversation context.",
77
+ "answer_b": "Tools are used only for logging messages.",
78
+ "answer_c": "Tools allow the assistant to offload tasks like web search or calculations.",
79
+ "answer_d": "Tools are unsupported in all modern LLMs.",
80
+ "correct_answer": "C"
81
+ }
82
  ]
pyproject.toml CHANGED
@@ -1,12 +1,12 @@
1
- [project]
2
- name = "quiz-app"
3
- version = "0.1.0"
4
- description = "Add your description here"
5
- readme = "README.md"
6
- requires-python = ">=3.11"
7
- dependencies = [
8
- "datasets>=3.2.0",
9
- "gradio[oauth]==5.15.0",
10
- "huggingface-hub>=0.27.1",
11
- "ipykernel>=6.29.5",
12
- ]
 
1
+ [project]
2
+ name = "quiz-app"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "datasets>=3.2.0",
9
+ "gradio[oauth]==5.15.0",
10
+ "huggingface-hub>=0.27.1",
11
+ "ipykernel>=6.29.5",
12
+ ]
requirements.txt CHANGED
@@ -1,99 +1,99 @@
1
- # This file was autogenerated by uv via the following command:
2
- # uv export --format requirements-txt --no-hashes
3
- aiofiles==23.2.1
4
- aiohappyeyeballs==2.4.4
5
- aiohttp==3.11.11
6
- aiosignal==1.3.2
7
- annotated-types==0.7.0
8
- anyio==4.8.0
9
- appnope==0.1.4 ; platform_system == 'Darwin'
10
- asttokens==3.0.0
11
- attrs==24.3.0
12
- audioop-lts==0.2.1 ; python_full_version >= '3.13'
13
- authlib==1.4.0
14
- certifi==2024.12.14
15
- cffi==1.17.1 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
16
- charset-normalizer==3.4.1
17
- click==8.1.8 ; sys_platform != 'emscripten'
18
- colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows'
19
- comm==0.2.2
20
- cryptography==44.0.0
21
- datasets==3.2.0
22
- debugpy==1.8.12
23
- decorator==5.1.1
24
- dill==0.3.8
25
- executing==2.2.0
26
- fastapi==0.115.7
27
- ffmpy==0.5.0
28
- filelock==3.17.0
29
- frozenlist==1.5.0
30
- fsspec==2024.9.0
31
- gradio==5.15.0
32
- gradio-client==1.7.0
33
- h11==0.14.0
34
- httpcore==1.0.7
35
- httpx==0.28.1
36
- huggingface-hub==0.28.1
37
- idna==3.10
38
- ipykernel==6.29.5
39
- ipython==8.31.0
40
- itsdangerous==2.2.0
41
- jedi==0.19.2
42
- jinja2==3.1.5
43
- jupyter-client==8.6.3
44
- jupyter-core==5.7.2
45
- markdown-it-py==3.0.0 ; sys_platform != 'emscripten'
46
- markupsafe==2.1.5
47
- matplotlib-inline==0.1.7
48
- mdurl==0.1.2 ; sys_platform != 'emscripten'
49
- multidict==6.1.0
50
- multiprocess==0.70.16
51
- nest-asyncio==1.6.0
52
- numpy==2.2.2
53
- orjson==3.10.15
54
- packaging==24.2
55
- pandas==2.2.3
56
- parso==0.8.4
57
- pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
58
- pillow==11.1.0
59
- platformdirs==4.3.6
60
- prompt-toolkit==3.0.50
61
- propcache==0.2.1
62
- psutil==6.1.1
63
- ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
64
- pure-eval==0.2.3
65
- pyarrow==19.0.0
66
- pycparser==2.22 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
67
- pydantic==2.10.6
68
- pydantic-core==2.27.2
69
- pydub==0.25.1
70
- pygments==2.19.1
71
- python-dateutil==2.9.0.post0
72
- python-multipart==0.0.20
73
- pytz==2024.2
74
- pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
75
- pyyaml==6.0.2
76
- pyzmq==26.2.0
77
- requests==2.32.3
78
- rich==13.9.4 ; sys_platform != 'emscripten'
79
- ruff==0.9.3 ; sys_platform != 'emscripten'
80
- safehttpx==0.1.6
81
- semantic-version==2.10.0
82
- shellingham==1.5.4 ; sys_platform != 'emscripten'
83
- six==1.17.0
84
- sniffio==1.3.1
85
- stack-data==0.6.3
86
- starlette==0.45.2
87
- tomlkit==0.13.2
88
- tornado==6.4.2
89
- tqdm==4.67.1
90
- traitlets==5.14.3
91
- typer==0.15.1 ; sys_platform != 'emscripten'
92
- typing-extensions==4.12.2
93
- tzdata==2025.1
94
- urllib3==2.3.0
95
- uvicorn==0.34.0 ; sys_platform != 'emscripten'
96
- wcwidth==0.2.13
97
- websockets==14.2
98
- xxhash==3.5.0
99
- yarl==1.18.3
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv export --format requirements-txt --no-hashes
3
+ aiofiles==23.2.1
4
+ aiohappyeyeballs==2.4.4
5
+ aiohttp==3.11.11
6
+ aiosignal==1.3.2
7
+ annotated-types==0.7.0
8
+ anyio==4.8.0
9
+ appnope==0.1.4 ; platform_system == 'Darwin'
10
+ asttokens==3.0.0
11
+ attrs==24.3.0
12
+ audioop-lts==0.2.1 ; python_full_version >= '3.13'
13
+ authlib==1.4.0
14
+ certifi==2024.12.14
15
+ cffi==1.17.1 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
16
+ charset-normalizer==3.4.1
17
+ click==8.1.8 ; sys_platform != 'emscripten'
18
+ colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows'
19
+ comm==0.2.2
20
+ cryptography==44.0.0
21
+ datasets==3.2.0
22
+ debugpy==1.8.12
23
+ decorator==5.1.1
24
+ dill==0.3.8
25
+ executing==2.2.0
26
+ fastapi==0.115.7
27
+ ffmpy==0.5.0
28
+ filelock==3.17.0
29
+ frozenlist==1.5.0
30
+ fsspec==2024.9.0
31
+ gradio==5.15.0
32
+ gradio-client==1.7.0
33
+ h11==0.14.0
34
+ httpcore==1.0.7
35
+ httpx==0.28.1
36
+ huggingface-hub==0.28.1
37
+ idna==3.10
38
+ ipykernel==6.29.5
39
+ ipython==8.31.0
40
+ itsdangerous==2.2.0
41
+ jedi==0.19.2
42
+ jinja2==3.1.5
43
+ jupyter-client==8.6.3
44
+ jupyter-core==5.7.2
45
+ markdown-it-py==3.0.0 ; sys_platform != 'emscripten'
46
+ markupsafe==2.1.5
47
+ matplotlib-inline==0.1.7
48
+ mdurl==0.1.2 ; sys_platform != 'emscripten'
49
+ multidict==6.1.0
50
+ multiprocess==0.70.16
51
+ nest-asyncio==1.6.0
52
+ numpy==2.2.2
53
+ orjson==3.10.15
54
+ packaging==24.2
55
+ pandas==2.2.3
56
+ parso==0.8.4
57
+ pexpect==4.9.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
58
+ pillow==11.1.0
59
+ platformdirs==4.3.6
60
+ prompt-toolkit==3.0.50
61
+ propcache==0.2.1
62
+ psutil==6.1.1
63
+ ptyprocess==0.7.0 ; sys_platform != 'emscripten' and sys_platform != 'win32'
64
+ pure-eval==0.2.3
65
+ pyarrow==19.0.0
66
+ pycparser==2.22 ; platform_python_implementation != 'PyPy' or implementation_name == 'pypy'
67
+ pydantic==2.10.6
68
+ pydantic-core==2.27.2
69
+ pydub==0.25.1
70
+ pygments==2.19.1
71
+ python-dateutil==2.9.0.post0
72
+ python-multipart==0.0.20
73
+ pytz==2024.2
74
+ pywin32==308 ; platform_python_implementation != 'PyPy' and sys_platform == 'win32'
75
+ pyyaml==6.0.2
76
+ pyzmq==26.2.0
77
+ requests==2.32.3
78
+ rich==13.9.4 ; sys_platform != 'emscripten'
79
+ ruff==0.9.3 ; sys_platform != 'emscripten'
80
+ safehttpx==0.1.6
81
+ semantic-version==2.10.0
82
+ shellingham==1.5.4 ; sys_platform != 'emscripten'
83
+ six==1.17.0
84
+ sniffio==1.3.1
85
+ stack-data==0.6.3
86
+ starlette==0.45.2
87
+ tomlkit==0.13.2
88
+ tornado==6.4.2
89
+ tqdm==4.67.1
90
+ traitlets==5.14.3
91
+ typer==0.15.1 ; sys_platform != 'emscripten'
92
+ typing-extensions==4.12.2
93
+ tzdata==2025.1
94
+ urllib3==2.3.0
95
+ uvicorn==0.34.0 ; sys_platform != 'emscripten'
96
+ wcwidth==0.2.13
97
+ websockets==14.2
98
+ xxhash==3.5.0
99
+ yarl==1.18.3
uv.lock CHANGED
The diff for this file is too large to render. See raw diff