saridormi committed
Commit 9203553 • 1 Parent(s): d47b526

Start rendering actual data + minor improvements

app.py CHANGED
```diff
@@ -1,19 +1,41 @@
+import logging
 import os
 
 import gradio as gr  # type: ignore[import]
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import HfApi
 
-from src.content import (INTRODUCTION_TEXT, INTRODUCTION_TITLE,
-                         LEADERBOARD_TEXT, LEADERBOARD_TITLE,
-                         SUBMISSION_TEXT_FILES, SUBMISSION_TEXT_INTRO,
-                         SUBMISSION_TEXT_METADATA, SUBMISSION_TEXT_SUBMIT,
-                         SUBMISSION_TEXT_TASK, SUBMISSION_TITLE)
-from src.get_results_for_task import get_results_for_task_stub
+from src.content import (
+    INTRODUCTION_TEXT,
+    INTRODUCTION_TITLE,
+    LEADERBOARD_TEXT,
+    LEADERBOARD_TITLE,
+    SUBMISSION_TEXT_FILES,
+    SUBMISSION_TEXT_INTRO,
+    SUBMISSION_TEXT_METADATA,
+    SUBMISSION_TEXT_SUBMIT,
+    SUBMISSION_TEXT_TASK,
+    SUBMISSION_TITLE,
+)
+from src.get_results_for_task import get_results_for_task
 from src.submission_uploader import SubmissionUploader
 from src.tasks import TASKS_DESCRIPTIONS, TASKS_PRETTY, TASKS_PRETTY_REVERSE
 
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[logging.StreamHandler()],
+)
+
 submission_uploader = SubmissionUploader(os.environ["DATASET_ID"])
 
 
+def restart_space():
+    HfApi(token=os.environ["HF_TOKEN"]).restart_space(
+        repo_id="JetBrains-Research/long-code-arena"
+    )
+
+
 with gr.Blocks() as demo:
     gr.HTML(INTRODUCTION_TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
@@ -28,7 +50,7 @@ with gr.Blocks() as demo:
             gr.Markdown(TASKS_DESCRIPTIONS[task])
 
             leaderboard_table = gr.components.Dataframe(
-                value=get_results_for_task_stub(task), interactive=False
+                value=get_results_for_task(task), interactive=False
             )
 
     gr.HTML(SUBMISSION_TITLE)
@@ -55,26 +77,25 @@ with gr.Blocks() as demo:
             )
             context_size_textbox = gr.Textbox(
                 label="Context Size",
-                placeholder="Context size (in tokens) used for the submission.",
+                placeholder="Context size in tokens used for the submission (should be an integer).",
             )
         with gr.Column():
             submitted_by_textbox = gr.Textbox(
                 label="Submitted By",
-                placeholder="Who submitted the model, how it will be displayed on the leaderboard.",
+                placeholder="How to display on the leaderboard who submitted the model.",
+            )
+            url_textbox = gr.Textbox(
+                label="Relevant URLs",
+                placeholder="URLs to relevant resources with additional details about your submission (optional).",
             )
             contact_textbox = gr.Textbox(
                 label="Contact Information",
-                placeholder="How Long Code Arena team can contact you in case of any questions (won't go to public dataset).",
+                placeholder="How Long Code Arena team can contact you (won't go to public dataset).",
             )
             comment_textbox = gr.Textbox(
                 label="Comment",
                 placeholder="Any comments you have for Long Code Arena team (optional, won't go to public dataset).",
             )
-            url_textbox = gr.Textbox(
-                label="Relevant URLs",
-                placeholder="URLs to relevant resources (preprint/blogpost/code/etc.) with "
-                "additional details about your submission.",
-            )
 
     gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
     file_output = gr.File(file_count="multiple")
@@ -98,4 +119,7 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(restart_space, "interval", seconds=30 * 60)
+    scheduler.start()
     demo.launch()
```
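The `__main__` block schedules `restart_space` every 30 minutes, presumably so the Space reloads its module-level state and the leaderboard tables re-fetch freshly merged results. A minimal standalone sketch of the same APScheduler pattern (the `refresh` job is a hypothetical stand-in, not part of this commit):

```python
import time

from apscheduler.schedulers.background import BackgroundScheduler


def refresh():
    # Hypothetical stand-in for restart_space: any periodic maintenance job.
    print("refreshing leaderboard data")


scheduler = BackgroundScheduler()
# Run the job on a fixed interval; 30 * 60 seconds matches the commit above.
scheduler.add_job(refresh, "interval", seconds=30 * 60)
scheduler.start()

# Keep the main thread alive; in app.py, demo.launch() plays this role.
time.sleep(5)
scheduler.shutdown()
```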
requirements.txt CHANGED
```diff
@@ -2,6 +2,7 @@ huggingface_hub
 jsonlines
 pandas
 tqdm
+apscheduler
 # CMG metrics
 evaluate
 bert-score
```
src/content.py CHANGED
```diff
@@ -28,4 +28,6 @@ SUBMISSION_TEXT_FILES = """3. Attach one or more files with your model's predict
 * If several files are attached, they will be treated as separate runs of the submitted model (e.g., with different seeds), and the metrics will be averaged across runs. For baselines provided by 🏟️ Long Code Arena Team, the results are averaged across 3 runs.
 * Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional.
 """
-SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press "Submit" button. 🏟️ Long Code Arena Team will review it shortly, and the results will appear in the leaderboard."""
+SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press "Submit" button. 🏟️ Long Code Arena Team will review it shortly, and the results will appear in the leaderboard.
+
+⏳ **Note:** It might take some time (up to 40 minutes) for the PR to be created, since it involves computing metrics for your submission."""
```
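The submission format described in `SUBMISSION_TEXT_FILES` is easy to produce with the `jsonlines` package already listed in `requirements.txt`. A sketch with illustrative values (the file name and contents below are hypothetical):

```python
import jsonlines

# Each line is one example; "prediction" and "reference" are required,
# any additional fields are optional.
predictions = [
    {"prediction": "Fix off-by-one error in pagination", "reference": "Fix pagination bug"},
    {"prediction": "Add retry logic to HTTP client", "reference": "Retry failed requests"},
]

with jsonlines.open("predictions.jsonl", mode="w") as writer:
    writer.write_all(predictions)
```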
src/get_results_for_task.py CHANGED
```diff
@@ -1,7 +1,15 @@
+import logging
+import os
+
 import pandas as pd  # type: ignore[import]
+from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]
+
+from .leaderboard_formatting import COLUMNS_PRETTY, get_columns_per_task
 
+AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
 
-def get_results_for_task_stub(task: str) -> pd.DataFrame:
+
+def _get_results_stub() -> pd.DataFrame:
     stub_df = pd.DataFrame(
         [
             {
@@ -29,3 +37,23 @@ def get_results_for_task_stub(task: str) -> pd.DataFrame:
         ]
     )
     return stub_df
+
+
+def _get_results_dataset(task_id: str) -> pd.DataFrame:
+    results_df = load_dataset(
+        os.environ["DATASET_ID"], task_id, split="test"
+    ).to_pandas()
+    results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
+    results_df["Context Size"] = results_df["Context Size"].map(
+        lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x
+    )
+    results_df = results_df[get_columns_per_task(task_id)]
+    return results_df
+
+
+def get_results_for_task(task_id: str) -> pd.DataFrame:
+    if task_id in AVAILABLE_TASKS:
+        logging.info(f"Retrieving results for {task_id}...")
+        return _get_results_dataset(task_id)
+    logging.info(f"Generating leaderboard stub for {task_id}...")
+    return _get_results_stub()
```
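For clarity, the `Context Size` mapping in `_get_results_dataset` floors values to the nearest thousand and leaves smaller values untouched. Illustrative inputs and outputs:

```python
fmt = lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x

print(fmt(16000))   # 16k
print(fmt(16384))   # 16k (floor division, not rounding)
print(fmt(512))     # 512 (values under 1000 pass through unchanged)
```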
src/leaderboard_formatting.py ADDED
```diff
@@ -0,0 +1,39 @@
+from typing import List
+
+COLUMNS_PRETTY = {
+    "bleu": "BLEU",
+    "chrf": "ChrF",
+    "rouge1": "ROUGE-1",
+    "rouge2": "ROUGE-2",
+    "rougeL": "ROUGE-L",
+    "bertscore": "BERTScore",
+    "bertscore_normalized": "BERTScore (Normalized)",
+    "model_name": "Model",
+    "model_availability": "Availability",
+    "urls": "URLs",
+    "context_size": "Context Size",
+    "submitted_by": "Submitted By",
+}
+
+
+METRICS_PER_TASK = {
+    "commit_message_generation": [
+        "BLEU",
+        "ChrF",
+        "ROUGE-1",
+        "ROUGE-2",
+        "ROUGE-L",
+        "BERTScore",
+        "BERTScore (Normalized)",
+    ]
+}
+
+
+def get_columns_per_task(task_id: str) -> List[str]:
+    metrics_per_task = METRICS_PER_TASK[task_id]
+
+    return (
+        ["Model Name", "Availability", "Context Size"]
+        + metrics_per_task
+        + ["Submitted By", "URLs"]
+    )
```
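Given the definitions above, the column order for the only configured task works out to:

```python
>>> get_columns_per_task("commit_message_generation")
['Model Name', 'Availability', 'Context Size',
 'BLEU', 'ChrF', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L',
 'BERTScore', 'BERTScore (Normalized)',
 'Submitted By', 'URLs']
```

Since `METRICS_PER_TASK` currently has a single entry, calling this with any other `task_id` raises a `KeyError`; in practice `get_results_for_task` only reaches it for tasks present in the results dataset.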
src/submission_uploader.py CHANGED
```diff
@@ -156,6 +156,7 @@ class SubmissionUploader:
 
     def _verify_arguments(
         self,
+        task_pretty: str,
         model_folder: str,
         model_name_pretty: str,
         model_availability: str,
@@ -164,6 +165,9 @@
         submitted_by: str,
         filenames: Optional[List[str]],
     ):
+        assert (
+            task_pretty and task_pretty in TASKS_PRETTY_REVERSE
+        ), "Please, select one of the supported tasks."
         assert (
             model_folder
         ), "Please, specify non-empty name for a directory with a model's results."
@@ -200,6 +204,7 @@
     ) -> str:
         try:
             self._verify_arguments(
+                task_pretty=task_pretty,
                 model_folder=model_folder,
                 model_name_pretty=model_name_pretty,
                 model_availability=model_availability,
@@ -208,12 +213,13 @@
                 submitted_by=submitted_by,
                 filenames=filenames,
             )
-
             pr_title = f"🚀 New submission to {task_pretty} task: {model_name_pretty} with {context_size} context size from {submitted_by}"
 
+            logging.info(f"Start processing {pr_title}")
+
             task_id = TASKS_PRETTY_REVERSE[task_pretty]
 
-            logging.info("Checking if this request is already submitted...")
+            logging.info("Checking if this request has already been submitted...")
             if not force:
                 if model_name_pretty in self._fs.ls(
                     f"datasets/{self._dataset_id}/{task_id}/predictions"
```
src/tasks.py CHANGED
```diff
@@ -17,7 +17,7 @@ TASKS_DESCRIPTIONS = {
 * [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
 * [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
 * [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
-* [BERTScore](https://huggingface.co/spaces/evaluate-metric/berscore)
+* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
 
 For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
 """,
```