saridormi committed
Commit 9203553 • 1 Parent(s): d47b526

Start rendering actual data + minor improvements

app.py CHANGED
```diff
@@ -1,19 +1,41 @@
+import logging
 import os
 
 import gradio as gr  # type: ignore[import]
+from apscheduler.schedulers.background import BackgroundScheduler
+from huggingface_hub import HfApi
 
-from src.content import (INTRODUCTION_TEXT, INTRODUCTION_TITLE,
-                         LEADERBOARD_TEXT, LEADERBOARD_TITLE,
-                         SUBMISSION_TEXT_FILES, SUBMISSION_TEXT_INTRO,
-                         SUBMISSION_TEXT_METADATA, SUBMISSION_TEXT_SUBMIT,
-                         SUBMISSION_TEXT_TASK, SUBMISSION_TITLE)
-from src.get_results_for_task import get_results_for_task_stub
+from src.content import (
+    INTRODUCTION_TEXT,
+    INTRODUCTION_TITLE,
+    LEADERBOARD_TEXT,
+    LEADERBOARD_TITLE,
+    SUBMISSION_TEXT_FILES,
+    SUBMISSION_TEXT_INTRO,
+    SUBMISSION_TEXT_METADATA,
+    SUBMISSION_TEXT_SUBMIT,
+    SUBMISSION_TEXT_TASK,
+    SUBMISSION_TITLE,
+)
+from src.get_results_for_task import get_results_for_task
 from src.submission_uploader import SubmissionUploader
 from src.tasks import TASKS_DESCRIPTIONS, TASKS_PRETTY, TASKS_PRETTY_REVERSE
 
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s [%(levelname)s] %(message)s",
+    handlers=[logging.StreamHandler()],
+)
+
 submission_uploader = SubmissionUploader(os.environ["DATASET_ID"])
 
 
+def restart_space():
+    HfApi(token=os.environ["HF_TOKEN"]).restart_space(
+        repo_id="JetBrains-Research/long-code-arena"
+    )
+
+
 with gr.Blocks() as demo:
     gr.HTML(INTRODUCTION_TITLE)
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
@@ -28,7 +50,7 @@ with gr.Blocks() as demo:
             gr.Markdown(TASKS_DESCRIPTIONS[task])
 
             leaderboard_table = gr.components.Dataframe(
-                value=get_results_for_task_stub(task), interactive=False
+                value=get_results_for_task(task), interactive=False
             )
 
     gr.HTML(SUBMISSION_TITLE)
@@ -55,26 +77,25 @@ with gr.Blocks() as demo:
             )
             context_size_textbox = gr.Textbox(
                 label="Context Size",
-                placeholder="Context size (in tokens) used for the submission.",
+                placeholder="Context size in tokens used for the submission (should be an integer).",
             )
         with gr.Column():
             submitted_by_textbox = gr.Textbox(
                 label="Submitted By",
-                placeholder="Who submitted the model, how it will be displayed on the leaderboard.",
+                placeholder="How to display on the leaderboard who submitted the model.",
+            )
+            url_textbox = gr.Textbox(
+                label="Relevant URLs",
+                placeholder="URLs to relevant resources with additional details about your submission (optional).",
             )
             contact_textbox = gr.Textbox(
                 label="Contact Information",
-                placeholder="How Long Code Arena team can contact you in case of any questions (won't go to public dataset).",
+                placeholder="How Long Code Arena team can contact you (won't go to public dataset).",
             )
             comment_textbox = gr.Textbox(
                 label="Comment",
                 placeholder="Any comments you have for Long Code Arena team (optional, won't go to public dataset).",
             )
-            url_textbox = gr.Textbox(
-                label="Relevant URLs",
-                placeholder="URLs to relevant resources (preprint/blogpost/code/etc.) with "
-                "additional details about your submission.",
-            )
 
     gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
     file_output = gr.File(file_count="multiple")
@@ -98,4 +119,7 @@ with gr.Blocks() as demo:
     )
 
 if __name__ == "__main__":
+    scheduler = BackgroundScheduler()
+    scheduler.add_job(restart_space, "interval", seconds=30 * 60)
+    scheduler.start()
     demo.launch()
```
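The `__main__` block schedules `restart_space` every 30 minutes, presumably so the Space reloads its module-level state and the leaderboard tables re-fetch freshly merged results. A minimal standalone sketch of the same APScheduler pattern (the `refresh` job is a hypothetical stand-in, not part of this commit):

```python
import time

from apscheduler.schedulers.background import BackgroundScheduler


def refresh():
    # Hypothetical stand-in for restart_space: any periodic maintenance job.
    print("refreshing leaderboard data")


scheduler = BackgroundScheduler()
# Run the job on a fixed interval; 30 * 60 seconds matches the commit above.
scheduler.add_job(refresh, "interval", seconds=30 * 60)
scheduler.start()

# Keep the main thread alive; in app.py, demo.launch() plays this role.
time.sleep(5)
scheduler.shutdown()
```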
requirements.txt CHANGED
```diff
@@ -2,6 +2,7 @@ huggingface_hub
 jsonlines
 pandas
 tqdm
+apscheduler
 # CMG metrics
 evaluate
 bert-score
```
src/content.py CHANGED
```diff
@@ -28,4 +28,6 @@ SUBMISSION_TEXT_FILES = """3. Attach one or more files with your model's predict
 * If several files are attached, they will be treated as separate runs of the submitted model (e.g., with different seeds), and the metrics will be averaged across runs. For baselines provided by 🏟️ Long Code Arena Team, the results are averaged across 3 runs.
 * Please, attach files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results). Make sure to include `"prediction"` and `"reference"` fields for each example, the rest are optional.
 """
-SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press "Submit" button. 🏟️ Long Code Arena Team will review it shortly, and the results will appear in the leaderboard."""
+SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press "Submit" button. 🏟️ Long Code Arena Team will review it shortly, and the results will appear in the leaderboard.
+
+⏳ **Note:** It might take some time (up to 40 minutes) for the PR to be created, since it involves computing metrics for your submission."""
```
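The submission format described in `SUBMISSION_TEXT_FILES` is easy to produce with the `jsonlines` package already listed in `requirements.txt`. A sketch with illustrative values (the file name and contents below are hypothetical):

```python
import jsonlines

# Each line is one example; "prediction" and "reference" are required,
# any additional fields are optional.
predictions = [
    {"prediction": "Fix off-by-one error in pagination", "reference": "Fix pagination bug"},
    {"prediction": "Add retry logic to HTTP client", "reference": "Retry failed requests"},
]

with jsonlines.open("predictions.jsonl", mode="w") as writer:
    writer.write_all(predictions)
```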
src/get_results_for_task.py CHANGED
```diff
@@ -1,7 +1,15 @@
+import logging
+import os
+
 import pandas as pd  # type: ignore[import]
+from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]
+
+from .leaderboard_formatting import COLUMNS_PRETTY, get_columns_per_task
 
+AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
 
-def get_results_for_task_stub(task: str) -> pd.DataFrame:
+
+def _get_results_stub() -> pd.DataFrame:
     stub_df = pd.DataFrame(
         [
             {
@@ -29,3 +37,23 @@ def get_results_for_task_stub(task: str) -> pd.DataFrame:
         ]
     )
     return stub_df
+
+
+def _get_results_dataset(task_id: str) -> pd.DataFrame:
+    results_df = load_dataset(
+        os.environ["DATASET_ID"], task_id, split="test"
+    ).to_pandas()
+    results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
+    results_df["Context Size"] = results_df["Context Size"].map(
+        lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x
+    )
+    results_df = results_df[get_columns_per_task(task_id)]
+    return results_df
+
+
+def get_results_for_task(task_id: str) -> pd.DataFrame:
+    if task_id in AVAILABLE_TASKS:
+        logging.info(f"Retrieving results for {task_id}...")
+        return _get_results_dataset(task_id)
+    logging.info(f"Generating leaderboard stub for {task_id}...")
+    return _get_results_stub()
```
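For clarity, the `Context Size` mapping in `_get_results_dataset` floors values to the nearest thousand and leaves smaller values untouched. Illustrative inputs and outputs:

```python
fmt = lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x

print(fmt(16000))   # 16k
print(fmt(16384))   # 16k (floor division, not rounding)
print(fmt(512))     # 512 (values under 1000 pass through unchanged)
```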
src/leaderboard_formatting.py ADDED
```diff
@@ -0,0 +1,39 @@
+from typing import List
+
+COLUMNS_PRETTY = {
+    "bleu": "BLEU",
+    "chrf": "ChrF",
+    "rouge1": "ROUGE-1",
+    "rouge2": "ROUGE-2",
+    "rougeL": "ROUGE-L",
+    "bertscore": "BERTScore",
+    "bertscore_normalized": "BERTScore (Normalized)",
+    "model_name": "Model",
+    "model_availability": "Availability",
+    "urls": "URLs",
+    "context_size": "Context Size",
+    "submitted_by": "Submitted By",
+}
+
+
+METRICS_PER_TASK = {
+    "commit_message_generation": [
+        "BLEU",
+        "ChrF",
+        "ROUGE-1",
+        "ROUGE-2",
+        "ROUGE-L",
+        "BERTScore",
+        "BERTScore (Normalized)",
+    ]
+}
+
+
+def get_columns_per_task(task_id: str) -> List[str]:
+    metrics_per_task = METRICS_PER_TASK[task_id]
+
+    return (
+        ["Model Name", "Availability", "Context Size"]
+        + metrics_per_task
+        + ["Submitted By", "URLs"]
+    )
```
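Given the definitions above, the column order for the only configured task works out to:

```python
>>> get_columns_per_task("commit_message_generation")
['Model Name', 'Availability', 'Context Size',
 'BLEU', 'ChrF', 'ROUGE-1', 'ROUGE-2', 'ROUGE-L',
 'BERTScore', 'BERTScore (Normalized)',
 'Submitted By', 'URLs']
```

Since `METRICS_PER_TASK` currently has a single entry, calling this with any other `task_id` raises a `KeyError`; in practice `get_results_for_task` only reaches it for tasks present in the results dataset.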
src/submission_uploader.py CHANGED
```diff
@@ -156,6 +156,7 @@ class SubmissionUploader:
 
     def _verify_arguments(
         self,
+        task_pretty: str,
         model_folder: str,
         model_name_pretty: str,
         model_availability: str,
@@ -164,6 +165,9 @@
         submitted_by: str,
         filenames: Optional[List[str]],
     ):
+        assert (
+            task_pretty and task_pretty in TASKS_PRETTY_REVERSE
+        ), "Please, select one of the supported tasks."
         assert (
             model_folder
         ), "Please, specify non-empty name for a directory with a model's results."
@@ -200,6 +204,7 @@
     ) -> str:
         try:
             self._verify_arguments(
+                task_pretty=task_pretty,
                 model_folder=model_folder,
                 model_name_pretty=model_name_pretty,
                 model_availability=model_availability,
@@ -208,12 +213,13 @@
                 submitted_by=submitted_by,
                 filenames=filenames,
             )
-
             pr_title = f"🚀 New submission to {task_pretty} task: {model_name_pretty} with {context_size} context size from {submitted_by}"
 
+            logging.info(f"Start processing {pr_title}")
+
             task_id = TASKS_PRETTY_REVERSE[task_pretty]
 
-            logging.info("Checking if this request is already submitted...")
+            logging.info("Checking if this request has already been submitted...")
             if not force:
                 if model_name_pretty in self._fs.ls(
                     f"datasets/{self._dataset_id}/{task_id}/predictions"
```
src/tasks.py CHANGED
```diff
@@ -17,7 +17,7 @@ TASKS_DESCRIPTIONS = {
 * [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
 * [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
 * [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
-* [BERTScore](https://huggingface.co/spaces/evaluate-metric/berscore)
+* [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
 
 For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
 """,
```