Display URLs on the leaderboard, tweak the interface a little, and fix CMG descriptions
- app.py +7 -4
- src/formatting.py +1 -1
- src/get_results_for_task.py +15 -1
- src/leaderboard_formatting.py +3 -3
- src/submission_uploader.py +8 -0
- src/tasks_content.py +4 -2
- src/utils.py +1 -0
app.py CHANGED
@@ -82,7 +82,7 @@ with gr.Blocks() as demo:
         with gr.Column():
             url_textbox = gr.Textbox(
                 label="Relevant URLs",
-                placeholder=…
+                placeholder='URLs to relevant resources with additional details about your submission (optional). Use the following format: "[text1](link1), [text2](link2)".',
             )
             model_availability_textbox = gr.Textbox(
                 label="Availability",
@@ -107,9 +107,12 @@ with gr.Blocks() as demo:
     )

     gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
-    …
-    …
-    …
+    with gr.Row():
+        with gr.Column(variant="panel"):
+            task_specific_instructions = gr.Markdown(get_submission_text_files_for_task(None))
+            task_selection.select(get_submission_text_files_for_task, [task_selection], task_specific_instructions)
+        with gr.Column():
+            file_output = gr.File(file_count="multiple")

     gr.Markdown(SUBMISSION_TEXT_SUBMIT, elem_classes="markdown-text")
     submit_button = gr.Button("Submit")
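For context, a minimal self-contained sketch of the pattern the new rows follow: a selection event re-renders a Markdown panel with task-specific upload instructions. All names and texts below are illustrative, not the app's real ones.

```python
import gradio as gr

# Illustrative task texts; the real app builds these from its task content module.
INSTRUCTIONS = {
    "Commit Message Generation": "Attach one `.jsonl` file with model predictions.",
    "Bug Localization": "Attach one `.jsonl` file per evaluated repository.",
}


def instructions_for_task(task):
    # Default text shown before any task is selected.
    if task is None or task not in INSTRUCTIONS:
        return "Select a task above to see the expected file format."
    return INSTRUCTIONS[task]


with gr.Blocks() as demo:
    task_selection = gr.Radio(choices=list(INSTRUCTIONS), label="Task")
    with gr.Row():
        with gr.Column(variant="panel"):
            task_specific_instructions = gr.Markdown(instructions_for_task(None))
            # Re-render the panel whenever the user picks a task.
            task_selection.select(instructions_for_task, [task_selection], task_specific_instructions)
        with gr.Column():
            file_output = gr.File(file_count="multiple")

if __name__ == "__main__":
    demo.launch()
```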
src/formatting.py CHANGED
@@ -10,5 +10,5 @@ def styled_message(message):
     return f"<p style='color: green; font-size: 20px; text-align: center;'>{message}</p>"


-def model_hyperlink(…
+def model_hyperlink(model_name, link):
     return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
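The positional order is now (model_name, link), which matches the capture-group order of `MD_LINK_PATTERN` (text first, then link), so groups from a parsed Markdown link can be unpacked straight into the call. A quick standalone check (the URL below is a placeholder):

```python
# Copy of the updated helper, exercised on its own.
def model_hyperlink(model_name, link):
    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'


print(model_hyperlink("CodeLlama-7b (instruct)", "https://example.com/model"))
# -> '<a target="_blank" href="https://example.com/model" ...>CodeLlama-7b (instruct)</a>'
```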
src/get_results_for_task.py CHANGED
@@ -1,5 +1,6 @@
 import logging
 import os
+import re

 import pandas as pd  # type: ignore[import]
 from datasets import get_dataset_config_names, load_dataset  # type: ignore[import]
@@ -12,6 +13,7 @@ from .leaderboard_formatting import (
     get_columns_per_task,
 )
 from .tasks_content import TASKS_PRETTY_REVERSE
+from .utils import MD_LINK_PATTERN

 try:
     AVAILABLE_TASKS = get_dataset_config_names(os.environ["DATASET_ID"])
@@ -32,6 +34,7 @@ def _get_results_stub() -> pd.DataFrame:
                 "BERTScore": "X",
                 "BERTScore (Normalized)": "X",
                 "Submitted By": "🏟 Long Code Arena Team",
+                "Resources": "",
             },
             {
                 "Model Name": "CodeLlama-7b (instruct)",
@@ -43,14 +46,24 @@ def _get_results_stub() -> pd.DataFrame:
                 "BERTScore": "X",
                 "BERTScore (Normalized)": "X",
                 "Submitted By": "🏟 Long Code Arena Team",
+                "Resources": "",
             },
         ]
     )
     return stub_df


+def _process_urls(raw_urls: str) -> str:
+    if not raw_urls:
+        return raw_urls
+    html_urls = [model_hyperlink(*re.search(MD_LINK_PATTERN, url.strip()).groups()) for url in raw_urls.split(",")]
+    return ", ".join(html_urls)
+
+
 def _get_results_dataset(task_id: str) -> pd.DataFrame:
-    results_df = load_dataset(…
+    results_df = load_dataset(
+        os.environ["DATASET_ID"], task_id, split="test", download_mode="force_redownload"
+    ).to_pandas()
     results_df = results_df.rename(columns=COLUMNS_PRETTY, errors="ignore")
     results_df["Context Size"] = results_df["Context Size"].map(lambda x: f"{int(x) // 1000}k" if int(x) >= 1000 else x)

@@ -66,6 +79,7 @@ def _get_results_dataset(task_id: str) -> pd.DataFrame:
         model_hyperlink(link=link, model_name=model_name) if link else model_name
         for link, model_name in zip(results_df["model_url"], results_df["Model Name"])
     ]
+    results_df["Resources"] = [_process_urls(urls) for urls in results_df["Resources"]]
     results_df = results_df[get_columns_per_task(task_id)]
     return results_df
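A standalone sketch of what `_process_urls` does to the submitted "Relevant URLs" string before the leaderboard renders it. The links are placeholders and `model_hyperlink` is simplified here (the real helper also adds inline styling):

```python
import re

MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"  # same pattern as src/utils.py


def model_hyperlink(model_name, link):
    return f'<a target="_blank" href="{link}">{model_name}</a>'


raw_urls = "[paper](https://example.com/paper), [code](https://example.com/code)"
html_urls = [
    # Each comma-separated entry is a Markdown link; its (text, url) groups
    # feed model_hyperlink positionally.
    model_hyperlink(*re.search(MD_LINK_PATTERN, url.strip()).groups())
    for url in raw_urls.split(",")
]
print(", ".join(html_urls))
# <a target="_blank" href="https://example.com/paper">paper</a>, <a target="_blank" href="https://example.com/code">code</a>
```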
src/leaderboard_formatting.py CHANGED
@@ -10,7 +10,7 @@ COLUMNS_PRETTY = {
     "bertscore_normalized": "BERTScore (Normalized)",
     "model_name": "Model Name",
     "model_availability": "Availability",
-    "urls": "…
+    "urls": "Resources",
     "context_size": "Context Size",
     "submitted_by": "Submitted By",
 }
@@ -35,9 +35,9 @@ SORT_COLUMN_PER_TASK = {"commit_message_generation": "ROUGE-1"}
 def get_columns_per_task(task_id: str) -> List[str]:
     metrics_per_task = METRICS_PER_TASK[task_id]

-    return ["Model Name", "Availability", "Context Size"] + metrics_per_task + ["Submitted By"]
+    return ["Model Name", "Availability", "Context Size"] + metrics_per_task + ["Submitted By", "Resources"]


 def get_types_per_task(task_id: str) -> List[str]:
     metrics_per_task = METRICS_PER_TASK.get(task_id, (0, 0, 0, 0, 0))
-    return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown"]
+    return ["html", "markdown", "markdown"] + ["number" for _ in metrics_per_task] + ["markdown", "html"]
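The two functions are edited together because the leaderboard pairs column names with datatypes by position, so the new "Resources" column needs a matching "html" type for its links to render. A small sketch with made-up metric names:

```python
# Illustrative metric list; in the app it comes from METRICS_PER_TASK[task_id].
metrics = ["BLEU", "ROUGE-1", "ChrF"]

columns = ["Model Name", "Availability", "Context Size"] + metrics + ["Submitted By", "Resources"]
types = ["html", "markdown", "markdown"] + ["number" for _ in metrics] + ["markdown", "html"]

# The lists must stay the same length and order, otherwise columns get the wrong type.
assert len(columns) == len(types)
for name, dtype in zip(columns, types):
    print(f"{name}: {dtype}")
```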
src/submission_uploader.py CHANGED
@@ -1,6 +1,7 @@
 import json
 import logging
 import os
+import re
 import time
 from tempfile import TemporaryDirectory
 from typing import List, Optional
@@ -13,6 +14,7 @@ from tqdm import tqdm
 from .evaluation import METRICS
 from .formatting import styled_error, styled_message, styled_warning
 from .tasks_content import TASKS_PRETTY_REVERSE
+from .utils import MD_LINK_PATTERN


 class AlreadyExists(Exception):
@@ -199,6 +201,12 @@ class SubmissionUploader:
         except:
             raise ValueError("Please, specify a model's context size as an integer (e.g., 16000).")

+        if urls is not None and "," in urls:
+            urls_list = urls.split(",")
+            assert all(
+                re.match(rf"^{MD_LINK_PATTERN}$", url.strip()) for url in urls_list
+            ), 'Please, use the following format for URLs: "[text1](link1), [text2](link2)"'
+
         assert submitted_by, "Please, specify non-empty information about a submission's author(s)."
         assert filenames, "Please, attach at least one file with predictions."
         assert contact_information, "Please, fill in the field with contact information."
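The new check rejects a submission whose "Relevant URLs" field is not a comma-separated list of Markdown links. A standalone illustration of the anchored regex (inputs below are examples):

```python
import re

MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"  # same pattern as src/utils.py


def urls_look_valid(urls):
    # Every comma-separated entry must be a full "[text](link)" Markdown link.
    return all(re.match(rf"^{MD_LINK_PATTERN}$", url.strip()) for url in urls.split(","))


print(urls_look_valid("[paper](https://example.com/paper), [code](https://example.com/code)"))  # True
print(urls_look_valid("https://example.com/paper, https://example.com/code"))  # False
```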
src/tasks_content.py CHANGED
@@ -13,7 +13,7 @@ TASKS_PRETTY_REVERSE = {value: key for key, value in TASKS_PRETTY.items()}
 TASKS_DESCRIPTIONS = {
     "commit_message_generation": """# Commit Message Generation\n

-Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-…
+Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-commit-message-generation](https://huggingface.co/datasets/JetBrains-Research/lca-commit-message-generation) includes 163 manually curated commits from Python projects.

 We use the following metrics for evaluation:
 * [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
@@ -21,7 +21,9 @@ TASKS_DESCRIPTIONS = {
 * [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
 * [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)

-For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
+For further details on the dataset and the baselines from 🏟️ Long Code Arena Team, refer to `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
+
+**Note.** The leaderboard is sorted by ROUGE-1 metric by default.
     """,
     "bug_localization": "cool description for Bug Localization on Issue task",
     "module_to_text": "cool description for Module-to-Text task",
src/utils.py ADDED
@@ -0,0 +1 @@
+MD_LINK_PATTERN = r"\[(.*)\]\((.*?)\)"