saridormi committed on
Commit e2473e2 • 1 Parent(s): 779249f

Add initial space version

app.py ADDED
@@ -0,0 +1,101 @@
+ import os
+
+ import gradio as gr
+
+ from src.content import (INTRODUCTION_TEXT, INTRODUCTION_TITLE,
+                          LEADERBOARD_TEXT, LEADERBOARD_TITLE,
+                          SUBMISSION_TEXT_FILES, SUBMISSION_TEXT_INTRO,
+                          SUBMISSION_TEXT_METADATA, SUBMISSION_TEXT_SUBMIT,
+                          SUBMISSION_TEXT_TASK, SUBMISSION_TITLE)
+ from src.get_results_for_task import get_results_for_task_stub
+ from src.submission_uploader import SubmissionUploader
+ from src.tasks import TASKS_DESCRIPTIONS, TASKS_PRETTY, TASKS_PRETTY_REVERSE
+
+ submission_uploader = SubmissionUploader(os.environ["DATASET_ID"])
+
+
+ with gr.Blocks() as demo:
+     gr.HTML(INTRODUCTION_TITLE)
+     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+
+     gr.HTML(LEADERBOARD_TITLE)
+     gr.Markdown(LEADERBOARD_TEXT, elem_classes="markdown-text")
+
+     with gr.Tabs():
+         for task in TASKS_PRETTY_REVERSE:
+             with gr.TabItem(task):
+                 with gr.Row():
+                     gr.Markdown(TASKS_DESCRIPTIONS[task])
+
+                 leaderboard_table = gr.components.Dataframe(
+                     value=get_results_for_task_stub(task), interactive=False
+                 )
+
+     gr.HTML(SUBMISSION_TITLE)
+     gr.Markdown(SUBMISSION_TEXT_INTRO, elem_classes="markdown-text")
+
+     with gr.Accordion("🚀 Submit new results"):
+         gr.Markdown(SUBMISSION_TEXT_TASK, elem_classes="markdown-text")
+         task = gr.Radio(TASKS_PRETTY_REVERSE.keys(), label="Task")
+
+         gr.Markdown(SUBMISSION_TEXT_METADATA, elem_classes="markdown-text")
+         with gr.Row():
+             with gr.Column():
+                 model_folder_textbox = gr.Textbox(
+                     label="Model Folder",
+                     placeholder="Name of the folder for this submission in our results dataset.",
+                 )
+                 model_name_textbox = gr.Textbox(
+                     label="Model Name",
+                     placeholder="Model name as it should be displayed on the leaderboard.",
+                 )
+                 model_availability_textbox = gr.Textbox(
+                     label="Availability",
+                     placeholder="Information about the model's availability and licensing.",
+                 )
+                 context_size_textbox = gr.Textbox(
+                     label="Context Size",
+                     placeholder="Context size (in tokens) used for the submission.",
+                 )
+             with gr.Column():
+                 submitted_by_textbox = gr.Textbox(
+                     label="Submitted By",
+                     placeholder="Submission author(s) as they should be displayed on the leaderboard.",
+                 )
+                 contact_textbox = gr.Textbox(
+                     label="Contact Information",
+                     placeholder="How the Long Code Arena team can contact you in case of any questions (won't be included in the public dataset).",
+                 )
+                 comment_textbox = gr.Textbox(
+                     label="Comment",
+                     placeholder="Any comments for the Long Code Arena team (optional, won't be included in the public dataset).",
+                 )
+                 url_textbox = gr.Textbox(
+                     label="Relevant URLs",
+                     placeholder="URLs to relevant resources (preprint/blogpost/code/etc.) with "
+                     "additional details about your submission.",
+                 )
+
+         gr.Markdown(SUBMISSION_TEXT_FILES, elem_classes="markdown-text")
+         file_output = gr.File(file_count="multiple")
+
+         gr.Markdown(SUBMISSION_TEXT_SUBMIT, elem_classes="markdown-text")
+         submit_button = gr.Button("Submit")
+         submission_result = gr.Markdown()
+         submit_button.click(
+             submission_uploader.upload_files,
+             [
+                 task,
+                 model_folder_textbox,
+                 model_name_textbox,
+                 model_availability_textbox,
+                 url_textbox,
+                 context_size_textbox,
+                 submitted_by_textbox,
+                 file_output,
+             ],
+             submission_result,
+         )
+
+ if __name__ == "__main__":
+     demo.launch()
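Note on configuration: app.py reads DATASET_ID at import time, and SubmissionUploader additionally reads HF_TOKEN, so both must be set (on a Hugging Face Space, typically via Space secrets/variables). A minimal sketch for a local run, with placeholder values:

import os

# Both values are placeholders; the token needs write access to the target dataset.
os.environ["DATASET_ID"] = "JetBrains-Research/lca-results"
os.environ["HF_TOKEN"] = "hf_xxx"

# With the variables set, the app can be started with `python app.py`.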
requirements.txt ADDED
@@ -0,0 +1 @@
+ huggingface_hub
src/content.py ADDED
@@ -0,0 +1,31 @@
+ # ================================
+ # =            ABOUT            =
+ # ================================
+ INTRODUCTION_TITLE = """<h1 align="center">🏟️ Long Code Arena</h1>"""
+
+ INTRODUCTION_TEXT = """🏟️ Long Code Arena is a benchmark of code-related tasks with large contexts, up to a whole code repository.
+ It currently spans six different tasks."""
+
+ # ================================
+ # =         LEADERBOARD         =
+ # ================================
+ LEADERBOARD_TITLE = '<h2 align="center">🏅 Leaderboard</h2>'
+
+ LEADERBOARD_TEXT = """Raw results from the leaderboard are available in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results)."""
+
+ # ================================
+ # =          SUBMISSION         =
+ # ================================
+ SUBMISSION_TITLE = '<h2 align="center">📩 Make A Submission</h2>'
+
+ SUBMISSION_TEXT_INTRO = """Use the form below to submit new results to 🏟️ Long Code Arena. If any problems arise, don't hesitate to contact us by email `TODO` or open a discussion 💛"""
+
+ SUBMISSION_TEXT_TASK = """1. Select the task you want to submit results for."""
+
+ SUBMISSION_TEXT_METADATA = """2. Fill in some metadata about your submission."""
+
+ SUBMISSION_TEXT_FILES = """3. Attach one or more files with your model's predictions.
+ * If several files are attached, they will be treated as separate runs of the submitted model (e.g., with different seeds), and the metrics will be averaged across the runs. For the baselines provided by the 🏟️ Long Code Arena Team, the results are averaged across 3 runs.
+ * Please attach the files in [JSONLines format](https://jsonlines.org/). For an example, check the predictions provided by the 🏟️ Long Code Arena Team in 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results). Make sure to include the `"prediction"` and `"reference"` fields for each example; the rest are optional.
+ """
+ SUBMISSION_TEXT_SUBMIT = """All set! A new PR to 🤗 [JetBrains-Research/lca-results](https://huggingface.co/datasets/JetBrains-Research/lca-results) should be opened when you press the "Submit" button. The 🏟️ Long Code Arena Team will review it shortly, and the results will appear on the leaderboard."""
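To make the submission format described above concrete, here is a small sketch of how one line of a predictions file could be produced; per the instructions only the "prediction" and "reference" fields are required, and the values below are purely illustrative:

import json

# Illustrative row: "prediction" and "reference" are the required fields,
# any additional fields are optional.
row = {
    "prediction": "Fix off-by-one error in the tokenizer loop",
    "reference": "Fix off-by-one in tokenizer",
}
with open("predictions.jsonl", "w") as f:
    f.write(json.dumps(row) + "\n")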
src/get_results_for_task.py ADDED
@@ -0,0 +1,33 @@
+ import pandas as pd
+
+ RESULTS_DATASET = "JetBrains-Research/lca-results"
+
+
+ def get_results_for_task_stub(task: str) -> pd.DataFrame:
+     stub_df = pd.DataFrame(
+         [
+             {
+                 "Model Name": "GPT-4",
+                 "Availability": "Proprietary",
+                 "Context Size": "16k",
+                 "BLEU": "X",
+                 "ROUGE": "X",
+                 "ChrF": "X",
+                 "BERTScore": "X",
+                 "BERTScore (Normalized)": "X",
+                 "Submitted By": "🏟 Long Code Arena Team",
+             },
+             {
+                 "Model Name": "CodeLlama-7b (instruct)",
+                 "Availability": "Llama 2 license",
+                 "Context Size": "16k",
+                 "BLEU": "X",
+                 "ROUGE": "X",
+                 "ChrF": "X",
+                 "BERTScore": "X",
+                 "BERTScore (Normalized)": "X",
+                 "Submitted By": "🏟 Long Code Arena Team",
+             },
+         ]
+     )
+     return stub_df
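The function above is a stub: it returns hard-coded rows and never reads RESULTS_DATASET. A possible non-stub version might look roughly like the sketch below; the per-task results.jsonl path and the use of a single aggregated file are assumptions for illustration, not the actual layout of the results dataset:

import pandas as pd
from huggingface_hub import hf_hub_download

RESULTS_DATASET = "JetBrains-Research/lca-results"  # as defined in the module above


def get_results_for_task(task: str) -> pd.DataFrame:
    # Hypothetical layout: one JSONLines file with aggregated metrics per task.
    path = hf_hub_download(
        repo_id=RESULTS_DATASET,
        repo_type="dataset",
        filename=f"{task}/results.jsonl",  # assumed path; adjust to the real dataset layout
    )
    return pd.read_json(path, lines=True)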
src/submission_uploader.py ADDED
@@ -0,0 +1,146 @@
+ import json
+ import logging
+ import os
+ from typing import List, Optional
+
+ from huggingface_hub import (CommitInfo, CommitOperationAdd, Discussion, HfApi,
+                              HfFileSystem)
+
+ from .tasks import TASKS_PRETTY_REVERSE
+
+
+ class AlreadyExists(Exception):
+     pass
+
+
+ class SubmissionUploader:
+     """Class for adding new files to a dataset on the Hub and opening a PR.
+
+     Heavily influenced by these amazing spaces:
+     * https://huggingface.co/spaces/safetensors/convert
+     * https://huggingface.co/spaces/gaia-benchmark/leaderboard
+     """
+
+     def __init__(self, dataset_id: str):
+         self._api = HfApi(token=os.environ["HF_TOKEN"])
+         self._fs = HfFileSystem(token=os.environ["HF_TOKEN"])
+         self._dataset_id = dataset_id
+
+     def _get_previous_pr(self, pr_title: str) -> Optional[Discussion]:
+         """Searches among the discussions of the dataset repo for an open PR with the given title."""
+         try:
+             discussions = self._api.get_repo_discussions(repo_id=self._dataset_id, repo_type="dataset")
+         except Exception:
+             return None
+         for discussion in discussions:
+             if (
+                 discussion.status == "open"
+                 and discussion.is_pull_request
+                 and discussion.title == pr_title
+             ):
+                 return discussion
+
+     def _upload_files(
+         self,
+         task_id: str,
+         model_folder: str,
+         model_name_pretty: str,
+         model_availability: str,
+         urls: str,
+         context_size: str,
+         submitted_by: str,
+         filenames: Optional[List[str]],
+     ) -> List[CommitOperationAdd]:
+         # add predictions files (assumed dataset layout: <task_id>/predictions/<model_folder>/)
+         commit_operations = [
+             CommitOperationAdd(
+                 path_in_repo=f"{task_id}/predictions/{model_folder}/{os.path.basename(filename)}",
+                 path_or_fileobj=filename,
+             )
+             for filename in filenames
+         ]
+
+         # add metadata file
+         metadata_dict = {
+             "model_name": model_name_pretty,
+             "model_availability": model_availability,
+             "urls": urls,
+             "context_size": context_size,
+             "submitted_by": submitted_by,
+         }
+         with open("metadata.json", "w") as f:
+             json.dump(metadata_dict, f)
+         commit_operations.append(
+             CommitOperationAdd(
+                 path_in_repo=f"{task_id}/predictions/{model_folder}/metadata.json",
+                 path_or_fileobj="metadata.json",
+             )
+         )
+
+         return commit_operations
+
+     def upload_files(
+         self,
+         task_pretty: str,
+         model_folder: str,
+         model_name_pretty: str,
+         model_availability: str,
+         urls: str,
+         context_size: str,
+         submitted_by: str,
+         filenames: Optional[List[str]],
+         force: bool = False,
+     ) -> Optional[CommitInfo]:
+         pr_title = f"🚀 New submission to {task_pretty} task: {model_name_pretty} with {context_size} context size from {submitted_by}"
+
+         task_id = TASKS_PRETTY_REVERSE[task_pretty]
+
+         if not force:
+             # HfFileSystem expects the `datasets/` prefix and returns full paths,
+             # so compare folder basenames rather than the raw listing entries.
+             existing_folders = self._fs.ls(
+                 f"datasets/{self._dataset_id}/{task_id}/predictions", detail=False
+             )
+             if any(
+                 os.path.basename(folder) == model_folder
+                 for folder in existing_folders
+             ):
+                 raise AlreadyExists(
+                     f"{model_name_pretty} is already present in {self._dataset_id}."
+                 )
+
+         prev_pr = self._get_previous_pr(pr_title)
+         if prev_pr is not None:
+             url = f"https://huggingface.co/datasets/{self._dataset_id}/discussions/{prev_pr.num}"
+             raise AlreadyExists(
+                 f"{self._dataset_id} already has an open PR for this submission: {url}."
+             )
+
+         commit_operations = self._upload_files(
+             task_id=task_id,
+             model_folder=model_folder,
+             model_name_pretty=model_name_pretty,
+             model_availability=model_availability,
+             urls=urls,
+             context_size=context_size,
+             submitted_by=submitted_by,
+             filenames=filenames,
+         )
+
+         new_pr = self._api.create_commit(
+             repo_id=self._dataset_id, repo_type="dataset",
+             operations=commit_operations,
+             commit_message=pr_title,
+             commit_description=f"""New submission to {task_pretty} task in 🏟️ Long Code Arena benchmark!
+
+ * Model name: {model_name_pretty}
+ * Model availability: {model_availability}
+ * Context Size: {context_size}
+ * Relevant URLs: {urls}
+ * Submitted By: {submitted_by}
+ """,
+             create_pr=True,
+         )
+         logging.info(f"PR created at {new_pr.pr_url}")
+
+         return new_pr
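For reference, a direct (non-UI) call to the uploader would look roughly like this; all metadata values and the file name are placeholders:

import os

from src.submission_uploader import SubmissionUploader

uploader = SubmissionUploader(os.environ["DATASET_ID"])
pr = uploader.upload_files(
    task_pretty="Commit Message Generation",
    model_folder="my-model",
    model_name_pretty="My Model",
    model_availability="Apache 2.0 license",
    urls="https://example.com",
    context_size="16k",
    submitted_by="Jane Doe",
    filenames=["predictions.jsonl"],  # local JSONLines file(s) with the model's predictions
)
print(pr.pr_url)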
src/tasks.py ADDED
@@ -0,0 +1,29 @@
+ TASKS_PRETTY = {
+     "cmg": "Commit Message Generation",
+     "bug_localization": "Bug Localization on Issue",
+     "module_to_text": "Module-to-Text",
+     "library_usage": "Library Usage Examples Generation",
+     "project_code_completion": "Project-level Code Completion",
+     "bug_localization_build_logs": "Bug Localization on Build Logs",
+ }
+ TASKS_PRETTY_REVERSE = {value: key for key, value in TASKS_PRETTY.items()}
+
+ TASKS_DESCRIPTIONS = {
+     "Commit Message Generation": """# Commit Message Generation\n
+
+ Our Commit Message Generation benchmark 🤗 [JetBrains-Research/lca-cmg](https://huggingface.co/datasets/JetBrains-Research/lca-cmg) includes 163 manually curated commits from Python projects.
+
+ We use the following metrics for evaluation:
+ * [BLEU](https://huggingface.co/spaces/evaluate-metric/sacrebleu)
+ * [ROUGE](https://huggingface.co/spaces/evaluate-metric/rouge)
+ * [ChrF](https://huggingface.co/spaces/evaluate-metric/chrf)
+ * [BERTScore](https://huggingface.co/spaces/evaluate-metric/bertscore)
+
+ For further details on the dataset and the baselines from the 🏟️ Long Code Arena Team, refer to the `commit_message_generation` folder in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines) or to our preprint (TODO).
+ """,
+     "Bug Localization on Issue": "cool description for Bug Localization on Issue task",
+     "Module-to-Text": "cool description for Module-to-Text task",
+     "Library Usage Examples Generation": "cool description for Library Usage Examples Generation task",
+     "Project-level Code Completion": "cool description for Project-level Code Completion task",
+     "Bug Localization on Build Logs": "cool description for Bug Localization on Build Logs task",
+ }