Areyde committed on
Commit
946246a
β€’
1 Parent(s): f99dd0e

Update src/tasks_content.py

Browse files
Files changed (1) hide show
  1. src/tasks_content.py +19 -0
src/tasks_content.py CHANGED
@@ -20,6 +20,10 @@ TASKS_DESCRIPTIONS = {
20
  * `API Recall`: share of library-specific API calls used in the reference program that appear in the generated code,
21
 
22
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `library_based_code_generation` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
 
 
 
 
23
  """,
24
 
25
  "ci_builds_repair": """# CI builds repair\n
@@ -37,6 +41,10 @@ TASKS_DESCRIPTIONS = {
37
  * `oracle: files, lines` – ground truth diffs are used to select files and code blocks that should be corrected to fix the issue;
38
 
39
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `ci-builds-repair` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
 
 
 
 
40
  """,
41
 
42
  "project_code_completion": """# Project-level code completion\n
@@ -60,6 +68,10 @@ TASKS_DESCRIPTIONS = {
60
  * *random* – lines that don't fit any of the previous categories.
61
 
62
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `project_level_code_completion` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
 
 
 
 
63
  """,
64
 
65
  "commit_message_generation": """# Commit message generation\n
@@ -84,7 +96,10 @@ TASKS_DESCRIPTIONS = {
84
  We used information retrieval metrics such as `R@k`, `P@k`, `F1-score`, and `MAP` for evaluation, taking `k` equal to 1 and 2.
85
 
86
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `bug_localization` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
 
 
87
 
 
88
  """,
89
 
90
  "module_summarization": """# Module summarization\n
@@ -95,6 +110,10 @@ TASKS_DESCRIPTIONS = {
95
  * `CompScore`: the new metric based on LLM as an assessor proposed for this task. Our approach involves feeding the LLM with relevant code and two versions of documentation: the ground truth and the model-generated text. More details on how it is calculated can be found in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines/blob/main/module_summarization/README.md).
96
 
97
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `module_summarization` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines/blob/main/module_summarization/).
 
 
 
 
98
  """,
99
  }
100
 
 
20
  * `API Recall`: share of library-specific API calls used in the reference program that appear in the generated code,
21
 
22
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `library_based_code_generation` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
23
+
24
+ If you have any questions or requests concerning this dataset, please contact us at lca@jetbrains.com.
25
+
26
+ **Terms of use**. As this dataset is collected from GitHub, researchers may use it for research purposes only if any publications resulting from that research are open access (see [GitHub Acceptable Use Policies](https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies#7-information-usage-restrictions)).
27
  """,
28
 
29
  "ci_builds_repair": """# CI builds repair\n
 
41
  * `oracle: files, lines` – ground truth diffs are used to select files and code blocks that should be corrected to fix the issue;
42
 
43
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `ci-builds-repair` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
44
+
45
+ If you have any questions or requests concerning this dataset, please contact us at lca@jetbrains.com.
46
+
47
+ **Terms of use**. As this dataset is collected from GitHub, researchers may use it for research purposes only if any publications resulting from that research are open access (see [GitHub Acceptable Use Policies](https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies#7-information-usage-restrictions)).
48
  """,
49
 
50
  "project_code_completion": """# Project-level code completion\n
 
68
  * *random* – lines that don't fit any of the previous categories.
69
 
70
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `project_level_code_completion` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
71
+
72
+ If you have any questions or requests concerning this dataset, please contact us at lca@jetbrains.com.
73
+
74
+ **Terms of use**. As this dataset is collected from GitHub, researchers may use it for research purposes only if any publications resulting from that research are open access (see [GitHub Acceptable Use Policies](https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies#7-information-usage-restrictions)).
75
  """,
76
 
77
  "commit_message_generation": """# Commit message generation\n
 
96
  We used information retrieval metrics such as `R@k`, `P@k`, `F1-score`, and `MAP` for evaluation, taking `k` equal to 1 and 2.
97
 
98
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `bug_localization` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines).
99
+
100
+ If you have any questions or requests concerning this dataset, please contact us at lca@jetbrains.com.
101
 
102
+ **Terms of use**. As this dataset is collected from GitHub, researchers may use it for research purposes only if any publications resulting from that research are open access (see [GitHub Acceptable Use Policies](https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies#7-information-usage-restrictions)).
103
  """,
104
 
105
  "module_summarization": """# Module summarization\n
 
110
  * `CompScore`: the new metric based on LLM as an assessor proposed for this task. Our approach involves feeding the LLM with relevant code and two versions of documentation: the ground truth and the model-generated text. More details on how it is calculated can be found in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines/blob/main/module_summarization/README.md).
111
 
112
  For further details on the dataset and the baselines from the 🏟️ Long Code Arena team, refer to the `module_summarization` directory in [our baselines repository](https://github.com/JetBrains-Research/lca-baselines/blob/main/module_summarization/).
113
+
114
+ If you have any questions or requests concerning this dataset, please contact us at lca@jetbrains.com.
115
+
116
+ **Terms of use**. As this dataset is collected from GitHub, researchers may use it for research purposes only if any publications resulting from that research are open access (see [GitHub Acceptable Use Policies](https://docs.github.com/en/site-policy/acceptable-use-policies/github-acceptable-use-policies#7-information-usage-restrictions)).
117
  """,
118
  }
119