Files changed (3) hide show
  1. README.md +1 -1
  2. contamination_report.csv +0 -37
  3. postprocessing.py +0 -4
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: green
5
  colorTo: blue
6
  sdk: gradio
7
  python_version: 3.11
8
- sdk_version: 4.36.0
9
  app_file: app.py
10
  app_port: 7860
11
  fullWidth: true
 
5
  colorTo: blue
6
  sdk: gradio
7
  python_version: 3.11
8
+ sdk_version: 4.19.1
9
  app_file: app.py
10
  app_port: 7860
11
  fullWidth: true
contamination_report.csv CHANGED
@@ -6,9 +6,6 @@ Anagrams 1;;GPT-3;;model;;3.0;;data-based;https://arxiv.org/abs/2005.14165;13
6
 
7
  Anagrams 2;;GPT-3;;model;;7.0;;data-based;https://arxiv.org/abs/2005.14165;13
8
 
9
- CodeForces2305;;GPT-3.5-turbo;0613;model;;;0.0;model-based;https://arxiv.org/abs/2402.15938;28
10
- CodeForces2305;;GPT-3.5-turbo;1106;model;;;0.0;model-based;https://arxiv.org/abs/2402.15938;28
11
-
12
  Cycled Letters;;GPT-3;;model;;1.0;;data-based;https://arxiv.org/abs/2005.14165;13
13
 
14
  EdinburghNLP/xsum;;GPT-3.5;;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
@@ -20,9 +17,6 @@ EdinburghNLP/xsum;;allenai/c4;;corpus;;;15.49;data-based;https://arxiv.org/abs/2
20
 
21
  EleutherAI/hendrycks_math;;GPT-4;;model;100.0;;;data-based;https://arxiv.org/abs/2303.08774;11
22
 
23
- HumanEval_R;;GPT-3.5-turbo;0613;model;;;9.76;model-based;https://arxiv.org/abs/2402.15938;28
24
- HumanEval_R;;GPT-3.5-turbo;1106;model;;;10.97;model-based;https://arxiv.org/abs/2402.15938;28
25
-
26
  RadNLI;;GPT-3.5;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
27
  RadNLI;;GPT-4;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
28
 
@@ -149,20 +143,6 @@ facebook/anli;test_r2;GPT-3;;model;;;18.0;data-based;https://arxiv.org/abs/2005.
149
 
150
  facebook/anli;test_r3;GPT-3;;model;;;16.0;data-based;https://arxiv.org/abs/2005.14165;13
151
 
152
- facebook/flores;;Claude 3 Opus;;model;;100.0;;model-based;https://arxiv.org/abs/2404.13813;29
153
- facebook/flores;;bigscience/bloomz;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
154
- facebook/flores;;bigscience/bloomz-1b1;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
155
- facebook/flores;;bigscience/bloomz-1b7;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
156
- facebook/flores;;bigscience/bloomz-3b;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
157
- facebook/flores;;bigscience/bloomz-560m;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
158
- facebook/flores;;bigscience/bloomz-7b1;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
159
- facebook/flores;;bigscience/mt0-base;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
160
- facebook/flores;;bigscience/mt0-large;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
161
- facebook/flores;;bigscience/mt0-small;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
162
- facebook/flores;;bigscience/mt0-xl;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
163
- facebook/flores;;bigscience/mt0-xxl;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
164
- facebook/flores;;bigscience/xP3;;corpus;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
165
-
166
  gigaword;;EleutherAI/pile;;corpus;;;1.18;data-based;https://arxiv.org/abs/2310.20707;2
167
  gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;2
168
  gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
@@ -170,13 +150,8 @@ gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://a
170
 
171
  gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
172
  gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
173
- gsm8k;;EleutherAI/llemma_34b;;model;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
174
- gsm8k;;EleutherAI/llemma_7b;;model;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
175
- gsm8k;;EleutherAI/proof-pile-2;;corpus;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
176
  gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
177
  gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
178
- gsm8k;;Qwen/Qwen-14B;;model;0.5;;;model-based;https://arxiv.org/abs/2404.18824;27
179
- gsm8k;;Qwen/Qwen-1_8B;;model;12.8;;0.075;model-based;https://arxiv.org/abs/2404.18824;27
180
 
181
  head_qa;en;EleutherAI/pile;;corpus;;;5.11;data-based;https://arxiv.org/abs/2310.20707;2
182
  head_qa;en;allenai/c4;;corpus;;;5.22;data-based;https://arxiv.org/abs/2310.20707;2
@@ -188,18 +163,6 @@ health_fact;;allenai/c4;;corpus;;;7.53;data-based;https://arxiv.org/abs/2310.207
188
  health_fact;;oscar-corpus/OSCAR-2301;;corpus;;;3.4;data-based;https://arxiv.org/abs/2310.20707;2
189
  health_fact;;togethercomputer/RedPajama-Data-V2;;corpus;;;18.7;data-based;https://arxiv.org/abs/2310.20707;2
190
 
191
- hendrycks/competition_math;;BAAI/Aquila2-34B;;model;3.366;;1.166;model-based;https://arxiv.org/abs/2404.18824;27
192
- hendrycks/competition_math;;BAAI/Aquila2-7B;;model;1;;0.133;model-based;https://arxiv.org/abs/2404.18824;27
193
- hendrycks/competition_math;;EleutherAI/llemma_34b;;model;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
194
- hendrycks/competition_math;;EleutherAI/llemma_7b;;model;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
195
- hendrycks/competition_math;;EleutherAI/proof-pile-2;;corpus;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
196
- hendrycks/competition_math;;Qwen/Qwen-14B;;model;1.766;;1.6;model-based;https://arxiv.org/abs/2404.18824;27
197
- hendrycks/competition_math;;Qwen/Qwen-1_8B;;model;4.533;;1.70;model-based;https://arxiv.org/abs/2404.18824;27
198
- hendrycks/competition_math;;Qwen/Qwen-7B;;model;1.266;;0.766;model-based;https://arxiv.org/abs/2404.18824;27
199
- hendrycks/competition_math;;THUDM/chatglm3-6b;;model;0.70;;0.4;model-based;https://arxiv.org/abs/2404.18824;27
200
- hendrycks/competition_math;;internlm/internlm2-20b;;model;4.733;;0.666;model-based;https://arxiv.org/abs/2404.18824;27
201
- hendrycks/competition_math;;internlm/internlm2-7b;;model;3.033;;0.433;model-based;https://arxiv.org/abs/2404.18824;27
202
-
203
  hlgd;;EleutherAI/pile;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
204
  hlgd;;allenai/c4;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
205
  hlgd;;oscar-corpus/OSCAR-2301;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
 
6
 
7
  Anagrams 2;;GPT-3;;model;;7.0;;data-based;https://arxiv.org/abs/2005.14165;13
8
 
 
 
 
9
  Cycled Letters;;GPT-3;;model;;1.0;;data-based;https://arxiv.org/abs/2005.14165;13
10
 
11
  EdinburghNLP/xsum;;GPT-3.5;;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
 
17
 
18
  EleutherAI/hendrycks_math;;GPT-4;;model;100.0;;;data-based;https://arxiv.org/abs/2303.08774;11
19
 
 
 
 
20
  RadNLI;;GPT-3.5;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
21
  RadNLI;;GPT-4;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
22
 
 
143
 
144
  facebook/anli;test_r3;GPT-3;;model;;;16.0;data-based;https://arxiv.org/abs/2005.14165;13
145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  gigaword;;EleutherAI/pile;;corpus;;;1.18;data-based;https://arxiv.org/abs/2310.20707;2
147
  gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;2
148
  gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
 
150
 
151
  gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
152
  gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
 
 
 
153
  gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
154
  gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
 
 
155
 
156
  head_qa;en;EleutherAI/pile;;corpus;;;5.11;data-based;https://arxiv.org/abs/2310.20707;2
157
  head_qa;en;allenai/c4;;corpus;;;5.22;data-based;https://arxiv.org/abs/2310.20707;2
 
163
  health_fact;;oscar-corpus/OSCAR-2301;;corpus;;;3.4;data-based;https://arxiv.org/abs/2310.20707;2
164
  health_fact;;togethercomputer/RedPajama-Data-V2;;corpus;;;18.7;data-based;https://arxiv.org/abs/2310.20707;2
165
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  hlgd;;EleutherAI/pile;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
167
  hlgd;;allenai/c4;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
168
  hlgd;;oscar-corpus/OSCAR-2301;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
postprocessing.py CHANGED
@@ -17,9 +17,6 @@ def remove_duplicates(data):
17
  def fix_arxiv_links(data):
18
  return [[*item[:-2], item[-2].replace("arxiv.org/pdf", "arxiv.org/abs"), item[-1]] for item in data]
19
 
20
- def fix_openreview_links(data):
21
- return [[*item[:-2], item[-2].replace("openreview.net/pdf", "openreview.net/forum"), item[-1]] for item in data]
22
-
23
  def sort_data(data):
24
  return sorted(data, key=lambda x: (x[0], x[1], x[2], x[3], x[-1]))
25
 
@@ -28,7 +25,6 @@ def main():
28
  data = sort_data(data)
29
  data = remove_duplicates(data)
30
  data = fix_arxiv_links(data)
31
- data = fix_openreview_links(data)
32
  print("Total datapoints:", len(data))
33
 
34
  with open("contamination_report.csv", 'w') as f:
 
17
  def fix_arxiv_links(data):
18
  return [[*item[:-2], item[-2].replace("arxiv.org/pdf", "arxiv.org/abs"), item[-1]] for item in data]
19
 
 
 
 
20
  def sort_data(data):
21
  return sorted(data, key=lambda x: (x[0], x[1], x[2], x[3], x[-1]))
22
 
 
25
  data = sort_data(data)
26
  data = remove_duplicates(data)
27
  data = fix_arxiv_links(data)
 
28
  print("Total datapoints:", len(data))
29
 
30
  with open("contamination_report.csv", 'w') as f: