OSainz committed on
Commit
a44e89a
•
2 Parent(s): 36cae97 7127ae8

Merge branch 'main' of https://huggingface.co/spaces/CONDA-Workshop/Data-Contamination-Report into pr/9

Browse files
Files changed (3) hide show
  1. .gitignore +2 -1
  2. README.md +1 -1
  3. contamination_report.csv +33 -15
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  *.pyc
2
- *.json
 
 
1
  *.pyc
2
+ *.json
3
+ *.lock
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: 🏭💨 Data Contamination Report
3
  emoji: 🏭
4
  colorFrom: green
5
  colorTo: blue
 
1
  ---
2
+ title: 🏭💨 Data Contamination Database
3
  emoji: 🏭
4
  colorFrom: green
5
  colorTo: blue
contamination_report.csv CHANGED
@@ -18,6 +18,13 @@ samsum;;GPT-3.5;model;;;74.00;model-based;https://arxiv.org/abs/2311.06233;8
18
  EdinburghNLP/xsum;;GPT-4;model;;;95.00;model-based;https://arxiv.org/abs/2311.06233;8
19
  EdinburghNLP/xsum;;GPT-3.5;model;;;79.00;model-based;https://arxiv.org/abs/2311.06233;8
20
 
 
 
 
 
 
 
 
21
  conll2003;;GPT-3.5;model;100.0;100.0;100.0;model-based;https://hitz-zentroa.github.io/lm-contamination/blog/;7
22
  nyu-mll/glue;mnli;GPT-3.5;model;100.0;100.0;;model-based;https://hitz-zentroa.github.io/lm-contamination/blog/;7
23
  rajpurkar/squad_v2;;GPT-3.5;model;100.0;100.0;;model-based;https://hitz-zentroa.github.io/lm-contamination/blog/;7
@@ -446,24 +453,35 @@ zest;;EleutherAI/pile;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
446
  zest;;togethercomputer/RedPajama-Data-V2;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
447
 
448
 
449
- imdb;;GPT-4;model;100.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
450
- imdb;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
451
 
452
- ag_news;;GPT-4;model;100.0;;100.0;model-based;https://arxiv.org/pdf/2308.08493;3
453
- ag_news;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
454
 
455
- yelp_review_full;;GPT-4;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
456
- yelp_review_full;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
457
 
458
- nyu-mll/glue;rte;GPT-4;model;100.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
459
- nyu-mll/glue;rte;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
460
 
461
- nyu-mll/glue;wnli;GPT-4;model;100.0;;100.0;model-based;https://arxiv.org/pdf/2308.08493;3
462
- nyu-mll/glue;wnli;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
463
 
464
- samsum;;GPT-4;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
465
- samsum;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/pdf/2308.08493;3
466
 
467
- EdinburghNLP/xsum;;GPT-4;model;0.0;;100.0;model-based;https://arxiv.org/pdf/2308.08493;3
468
- EdinburghNLP/xsum;;GPT-3.5;model;0.0;;100.0;model-based;https://arxiv.org/pdf/2308.08493;3
469
-
 
 
 
 
 
 
 
 
 
 
 
 
18
  EdinburghNLP/xsum;;GPT-4;model;;;95.00;model-based;https://arxiv.org/abs/2311.06233;8
19
  EdinburghNLP/xsum;;GPT-3.5;model;;;79.00;model-based;https://arxiv.org/abs/2311.06233;8
20
 
21
+ allenai/ai2_arc;;CommonCrawl;corpus;;;28.7;data-based;https://arxiv.org/abs/2310.17589;5
22
+ tau/commonsense_qa;;CommonCrawl;corpus;;1.6;;data-based;https://arxiv.org/abs/2310.17589;5
23
+ winogrande;;CommonCrawl;corpus;;1.1;;data-based;https://arxiv.org/abs/2310.17589;5
24
+ ceval/ceval-exam;;CommonCrawl;corpus;;45.8;;data-based;https://arxiv.org/abs/2310.17589;5
25
+ Rowan/hellaswag;;CommonCrawl;corpus;;12.4;;data-based;https://arxiv.org/abs/2310.17589;5
26
+ cais/mmlu;;CommonCrawl;corpus;;;29.1;data-based;https://arxiv.org/abs/2310.17589;5
27
+
28
  conll2003;;GPT-3.5;model;100.0;100.0;100.0;model-based;https://hitz-zentroa.github.io/lm-contamination/blog/;7
29
  nyu-mll/glue;mnli;GPT-3.5;model;100.0;100.0;;model-based;https://hitz-zentroa.github.io/lm-contamination/blog/;7
30
  rajpurkar/squad_v2;;GPT-3.5;model;100.0;100.0;;model-based;https://hitz-zentroa.github.io/lm-contamination/blog/;7
 
453
  zest;;togethercomputer/RedPajama-Data-V2;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
454
 
455
 
456
+ imdb;;GPT-4;model;100.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
457
+ imdb;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
458
 
459
+ ag_news;;GPT-4;model;100.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
460
+ ag_news;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
461
 
462
+ yelp_review_full;;GPT-4;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
463
+ yelp_review_full;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
464
 
465
+ nyu-mll/glue;rte;GPT-4;model;100.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
466
+ nyu-mll/glue;rte;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
467
 
468
+ nyu-mll/glue;wnli;GPT-4;model;100.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
469
+ nyu-mll/glue;wnli;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
470
 
471
+ samsum;;GPT-4;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
472
+ samsum;;GPT-3.5;model;0.0;;0.0;model-based;https://arxiv.org/abs/2308.08493;3
473
 
474
+ EdinburghNLP/xsum;;GPT-4;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
475
+ EdinburghNLP/xsum;;GPT-3.5;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
476
+
477
+ bigbio/mednli;;GPT-4;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
478
+ bigbio/mednli;;GPT-3.5;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
479
+
480
+ RadNLI;;GPT-4;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
481
+ RadNLI;;GPT-3.5;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
482
+
483
+
484
+ openai_humaneval;;EleutherAI/pile;corpus;;;12.2;data-based;https://arxiv.org/abs/2403.04811;12
485
+ mbpp;;EleutherAI/pile;corpus;;;3.6;data-based;https://arxiv.org/abs/2403.04811;12
486
+ openai_humaneval;;bigcode/the-stack;corpus;;;18.9;data-based;https://arxiv.org/abs/2403.04811;12
487
+ mbpp;;bigcode/the-stack;corpus;;;20.8;data-based;https://arxiv.org/abs/2403.04811;12