Muennighoff commited on
Commit
52322d5
1 Parent(s): f3c3380
Files changed (30) hide show
  1. 2b855b50oscarpy/evaluation/2b855b50oscarpy_0_babi.json +22 -0
  2. 2b855b50oscarpy/evaluation/2b855b50oscarpy_1_babi.json +22 -0
  3. 2b855b50oscarpy/evaluation/2b855b50oscarpy_2_babi.json +22 -0
  4. 2b855b50oscarpy/evaluation/2b855b50oscarpy_3_babi.json +22 -0
  5. 2b855b50oscarpy/evaluation/2b855b50oscarpy_4_babi.json +22 -0
  6. 2b855b50oscarpy/evaluation/2b855b50oscarpy_5_babi.json +4 -4
  7. 2b855b60oscarpy/evaluation/2b855b60oscarpy_0_babi.json +22 -0
  8. 2b855b60oscarpy/evaluation/2b855b60oscarpy_1_babi.json +22 -0
  9. 2b855b60oscarpy/evaluation/2b855b60oscarpy_2_babi.json +22 -0
  10. 2b855b60oscarpy/evaluation/2b855b60oscarpy_3_babi.json +22 -0
  11. 2b855b60oscarpy/evaluation/2b855b60oscarpy_4_babi.json +22 -0
  12. 2b855b60oscarpy/evaluation/2b855b60oscarpy_5_babi.json +4 -4
  13. 2b855b70oscarpy/evaluation/2b855b70oscarpy_0_babi.json +22 -0
  14. 2b855b70oscarpy/evaluation/2b855b70oscarpy_1_babi.json +22 -0
  15. 2b855b70oscarpy/evaluation/2b855b70oscarpy_2_babi.json +22 -0
  16. 2b855b70oscarpy/evaluation/2b855b70oscarpy_3_babi.json +22 -0
  17. 2b855b70oscarpy/evaluation/2b855b70oscarpy_4_babi.json +22 -0
  18. 2b855b70oscarpy/evaluation/2b855b70oscarpy_5_babi.json +4 -4
  19. 2b855b80oscarpy/evaluation/2b855b80oscarpy_0_babi.json +22 -0
  20. 2b855b80oscarpy/evaluation/2b855b80oscarpy_1_babi.json +22 -0
  21. 2b855b80oscarpy/evaluation/2b855b80oscarpy_2_babi.json +22 -0
  22. 2b855b80oscarpy/evaluation/2b855b80oscarpy_3_babi.json +22 -0
  23. 2b855b80oscarpy/evaluation/2b855b80oscarpy_4_babi.json +22 -0
  24. 2b855b80oscarpy/evaluation/2b855b80oscarpy_5_babi.json +4 -4
  25. 2b855b90oscarpy/evaluation/2b855b90oscarpy_0_babi.json +22 -0
  26. 2b855b90oscarpy/evaluation/2b855b90oscarpy_1_babi.json +22 -0
  27. 2b855b90oscarpy/evaluation/2b855b90oscarpy_2_babi.json +22 -0
  28. 2b855b90oscarpy/evaluation/2b855b90oscarpy_3_babi.json +22 -0
  29. 2b855b90oscarpy/evaluation/2b855b90oscarpy_4_babi.json +22 -0
  30. 2b855b90oscarpy/evaluation/2b855b90oscarpy_5_babi.json +4 -4
2b855b50oscarpy/evaluation/2b855b50oscarpy_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b50oscarpy/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50oscarpy/evaluation/2b855b50oscarpy_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.07333333333333333,
5
+ "em_stderr": 0.0047601895956827945
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b50oscarpy/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50oscarpy/evaluation/2b855b50oscarpy_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.15933333333333333,
5
+ "em_stderr": 0.00668308526183433
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b50oscarpy/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50oscarpy/evaluation/2b855b50oscarpy_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.20766666666666667,
5
+ "em_stderr": 0.007407115668615664
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b50oscarpy/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50oscarpy/evaluation/2b855b50oscarpy_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.24566666666666667,
5
+ "em_stderr": 0.007860796135919237
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b50oscarpy/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b50oscarpy/evaluation/2b855b50oscarpy_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.2704194260485651,
5
- "em_stderr": 0.014764926338949921
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.2683333333333333,
5
+ "em_stderr": 0.00809106546127376
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b60oscarpy/evaluation/2b855b60oscarpy_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b60oscarpy/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60oscarpy/evaluation/2b855b60oscarpy_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.09033333333333333,
5
+ "em_stderr": 0.0052345207494638975
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b60oscarpy/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60oscarpy/evaluation/2b855b60oscarpy_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.17933333333333334,
5
+ "em_stderr": 0.007005282836938614
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b60oscarpy/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60oscarpy/evaluation/2b855b60oscarpy_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.23166666666666666,
5
+ "em_stderr": 0.0077040373487692005
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b60oscarpy/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60oscarpy/evaluation/2b855b60oscarpy_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.26966666666666667,
5
+ "em_stderr": 0.0081037486448691
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b60oscarpy/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b60oscarpy/evaluation/2b855b60oscarpy_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.2902869757174393,
5
- "em_stderr": 0.015087972138612144
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.29333333333333333,
5
+ "em_stderr": 0.00831380394735107
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b70oscarpy/evaluation/2b855b70oscarpy_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70oscarpy/evaluation/2b855b70oscarpy_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.07666666666666666,
5
+ "em_stderr": 0.004858411710742635
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70oscarpy/evaluation/2b855b70oscarpy_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.18033333333333335,
5
+ "em_stderr": 0.00702050591067492
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70oscarpy/evaluation/2b855b70oscarpy_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.22833333333333333,
5
+ "em_stderr": 0.007664984762185963
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70oscarpy/evaluation/2b855b70oscarpy_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.268,
5
+ "em_stderr": 0.00808788010553647
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b70oscarpy/evaluation/2b855b70oscarpy_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.293598233995585,
5
- "em_stderr": 0.015138342194874756
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.27666666666666667,
5
+ "em_stderr": 0.008168821862685123
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b80oscarpy/evaluation/2b855b80oscarpy_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80oscarpy/evaluation/2b855b80oscarpy_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.07666666666666666,
5
+ "em_stderr": 0.004858411710742635
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80oscarpy/evaluation/2b855b80oscarpy_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.18033333333333335,
5
+ "em_stderr": 0.00702050591067492
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80oscarpy/evaluation/2b855b80oscarpy_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.22833333333333333,
5
+ "em_stderr": 0.007664984762185963
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80oscarpy/evaluation/2b855b80oscarpy_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.268,
5
+ "em_stderr": 0.00808788010553647
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b70oscarpy/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b80oscarpy/evaluation/2b855b80oscarpy_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.293598233995585,
5
- "em_stderr": 0.015138342194874756
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.27666666666666667,
5
+ "em_stderr": 0.008168821862685123
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
2b855b90oscarpy/evaluation/2b855b90oscarpy_0_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.0,
5
+ "em_stderr": 0.0
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b90oscarpy/transformers",
14
+ "num_fewshot": 0,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90oscarpy/evaluation/2b855b90oscarpy_1_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.078,
5
+ "em_stderr": 0.004896937137753452
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b90oscarpy/transformers",
14
+ "num_fewshot": 1,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90oscarpy/evaluation/2b855b90oscarpy_2_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.17166666666666666,
5
+ "em_stderr": 0.006885846283046829
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b90oscarpy/transformers",
14
+ "num_fewshot": 2,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90oscarpy/evaluation/2b855b90oscarpy_3_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.22266666666666668,
5
+ "em_stderr": 0.0075970155734085614
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b90oscarpy/transformers",
14
+ "num_fewshot": 3,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90oscarpy/evaluation/2b855b90oscarpy_4_babi.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "babi": {
4
+ "em": 0.25766666666666665,
5
+ "em_stderr": 0.007986203415903146
6
+ }
7
+ },
8
+ "versions": {
9
+ "babi": 0
10
+ },
11
+ "config": {
12
+ "model": "gpt2",
13
+ "model_args": "pretrained=/pfs/lustrep4/scratch/project_462000119/muennighoff/nov-2022-bettercom/lm1-2b8-55b-oscarpy/2b855b90oscarpy/transformers",
14
+ "num_fewshot": 4,
15
+ "batch_size": null,
16
+ "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
+ "bootstrap_iters": 100000,
20
+ "description_dict": {}
21
+ }
22
+ }
2b855b90oscarpy/evaluation/2b855b90oscarpy_5_babi.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "results": {
3
  "babi": {
4
- "em": 0.24944812362030905,
5
- "em_stderr": 0.014383222164247377
6
  }
7
  },
8
  "versions": {
@@ -14,8 +14,8 @@
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
- "no_cache": false,
18
- "limit": 906,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }
 
1
  {
2
  "results": {
3
  "babi": {
4
+ "em": 0.26866666666666666,
5
+ "em_stderr": 0.008094244986218791
6
  }
7
  },
8
  "versions": {
 
14
  "num_fewshot": 5,
15
  "batch_size": null,
16
  "device": null,
17
+ "no_cache": true,
18
+ "limit": 3000,
19
  "bootstrap_iters": 100000,
20
  "description_dict": {}
21
  }