Muennighoff committed on
Commit 41063e3
1 Parent(s): ef69ae1

Better model with bs=256

README.md CHANGED
@@ -14,7 +14,7 @@ For usage instructions, refer to our codebase: https://github.com/Muennighoff/sg
 
 ## Evaluation Results
 
- For eval results, refer to our paper: https://arxiv.org/abs/2202.08904
+ For eval results, refer to the eval folder or our paper: https://arxiv.org/abs/2202.08904
 
 ## Training
 The model was trained with the parameters:
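
The README defers to the SGPT codebase for usage instructions. As an illustrative sketch only (not the authors' documented snippet), the checkpoint can be loaded through sentence-transformers, since the repository ships a config_sentence_transformers.json; the repo id below is taken from the model name in eval/beir.json, and the special query/document bracket handling of the "specb" variants is omitted here.

```python
# Minimal sketch, assuming sentence-transformers >= 2.1.0 (the version recorded in
# config_sentence_transformers.json). See the SGPT codebase for the reference usage,
# including the special-bracket handling for queries vs. documents.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("Muennighoff/SGPT-2.7B-weightedmean-msmarco-specb-bitfit")

sentences = [
    "What is BEIR?",
    "BEIR is a heterogeneous benchmark for zero-shot evaluation of retrieval models.",
]
embeddings = model.encode(sentences)
print(embeddings.shape)  # (2, embedding_dim)
```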
config.json CHANGED
@@ -75,7 +75,7 @@
   },
   "tokenizer_class": "GPT2Tokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.11.3",
+  "transformers_version": "4.20.0.dev0",
   "use_cache": true,
   "vocab_size": 50259,
   "window_size": 256
config_sentence_transformers.json CHANGED
@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.1.0",
-    "transformers": "4.11.3",
-    "pytorch": "1.10.1"
+    "transformers": "4.20.0.dev0",
+    "pytorch": "1.10.2"
   }
 }
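
config_sentence_transformers.json records the library versions the checkpoint was saved with. A short sketch (an illustration, assuming the file is present locally and the three packages are installed) that compares them against the running environment:

```python
# Illustrative only: compare the versions pinned in config_sentence_transformers.json
# with what is installed locally.
import json

import sentence_transformers
import torch
import transformers

with open("config_sentence_transformers.json") as f:
    pinned = json.load(f)["__version__"]

local = {
    "sentence_transformers": sentence_transformers.__version__,
    "transformers": transformers.__version__,
    "pytorch": torch.__version__,
}
for key, value in pinned.items():
    print(f"{key}: saved with {value}, running {local[key]}")
```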
eval/beir.json ADDED
@@ -0,0 +1,276 @@
+ {
+   "SGPT-2.7B-weightedmean-msmarco-specb-bitfit": {
+     "cqadupstack_android": {
+       "NDCG@1": 0.34049,
+       "NDCG@3": 0.37556,
+       "NDCG@5": 0.39905,
+       "NDCG@10": 0.42857,
+       "NDCG@100": 0.48229,
+       "NDCG@1000": 0.50638
+     },
+     "cqadupstack_english": {
+       "NDCG@1": 0.32548,
+       "NDCG@3": 0.37526,
+       "NDCG@5": 0.39115,
+       "NDCG@10": 0.41469,
+       "NDCG@100": 0.45826,
+       "NDCG@1000": 0.47974
+     },
+     "cqadupstack_gaming": {
+       "NDCG@1": 0.3906,
+       "NDCG@3": 0.45969,
+       "NDCG@5": 0.4835,
+       "NDCG@10": 0.5126,
+       "NDCG@100": 0.55479,
+       "NDCG@1000": 0.56948
+     },
+     "cqadupstack_gis": {
+       "NDCG@1": 0.23616,
+       "NDCG@3": 0.28729,
+       "NDCG@5": 0.30564,
+       "NDCG@10": 0.33284,
+       "NDCG@100": 0.38248,
+       "NDCG@1000": 0.40625
+     },
+     "cqadupstack_mathematica": {
+       "NDCG@1": 0.14428,
+       "NDCG@3": 0.19214,
+       "NDCG@5": 0.21163,
+       "NDCG@10": 0.24038,
+       "NDCG@100": 0.29953,
+       "NDCG@1000": 0.33115
+     },
+     "cqadupstack_physics": {
+       "NDCG@1": 0.30029,
+       "NDCG@3": 0.35534,
+       "NDCG@5": 0.37864,
+       "NDCG@10": 0.40593,
+       "NDCG@100": 0.46298,
+       "NDCG@1000": 0.48534
+     },
+     "cqadupstack_programmers": {
+       "NDCG@1": 0.28425,
+       "NDCG@3": 0.32215,
+       "NDCG@5": 0.34139,
+       "NDCG@10": 0.37059,
+       "NDCG@100": 0.42629,
+       "NDCG@1000": 0.45306
+     },
+     "cqadupstack_stats": {
+       "NDCG@1": 0.20552,
+       "NDCG@3": 0.2467,
+       "NDCG@5": 0.2676,
+       "NDCG@10": 0.28543,
+       "NDCG@100": 0.33504,
+       "NDCG@1000": 0.36273
+     },
+     "cqadupstack_wordpress": {
+       "NDCG@1": 0.20148,
+       "NDCG@3": 0.24132,
+       "NDCG@5": 0.2599,
+       "NDCG@10": 0.28437,
+       "NDCG@100": 0.33323,
+       "NDCG@1000": 0.36257
+     },
+     "cqadupstack_webmasters": {
+       "NDCG@1": 0.25296,
+       "NDCG@3": 0.31291,
+       "NDCG@5": 0.32524,
+       "NDCG@10": 0.35099,
+       "NDCG@100": 0.40592,
+       "NDCG@1000": 0.43605
+     },
+     "cqadupstack_unix": {
+       "NDCG@1": 0.24627,
+       "NDCG@3": 0.28856,
+       "NDCG@5": 0.30818,
+       "NDCG@10": 0.33186,
+       "NDCG@100": 0.38704,
+       "NDCG@1000": 0.41468
+     },
+     "cqadupstack_tex": {
+       "NDCG@1": 0.16999,
+       "NDCG@3": 0.19658,
+       "NDCG@5": 0.21547,
+       "NDCG@10": 0.23556,
+       "NDCG@100": 0.28445,
+       "NDCG@1000": 0.31631
+     },
+     "cqadupstack": {
+       "NDCG@1": 0.2581475,
+       "NDCG@3": 0.30445833333333333,
+       "NDCG@5": 0.3239491666666666,
+       "NDCG@10": 0.34948416666666665,
+       "NDCG@100": 0.4010250000000001,
+       "NDCG@1000": 0.42697833333333335
+     },
+     "trec-covid": {
+       "NDCG@1": 0.92,
+       "NDCG@3": 0.87246,
+       "NDCG@5": 0.83239,
+       "NDCG@10": 0.80666,
+       "NDCG@100": 0.57691,
+       "NDCG@1000": 0.48148
+     },
+     "trec-news": {
+       "NDCG@1": 0.50439,
+       "NDCG@3": 0.47263,
+       "NDCG@5": 0.47021,
+       "NDCG@10": 0.4379,
+       "NDCG@100": 0.43861,
+       "NDCG@1000": 0.54047
+     },
+     "signal1m": {
+       "NDCG@1": 0.36598,
+       "NDCG@3": 0.325,
+       "NDCG@5": 0.28387,
+       "NDCG@10": 0.24938,
+       "NDCG@100": 0.26649,
+       "NDCG@1000": 0.32811
+     },
+     "nfcorpus": {
+       "NDCG@1": 0.42879,
+       "NDCG@3": 0.3913,
+       "NDCG@5": 0.3704,
+       "NDCG@10": 0.33859,
+       "NDCG@100": 0.3138,
+       "NDCG@1000": 0.40355
+     },
+     "robust04": {
+       "NDCG@1": 0.59036,
+       "NDCG@3": 0.53574,
+       "NDCG@5": 0.50433,
+       "NDCG@10": 0.44895,
+       "NDCG@100": 0.36797,
+       "NDCG@1000": 0.45528
+     },
+     "average": {
+       "NDCG@1": 0.4584631944444445,
+       "NDCG@3": 0.44786324074074074,
+       "NDCG@5": 0.44920439814814817,
+       "NDCG@10": 0.45288189814814817,
+       "NDCG@100": 0.47065527777777777,
+       "NDCG@1000": 0.5063276851851853
+     },
+     "subaverage": {
+       "NDCG@1": 0.5116781818181819,
+       "NDCG@3": 0.49828636363636364,
+       "NDCG@5": 0.50451,
+       "NDCG@10": 0.5136945454545455,
+       "NDCG@100": 0.5280254545454546,
+       "NDCG@1000": 0.5527736363636363
+     },
+     "subsubaverage": {
+       "NDCG@1": 0.35645,
+       "NDCG@3": 0.377964,
+       "NDCG@5": 0.387838,
+       "NDCG@10": 0.40856200000000004,
+       "NDCG@100": 0.444236,
+       "NDCG@1000": 0.480742
+     },
+     "hotpotqa": {
+       "NDCG@1": 0.64389,
+       "NDCG@3": 0.48987,
+       "NDCG@5": 0.51016,
+       "NDCG@10": 0.52835,
+       "NDCG@100": 0.5585,
+       "NDCG@1000": 0.57493
+     },
+     "fiqa": {
+       "NDCG@1": 0.31019,
+       "NDCG@3": 0.29326,
+       "NDCG@5": 0.30571,
+       "NDCG@10": 0.33282,
+       "NDCG@100": 0.39516,
+       "NDCG@1000": 0.42946
+     },
+     "arguana": {
+       "NDCG@1": 0.2596,
+       "NDCG@3": 0.40018,
+       "NDCG@5": 0.45133,
+       "NDCG@10": 0.50512,
+       "NDCG@100": 0.54867,
+       "NDCG@1000": 0.55109
+     },
+     "climate-fever": {
+       "NDCG@1": 0.23322,
+       "NDCG@3": 0.21506,
+       "NDCG@5": 0.23853,
+       "NDCG@10": 0.27171,
+       "NDCG@100": 0.34051,
+       "NDCG@1000": 0.37522
+     },
+     "scifact": {
+       "NDCG@1": 0.58667,
+       "NDCG@3": 0.6479,
+       "NDCG@5": 0.6728,
+       "NDCG@10": 0.70165,
+       "NDCG@100": 0.7294,
+       "NDCG@1000": 0.73457
+     },
+     "msmarco": {
+       "NDCG@1": 0.21203,
+       "NDCG@3": 0.31221,
+       "NDCG@5": 0.3507,
+       "NDCG@10": 0.38832,
+       "NDCG@100": 0.44741,
+       "NDCG@1000": 0.46137
+     },
+     "webis-touche2020": {
+       "NDCG@1": 0.35714,
+       "NDCG@3": 0.26146,
+       "NDCG@5": 0.24908,
+       "NDCG@10": 0.23519,
+       "NDCG@100": 0.35356,
+       "NDCG@1000": 0.46504
+     },
+     "quora": {
+       "NDCG@1": 0.777,
+       "NDCG@3": 0.82276,
+       "NDCG@5": 0.84034,
+       "NDCG@10": 0.85592,
+       "NDCG@100": 0.87054,
+       "NDCG@1000": 0.87235
+     },
+     "scidocs": {
+       "NDCG@1": 0.197,
+       "NDCG@3": 0.15718,
+       "NDCG@5": 0.13895,
+       "NDCG@10": 0.16463,
+       "NDCG@100": 0.23415,
+       "NDCG@1000": 0.28504
+     },
+     "fever": {
+       "NDCG@1": 0.61446,
+       "NDCG@3": 0.69113,
+       "NDCG@5": 0.71169,
+       "NDCG@10": 0.72753,
+       "NDCG@100": 0.74513,
+       "NDCG@1000": 0.74954
+     },
+     "dbpedia-entity": {
+       "NDCG@1": 0.4975,
+       "NDCG@3": 0.39577,
+       "NDCG@5": 0.36718,
+       "NDCG@10": 0.3471,
+       "NDCG@100": 0.3761,
+       "NDCG@1000": 0.44328
+     },
+     "bioasq": {
+       "NDCG@1": 0.416,
+       "NDCG@3": 0.39386,
+       "NDCG@5": 0.38583,
+       "NDCG@10": 0.38394,
+       "NDCG@100": 0.4388,
+       "NDCG@1000": 0.46983
+     },
+     "nq": {
+       "NDCG@1": 0.292,
+       "NDCG@3": 0.39152,
+       "NDCG@5": 0.42893,
+       "NDCG@10": 0.46695,
+       "NDCG@100": 0.51647,
+       "NDCG@1000": 0.52768
+     }
+   }
+ }
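
The added eval/beir.json stores NDCG at several cutoffs per BEIR dataset under the model name, plus aggregate entries (the cqadupstack mean and the average/subaverage/subsubaverage rollups). A small sketch, an illustrative addition rather than part of the commit, that prints NDCG@10 for each entry:

```python
# Illustrative reader for the eval file added in this commit.
import json

with open("eval/beir.json") as f:
    results = json.load(f)

scores = results["SGPT-2.7B-weightedmean-msmarco-specb-bitfit"]
for name, metrics in scores.items():
    print(f"{name:30s} NDCG@10 = {metrics['NDCG@10']:.4f}")
```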
pytorch_model.bin → pytorch_model-00001-of-00002.bin RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:3438cf9b193cdbfaa90926b46d3b892d5eca30a7c8841769e0001375cd5f8bfa
- size 10739644329
+ oid sha256:4e3447afc0e22ec32f948f4003fb987b04377216f0ce903359f065002e10bc24
+ size 9996985557
pytorch_model-00002-of-00002.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:199884372221b5bfdb95e93aca93755269fbbe9a34f50a5c2ae2579a4f00238f
+ size 742637183
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d41a5c9da0d7efc22fb98f1475b73e1b1100eeebe7bfa53d097369c00ea82e2
+ size 32846
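
This commit splits the single pytorch_model.bin into two shards plus a pytorch_model.bin.index.json weight map. Recent transformers releases (roughly 4.18 and later, consistent with the 4.20.0.dev0 recorded above) resolve the index and load all shards automatically; a minimal sketch, with the repo id taken from the model name in eval/beir.json:

```python
# Minimal sketch: from_pretrained follows pytorch_model.bin.index.json and loads both
# shards transparently; no shard handling is needed in user code.
import torch
from transformers import AutoModel

model = AutoModel.from_pretrained(
    "Muennighoff/SGPT-2.7B-weightedmean-msmarco-specb-bitfit",
    torch_dtype=torch.float32,  # matches "torch_dtype" in config.json
)
print(sum(p.numel() for p in model.parameters()))  # on the order of 2.7B parameters
```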
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
- {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
+ {"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 2048, "special_tokens_map_file": null, "name_or_path": "EleutherAI/gpt-neo-2.7B", "errors": "replace", "pad_token": null, "add_bos_token": false, "tokenizer_class": "GPT2Tokenizer"}