hogru committed on
Commit
92eb358
1 Parent(s): 3bc36a6

Update tokenizer, bump hf versions

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 50.0,
3
- "perplexity": 2.8322171542330707,
4
- "test_accuracy": 0.35489852978435493,
5
- "test_loss": 1.0410598516464233,
6
- "test_runtime": 55.5978,
7
- "test_samples_per_second": 4293.445,
8
- "test_steps_per_second": 16.781,
9
- "train_loss": 1.2392261905236766,
10
- "train_runtime": 25350.8633,
11
  "train_samples": 1273104,
12
- "train_samples_per_second": 2510.968,
13
- "train_steps_per_second": 9.808
14
  }
 
1
  {
2
+ "epoch": 49.98,
3
+ "test_accuracy": 0.41162851869286315,
4
+ "test_loss": 1.1775240898132324,
5
+ "test_perplexity": 3.246326631292486,
6
+ "test_runtime": 55.1082,
7
+ "test_samples_per_second": 4331.591,
8
+ "test_steps_per_second": 16.93,
9
+ "train_loss": 1.379773639240096,
10
+ "train_runtime": 17824.9007,
11
  "train_samples": 1273104,
12
+ "train_samples_per_second": 3571.139,
13
+ "train_steps_per_second": 6.973
14
  }
config.json CHANGED
@@ -26,7 +26,7 @@
26
  "summary_type": "cls_index",
27
  "summary_use_proj": true,
28
  "torch_dtype": "float32",
29
- "transformers_version": "4.27.1",
30
  "use_cache": true,
31
- "vocab_size": 93
32
  }
 
26
  "summary_type": "cls_index",
27
  "summary_use_proj": true,
28
  "torch_dtype": "float32",
29
+ "transformers_version": "4.34.1",
30
  "use_cache": true,
31
+ "vocab_size": 181
32
  }
generation_config.json CHANGED
@@ -1,10 +1,9 @@
1
  {
2
  "do_sample": true,
3
  "eos_token_id": 1,
4
- "length_penalty": 0.0,
5
  "max_new_tokens": 128,
6
  "min_new_tokens": 1,
7
  "num_return_sequences": 100,
8
  "pad_token_id": 2,
9
- "transformers_version": "4.27.1"
10
  }
 
1
  {
2
  "do_sample": true,
3
  "eos_token_id": 1,
 
4
  "max_new_tokens": 128,
5
  "min_new_tokens": 1,
6
  "num_return_sequences": 100,
7
  "pad_token_id": 2,
8
+ "transformers_version": "4.34.1"
9
  }
model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:33e05a7877f04271d65f0db80d946c7cbd1cd24908eb50ed0f49b65d2a7b6270
3
- size 12375736
 
 
 
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75711e4f676270ff40fcfc23ea74221564ba4dafd829420db85f2ef42cdc11dc
3
- size 12414333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b3fa750ed5d361efc7d44edbf8184f3598205eb2163df1811cc5cb14dff2f2
3
+ size 12260637
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
1
  {
2
- "bos_token": "^",
3
- "eos_token": "_",
4
- "pad_token": " ",
5
- "unk_token": "§"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "bos_token": {
3
+ "content": "^",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "_",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": " ",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "§",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
  }
test_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 50.0,
3
- "perplexity": 2.8322171542330707,
4
- "test_accuracy": 0.35489852978435493,
5
- "test_loss": 1.0410598516464233,
6
- "test_runtime": 55.5978,
7
- "test_samples_per_second": 4293.445,
8
- "test_steps_per_second": 16.781
9
  }
 
1
  {
2
+ "epoch": 49.98,
3
+ "test_accuracy": 0.41162851869286315,
4
+ "test_loss": 1.1775240898132324,
5
+ "test_perplexity": 3.246326631292486,
6
+ "test_runtime": 55.1082,
7
+ "test_samples_per_second": 4331.591,
8
+ "test_steps_per_second": 16.93
9
  }
tokenizer.json CHANGED
@@ -1,7 +1,14 @@
1
  {
2
  "version": "1.0",
3
  "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
@@ -108,383 +115,198 @@
108
  }
109
  }
110
  },
111
- "decoder": null,
 
 
 
 
112
  "model": {
113
- "type": "Unigram",
114
- "unk_id": 3,
115
- "vocab": [
116
- [
117
- "^",
118
- 0.0
119
- ],
120
- [
121
- "_",
122
- 0.0
123
- ],
124
- [
125
- " ",
126
- 0.0
127
- ],
128
- [
129
- "§",
130
- 0.0
131
- ],
132
- [
133
- "°",
134
- 0.0
135
- ],
136
- [
137
- "C",
138
- -2.2339120063759843
139
- ],
140
- [
141
- "c1",
142
- -3.108178186508052
143
- ],
144
- [
145
- "cc",
146
- -3.2719370147830347
147
- ],
148
- [
149
- "N",
150
- -3.2932537094066436
151
- ],
152
- [
153
- "O",
154
- -3.294834188186176
155
- ],
156
- [
157
- "n",
158
- -3.3053461817854917
159
- ],
160
- [
161
- ")",
162
- -3.3475212288619822
163
- ],
164
- [
165
- "CC",
166
- -3.4312206506012544
167
- ],
168
- [
169
- "(",
170
- -3.5624560302888746
171
- ],
172
- [
173
- "c(",
174
- -3.765813889966134
175
- ],
176
- [
177
- "c",
178
- -3.8218193366886393
179
- ],
180
- [
181
- "C(=O)",
182
- -3.856958418790814
183
- ],
184
- [
185
- "c3",
186
- -3.9254062324983607
187
- ],
188
- [
189
- "=",
190
- -3.975334503903719
191
- ],
192
- [
193
- "c2",
194
- -4.003423620506245
195
- ],
196
- [
197
- "1",
198
- -4.112680698145304
199
- ],
200
- [
201
- "(C)",
202
- -4.121209869936163
203
- ],
204
- [
205
- "2",
206
- -4.1361761826809555
207
- ],
208
- [
209
- "C1",
210
- -4.21768981835341
211
- ],
212
- [
213
- "c1ccc(",
214
- -4.251735247120498
215
- ],
216
- [
217
- "-",
218
- -4.252411803847931
219
- ],
220
- [
221
- "2)",
222
- -4.270562896654599
223
- ],
224
- [
225
- "cc(",
226
- -4.276471688527964
227
- ],
228
- [
229
- "C(",
230
- -4.3086368671374995
231
- ],
232
- [
233
- "CO",
234
- -4.337904719405833
235
- ],
236
- [
237
- "C(=O)N",
238
- -4.428846224698317
239
- ],
240
- [
241
- "3)",
242
- -4.449247292348714
243
- ],
244
- [
245
- "C2",
246
- -4.522375726063721
247
- ],
248
- [
249
- "O)",
250
- -4.5371556203299654
251
- ],
252
- [
253
- "CCC",
254
- -4.589271525409394
255
- ],
256
- [
257
- "3",
258
- -4.724794788518322
259
- ],
260
- [
261
- "C(C)",
262
- -4.750491328820496
263
- ],
264
- [
265
- "cc1",
266
- -4.774712806549024
267
- ],
268
- [
269
- "cn",
270
- -4.786244699312933
271
- ],
272
- [
273
- "NC(=O)",
274
- -4.789019365908235
275
- ],
276
- [
277
- "CC1",
278
- -4.79366206708262
279
- ],
280
- [
281
- "=O)",
282
- -4.843480092939821
283
- ],
284
- [
285
- "C)",
286
- -4.8437048190443175
287
- ],
288
- [
289
- "S",
290
- -4.850239832224922
291
- ],
292
- [
293
- "n1",
294
- -4.864510173862389
295
- ],
296
- [
297
- "c1ccccc1",
298
- -4.92499844194878
299
- ],
300
- [
301
- "c2ccc(",
302
- -5.024291675089154
303
- ],
304
- [
305
- "[nH]",
306
- -5.110243493192227
307
- ],
308
- [
309
- "c4",
310
- -5.110439906920133
311
- ],
312
- [
313
- "s",
314
- -5.124712107023752
315
- ],
316
- [
317
- "N1",
318
- -5.13109013422166
319
- ],
320
- [
321
- "o",
322
- -5.144341645447907
323
- ],
324
- [
325
- "F)",
326
- -5.173986710079323
327
- ],
328
- [
329
- "N(C",
330
- -5.185845166641148
331
- ],
332
- [
333
- "S(=O)(=O)",
334
- -5.212557352968634
335
- ],
336
- [
337
- "Cl)",
338
- -5.220168182623269
339
- ],
340
- [
341
- "c2ccc",
342
- -5.2702619038121234
343
- ],
344
- [
345
- "C(O)",
346
- -5.276647874822254
347
- ],
348
- [
349
- "2)cc1",
350
- -5.314256517516519
351
- ],
352
- [
353
- "O=C(",
354
- -5.316263615262265
355
- ],
356
- [
357
- "c3ccccc3",
358
- -5.35319737432803
359
- ],
360
- [
361
- "4",
362
- -5.365584577986091
363
- ],
364
- [
365
- "c(Cl)c",
366
- -5.396247268723645
367
- ],
368
- [
369
- "C=C",
370
- -5.435516000963592
371
- ],
372
- [
373
- "5",
374
- -5.521017741806011
375
- ],
376
- [
377
- "N2CC",
378
- -5.731120719935269
379
- ],
380
- [
381
- "c(F)c",
382
- -5.761758311176596
383
- ],
384
- [
385
- "C(F)(F)F)",
386
- -5.816389560959275
387
- ],
388
- [
389
- "[",
390
- -5.9137028227376565
391
- ],
392
- [
393
- "]",
394
- -5.9137028227376565
395
- ],
396
- [
397
- "c(OC)c",
398
- -5.948373907493249
399
- ],
400
- [
401
- "c(-c3cc",
402
- -6.076591700432848
403
- ],
404
- [
405
- "Br)",
406
- -6.173257390983636
407
- ],
408
- [
409
- "#",
410
- -6.406719606169013
411
- ],
412
- [
413
- "[N+](=O)[O-])",
414
- -6.4134780734073065
415
- ],
416
- [
417
- "+",
418
- -6.459711891957882
419
- ],
420
- [
421
- "F",
422
- -6.6661526991253
423
- ],
424
- [
425
- "P",
426
- -7.117461698495431
427
- ],
428
- [
429
- "6",
430
- -7.298200155096458
431
- ],
432
- [
433
- "B",
434
- -8.020755755896921
435
- ],
436
- [
437
- "I",
438
- -8.076335551264686
439
- ],
440
- [
441
- "7",
442
- -9.064406674700315
443
- ],
444
- [
445
- "H",
446
- -9.766986825131063
447
- ],
448
- [
449
- "8",
450
- -10.67648343715202
451
- ],
452
- [
453
- "9",
454
- -11.6208264490498
455
- ],
456
- [
457
- "%",
458
- -13.14094059722758
459
- ],
460
- [
461
- "0",
462
- -13.512972359677438
463
- ],
464
- [
465
- "p",
466
- -13.73258273972798
467
- ],
468
- [
469
- "l",
470
- -17.659453778747075
471
- ],
472
- [
473
- "e",
474
- -17.659553778747075
475
- ],
476
- [
477
- "i",
478
- -17.659653778747074
479
- ],
480
- [
481
- "r",
482
- -17.659753778747074
483
- ],
484
- [
485
- "b",
486
- -17.659753778747074
487
- ]
488
- ]
489
  }
490
  }
 
1
  {
2
  "version": "1.0",
3
  "truncation": null,
4
+ "padding": {
5
+ "strategy": "BatchLongest",
6
+ "direction": "Right",
7
+ "pad_to_multiple_of": null,
8
+ "pad_id": 2,
9
+ "pad_type_id": 0,
10
+ "pad_token": " "
11
+ },
12
  "added_tokens": [
13
  {
14
  "id": 0,
 
115
  }
116
  }
117
  },
118
+ "decoder": {
119
+ "type": "WordPiece",
120
+ "prefix": "##",
121
+ "cleanup": false
122
+ },
123
  "model": {
124
+ "type": "WordPiece",
125
+ "unk_token": "§",
126
+ "continuing_subword_prefix": "##",
127
+ "max_input_chars_per_word": 1024,
128
+ "vocab": {
129
+ "^": 0,
130
+ "_": 1,
131
+ " ": 2,
132
+ "§": 3,
133
+ "°": 4,
134
+ "#": 5,
135
+ "%": 6,
136
+ "(": 7,
137
+ ")": 8,
138
+ "+": 9,
139
+ "-": 10,
140
+ "0": 11,
141
+ "1": 12,
142
+ "2": 13,
143
+ "3": 14,
144
+ "4": 15,
145
+ "5": 16,
146
+ "6": 17,
147
+ "7": 18,
148
+ "8": 19,
149
+ "9": 20,
150
+ "=": 21,
151
+ "B": 22,
152
+ "C": 23,
153
+ "F": 24,
154
+ "H": 25,
155
+ "I": 26,
156
+ "N": 27,
157
+ "O": 28,
158
+ "P": 29,
159
+ "S": 30,
160
+ "[": 31,
161
+ "]": 32,
162
+ "b": 33,
163
+ "c": 34,
164
+ "e": 35,
165
+ "i": 36,
166
+ "l": 37,
167
+ "n": 38,
168
+ "o": 39,
169
+ "p": 40,
170
+ "r": 41,
171
+ "s": 42,
172
+ "##c": 43,
173
+ "##1": 44,
174
+ "##(": 45,
175
+ "##C": 46,
176
+ "##=": 47,
177
+ "##2": 48,
178
+ "##O": 49,
179
+ "##)": 50,
180
+ "##n": 51,
181
+ "##-": 52,
182
+ "##l": 53,
183
+ "##N": 54,
184
+ "##S": 55,
185
+ "##3": 56,
186
+ "##[": 57,
187
+ "##H": 58,
188
+ "##]": 59,
189
+ "##+": 60,
190
+ "##o": 61,
191
+ "##F": 62,
192
+ "##4": 63,
193
+ "##B": 64,
194
+ "###": 65,
195
+ "##r": 66,
196
+ "##I": 67,
197
+ "##s": 68,
198
+ "##5": 69,
199
+ "##P": 70,
200
+ "##6": 71,
201
+ "##e": 72,
202
+ "##7": 73,
203
+ "##i": 74,
204
+ "##8": 75,
205
+ "##9": 76,
206
+ "##p": 77,
207
+ "##%": 78,
208
+ "##0": 79,
209
+ "##b": 80,
210
+ "##cc": 81,
211
+ "##CC": 82,
212
+ "##O)": 83,
213
+ "##C(": 84,
214
+ "##c1": 85,
215
+ "##=O)": 86,
216
+ "##c(": 87,
217
+ "##c2": 88,
218
+ "##C)": 89,
219
+ "##c1cc": 90,
220
+ "##C(=O)": 91,
221
+ "##c3": 92,
222
+ "##c2cc": 93,
223
+ "##)cc": 94,
224
+ "##(=O)": 95,
225
+ "##C1": 96,
226
+ "##c1ccc(": 97,
227
+ "##(C)": 98,
228
+ "##c3cc": 99,
229
+ "##2)": 100,
230
+ "##F)": 101,
231
+ "##CCCC": 102,
232
+ "##C(=O)N": 103,
233
+ "##c1cccc": 104,
234
+ "##C2": 105,
235
+ "##c2cccc": 106,
236
+ "##CN": 107,
237
+ "##Cl": 108,
238
+ "##C(C)": 109,
239
+ "##cn": 110,
240
+ "##)cc1": 111,
241
+ "##c4": 112,
242
+ "##CCN": 113,
243
+ "##3)": 114,
244
+ "CO": 115,
245
+ "##=C(": 116,
246
+ "##n1": 117,
247
+ "##c2ccccc2": 118,
248
+ "##nc(": 119,
249
+ "##c2ccc(": 120,
250
+ "##c1ccccc1": 121,
251
+ "##CC1": 122,
252
+ "##CO": 123,
253
+ "##c3cccc": 124,
254
+ "##[n": 125,
255
+ "##NC(=O)": 126,
256
+ "##N)": 127,
257
+ "##H]": 128,
258
+ "##[nH]": 129,
259
+ "##S(=O)": 130,
260
+ "##(C": 131,
261
+ "##S(=O)(=O)": 132,
262
+ "##Cl)": 133,
263
+ "O=C(": 134,
264
+ "##(F)": 135,
265
+ "##c3ccccc3": 136,
266
+ "##OC)": 137,
267
+ "##CC(=O)": 138,
268
+ "##C3": 139,
269
+ "##cccc": 140,
270
+ "##CC(": 141,
271
+ "##n2": 142,
272
+ "##c1cc(": 143,
273
+ "##c2c(": 144,
274
+ "##c1n": 145,
275
+ "##c3ccc(": 146,
276
+ "##cc1": 147,
277
+ "##CCN(": 148,
278
+ "##N1": 149,
279
+ "##CCO": 150,
280
+ "##C(F)": 151,
281
+ "##CC2": 152,
282
+ "##F)cc": 153,
283
+ "##c2)": 154,
284
+ "##cc(": 155,
285
+ "##c2ccccc2)": 156,
286
+ "##c2n": 157,
287
+ "##C(F)(F)": 158,
288
+ "##C(O)": 159,
289
+ "##Cl)cc": 160,
290
+ "##C=": 161,
291
+ "##c(=O)": 162,
292
+ "##+]": 163,
293
+ "##c1cccc(": 164,
294
+ "##4)": 165,
295
+ "##OC": 166,
296
+ "COc1ccc(": 167,
297
+ "##c1)": 168,
298
+ "##=N": 169,
299
+ "##C(C": 170,
300
+ "##=O": 171,
301
+ "##c2cc(": 172,
302
+ "##N2": 173,
303
+ "##CCCN": 174,
304
+ "##c(-": 175,
305
+ "##-]": 176,
306
+ "CC(C)": 177,
307
+ "##[N": 178,
308
+ "##O-]": 179,
309
+ "##[O-]": 180
310
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
311
  }
312
  }
tokenizer_config.json CHANGED
@@ -1,10 +1,55 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "^",
 
3
  "eos_token": "_",
4
- "model_max_length": 128,
 
 
5
  "pad_token": " ",
 
6
  "padding_side": "right",
7
- "special_tokens_map_file": "../../tokenizers/char_unigram_88/special_tokens_map.json",
8
  "tokenizer_class": "PreTrainedTokenizerFast",
9
  "truncation_side": "left",
10
  "unk_token": "§"
 
1
  {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "^",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "_",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": " ",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "§",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "°",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
  "bos_token": "^",
45
+ "clean_up_tokenization_spaces": true,
46
  "eos_token": "_",
47
+ "max_length": null,
48
+ "model_max_length": 1024,
49
+ "pad_to_multiple_of": null,
50
  "pad_token": " ",
51
+ "pad_token_type_id": 0,
52
  "padding_side": "right",
 
53
  "tokenizer_class": "PreTrainedTokenizerFast",
54
  "truncation_side": "left",
55
  "unk_token": "§"
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 50.0,
3
- "train_loss": 1.2392261905236766,
4
- "train_runtime": 25350.8633,
5
  "train_samples": 1273104,
6
- "train_samples_per_second": 2510.968,
7
- "train_steps_per_second": 9.808
8
  }
 
1
  {
2
+ "epoch": 49.98,
3
+ "train_loss": 1.379773639240096,
4
+ "train_runtime": 17824.9007,
5
  "train_samples": 1273104,
6
+ "train_samples_per_second": 3571.139,
7
+ "train_steps_per_second": 6.973
8
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d7f23e857016ba44427fcedf175d39d5d54474fc70e0d9f748a4969ddc5394f
3
- size 3643
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7270b826d836c540d0751fafd26f6bd57648b6cc071d87adf3f61ae00f263d74
3
+ size 4155