pszemraj committed on
Commit
7a20e92
1 Parent(s): 49f0b07

add new checkpoint trained for a hundred steps with smaller max grad norm and weight decay

Browse files
Files changed (7) hide show
  1. latest +1 -1
  2. merges.txt +1 -1
  3. pytorch_model.bin +1 -1
  4. tokenizer_config.json +1 -1
  5. trainer_state.json +79 -1155
  6. training_args.bin +2 -2
  7. vocab.json +0 -0
latest CHANGED
@@ -1 +1 @@
1
- global_step1016
1
+ global_step126
merges.txt CHANGED
@@ -1,4 +1,4 @@
1
- #version: 0.2
2
  Ġ t
3
  Ġ a
4
  h e
1
+ #version: 0.2 - Trained by `huggingface/tokenizers`
2
  Ġ t
3
  Ġ a
4
  h e
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cad2a7fef5856274323ae7e812eec18858a768fd7d3f83bcfc3c2e9d0d3d1de
3
  size 5263006227
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a3f78791c05f0aa6c18ceedb9f770c34d1f1c993af764750cdc8ddb3a42a1d8
3
  size 5263006227
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"errors": "replace", "unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "pszemraj/opt-peter-1.3B", "model_max_length": 512, "tokenizer_class": "GPT2Tokenizer"}
1
+ {"unk_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "errors": "replace", "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_bos_token": true, "special_tokens_map_file": null, "name_or_path": "pszemraj/opt-peter-1.3B", "model_max_length": 512, "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,1242 +1,166 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.6457290244272493,
5
- "global_step": 1016,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.01,
12
- "learning_rate": 2.4193548387096776e-06,
13
- "loss": 3.4647,
14
  "step": 5
15
  },
16
  {
17
- "epoch": 0.02,
18
- "learning_rate": 4.838709677419355e-06,
19
- "loss": 2.4108,
20
  "step": 10
21
  },
22
  {
23
- "epoch": 0.02,
24
- "learning_rate": 7.258064516129032e-06,
25
- "loss": 1.5747,
26
  "step": 15
27
  },
28
  {
29
- "epoch": 0.03,
30
- "learning_rate": 9.67741935483871e-06,
31
- "loss": 1.5005,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 0.04,
36
- "learning_rate": 1.2096774193548387e-05,
37
- "loss": 1.4694,
38
  "step": 25
39
  },
40
  {
41
- "epoch": 0.05,
42
- "learning_rate": 1.4516129032258065e-05,
43
- "loss": 1.4399,
44
  "step": 30
45
  },
46
  {
47
- "epoch": 0.06,
48
- "learning_rate": 1.6935483870967744e-05,
49
- "loss": 1.4215,
50
  "step": 35
51
  },
52
  {
53
- "epoch": 0.06,
54
- "learning_rate": 1.935483870967742e-05,
55
- "loss": 1.4526,
56
  "step": 40
57
  },
58
  {
59
- "epoch": 0.07,
60
- "learning_rate": 2.1774193548387097e-05,
61
- "loss": 1.4458,
62
  "step": 45
63
  },
64
  {
65
- "epoch": 0.08,
66
- "learning_rate": 2.4193548387096773e-05,
67
- "loss": 1.4237,
68
  "step": 50
69
  },
70
  {
71
- "epoch": 0.09,
72
- "learning_rate": 2.6612903225806453e-05,
73
- "loss": 1.4339,
74
  "step": 55
75
  },
76
  {
77
- "epoch": 0.1,
78
- "learning_rate": 2.903225806451613e-05,
79
- "loss": 1.4294,
80
  "step": 60
81
  },
82
  {
83
- "epoch": 0.11,
84
- "learning_rate": 2.999951499529191e-05,
85
- "loss": 1.4968,
86
  "step": 65
87
  },
88
  {
89
- "epoch": 0.11,
90
- "learning_rate": 2.9996551191211948e-05,
91
- "loss": 1.4261,
92
  "step": 70
93
  },
94
  {
95
- "epoch": 0.12,
96
- "learning_rate": 2.9990893561853812e-05,
97
- "loss": 1.4371,
98
  "step": 75
99
  },
100
  {
101
- "epoch": 0.13,
102
- "learning_rate": 2.9982543123495507e-05,
103
- "loss": 1.4412,
104
  "step": 80
105
  },
106
  {
107
- "epoch": 0.14,
108
- "learning_rate": 2.9971501376123366e-05,
109
- "loss": 1.4638,
110
  "step": 85
111
  },
112
  {
113
- "epoch": 0.15,
114
- "learning_rate": 2.9957770303162634e-05,
115
- "loss": 1.4498,
116
  "step": 90
117
  },
118
  {
119
- "epoch": 0.15,
120
- "learning_rate": 2.9941352371121173e-05,
121
- "loss": 1.4393,
122
  "step": 95
123
  },
124
  {
125
- "epoch": 0.16,
126
- "learning_rate": 2.992225052914641e-05,
127
- "loss": 1.4291,
128
  "step": 100
129
  },
130
  {
131
- "epoch": 0.17,
132
- "learning_rate": 2.990046820849558e-05,
133
- "loss": 1.4587,
134
  "step": 105
135
  },
136
  {
137
- "epoch": 0.18,
138
- "learning_rate": 2.9876009321919372e-05,
139
- "loss": 1.4272,
140
  "step": 110
141
  },
142
  {
143
- "epoch": 0.19,
144
- "learning_rate": 2.9848878262959076e-05,
145
- "loss": 1.4622,
146
  "step": 115
147
  },
148
  {
149
- "epoch": 0.19,
150
- "learning_rate": 2.981907990515739e-05,
151
- "loss": 1.4863,
152
  "step": 120
153
  },
154
  {
155
- "epoch": 0.2,
156
- "learning_rate": 2.9786619601182965e-05,
157
- "loss": 1.4274,
158
  "step": 125
159
- },
160
- {
161
- "epoch": 0.21,
162
- "learning_rate": 2.975150318186892e-05,
163
- "loss": 1.4382,
164
- "step": 130
165
- },
166
- {
167
- "epoch": 0.22,
168
- "learning_rate": 2.9713736955165456e-05,
169
- "loss": 1.4413,
170
- "step": 135
171
- },
172
- {
173
- "epoch": 0.23,
174
- "learning_rate": 2.9673327705006745e-05,
175
- "loss": 1.431,
176
- "step": 140
177
- },
178
- {
179
- "epoch": 0.23,
180
- "learning_rate": 2.963028269009235e-05,
181
- "loss": 1.4659,
182
- "step": 145
183
- },
184
- {
185
- "epoch": 0.24,
186
- "learning_rate": 2.9584609642583337e-05,
187
- "loss": 1.4426,
188
- "step": 150
189
- },
190
- {
191
- "epoch": 0.25,
192
- "learning_rate": 2.9536316766713357e-05,
193
- "loss": 1.4733,
194
- "step": 155
195
- },
196
- {
197
- "epoch": 0.26,
198
- "learning_rate": 2.9485412737314923e-05,
199
- "loss": 1.4595,
200
- "step": 160
201
- },
202
- {
203
- "epoch": 0.27,
204
- "learning_rate": 2.9431906698261136e-05,
205
- "loss": 1.4845,
206
- "step": 165
207
- },
208
- {
209
- "epoch": 0.28,
210
- "learning_rate": 2.9375808260823192e-05,
211
- "loss": 1.5219,
212
- "step": 170
213
- },
214
- {
215
- "epoch": 0.28,
216
- "learning_rate": 2.931712750194392e-05,
217
- "loss": 1.532,
218
- "step": 175
219
- },
220
- {
221
- "epoch": 0.29,
222
- "learning_rate": 2.9255874962427638e-05,
223
- "loss": 1.4632,
224
- "step": 180
225
- },
226
- {
227
- "epoch": 0.3,
228
- "learning_rate": 2.9192061645046724e-05,
229
- "loss": 1.5057,
230
- "step": 185
231
- },
232
- {
233
- "epoch": 0.31,
234
- "learning_rate": 2.9125699012565204e-05,
235
- "loss": 1.5023,
236
- "step": 190
237
- },
238
- {
239
- "epoch": 0.32,
240
- "learning_rate": 2.9056798985679688e-05,
241
- "loss": 1.4423,
242
- "step": 195
243
- },
244
- {
245
- "epoch": 0.32,
246
- "learning_rate": 2.8985373940878053e-05,
247
- "loss": 1.4968,
248
- "step": 200
249
- },
250
- {
251
- "epoch": 0.33,
252
- "learning_rate": 2.8911436708216276e-05,
253
- "loss": 1.4767,
254
- "step": 205
255
- },
256
- {
257
- "epoch": 0.34,
258
- "learning_rate": 2.883500056901376e-05,
259
- "loss": 1.4968,
260
- "step": 210
261
- },
262
- {
263
- "epoch": 0.35,
264
- "learning_rate": 2.875607925346762e-05,
265
- "loss": 1.4842,
266
- "step": 215
267
- },
268
- {
269
- "epoch": 0.36,
270
- "learning_rate": 2.867468693818634e-05,
271
- "loss": 1.4921,
272
- "step": 220
273
- },
274
- {
275
- "epoch": 0.36,
276
- "learning_rate": 2.859083824364323e-05,
277
- "loss": 1.4969,
278
- "step": 225
279
- },
280
- {
281
- "epoch": 0.37,
282
- "learning_rate": 2.8504548231550143e-05,
283
- "loss": 1.4423,
284
- "step": 230
285
- },
286
- {
287
- "epoch": 0.38,
288
- "learning_rate": 2.8415832402151956e-05,
289
- "loss": 1.5165,
290
- "step": 235
291
- },
292
- {
293
- "epoch": 0.39,
294
- "learning_rate": 2.832470669144227e-05,
295
- "loss": 1.4715,
296
- "step": 240
297
- },
298
- {
299
- "epoch": 0.4,
300
- "learning_rate": 2.8231187468300836e-05,
301
- "loss": 1.4747,
302
- "step": 245
303
- },
304
- {
305
- "epoch": 0.4,
306
- "learning_rate": 2.8135291531553192e-05,
307
- "loss": 1.466,
308
- "step": 250
309
- },
310
- {
311
- "epoch": 0.41,
312
- "learning_rate": 2.8037036106953134e-05,
313
- "loss": 1.4609,
314
- "step": 255
315
- },
316
- {
317
- "epoch": 0.42,
318
- "learning_rate": 2.793643884408843e-05,
319
- "loss": 1.4649,
320
- "step": 260
321
- },
322
- {
323
- "epoch": 0.43,
324
- "learning_rate": 2.7833517813210437e-05,
325
- "loss": 1.4923,
326
- "step": 265
327
- },
328
- {
329
- "epoch": 0.44,
330
- "learning_rate": 2.7728291501988173e-05,
331
- "loss": 1.4968,
332
- "step": 270
333
- },
334
- {
335
- "epoch": 0.45,
336
- "learning_rate": 2.7620778812187338e-05,
337
- "loss": 1.4545,
338
- "step": 275
339
- },
340
- {
341
- "epoch": 0.45,
342
- "learning_rate": 2.7510999056275038e-05,
343
- "loss": 1.4791,
344
- "step": 280
345
- },
346
- {
347
- "epoch": 0.46,
348
- "learning_rate": 2.739897195395067e-05,
349
- "loss": 1.4849,
350
- "step": 285
351
- },
352
- {
353
- "epoch": 0.47,
354
- "learning_rate": 2.728471762860369e-05,
355
- "loss": 1.4492,
356
- "step": 290
357
- },
358
- {
359
- "epoch": 0.48,
360
- "learning_rate": 2.716825660369885e-05,
361
- "loss": 1.5052,
362
- "step": 295
363
- },
364
- {
365
- "epoch": 0.49,
366
- "learning_rate": 2.704960979908957e-05,
367
- "loss": 1.4701,
368
- "step": 300
369
- },
370
- {
371
- "epoch": 0.49,
372
- "learning_rate": 2.6928798527260127e-05,
373
- "loss": 1.48,
374
- "step": 305
375
- },
376
- {
377
- "epoch": 0.5,
378
- "learning_rate": 2.680584448949729e-05,
379
- "loss": 1.5158,
380
- "step": 310
381
- },
382
- {
383
- "epoch": 0.51,
384
- "learning_rate": 2.6680769771992136e-05,
385
- "loss": 1.5047,
386
- "step": 315
387
- },
388
- {
389
- "epoch": 0.52,
390
- "learning_rate": 2.6553596841872682e-05,
391
- "loss": 1.5023,
392
- "step": 320
393
- },
394
- {
395
- "epoch": 0.53,
396
- "learning_rate": 2.6424348543168177e-05,
397
- "loss": 1.4932,
398
- "step": 325
399
- },
400
- {
401
- "epoch": 0.53,
402
- "learning_rate": 2.6293048092705586e-05,
403
- "loss": 1.5241,
404
- "step": 330
405
- },
406
- {
407
- "epoch": 0.54,
408
- "learning_rate": 2.6159719075939196e-05,
409
- "loss": 1.4969,
410
- "step": 335
411
- },
412
- {
413
- "epoch": 0.55,
414
- "learning_rate": 2.602438544271395e-05,
415
- "loss": 1.4983,
416
- "step": 340
417
- },
418
- {
419
- "epoch": 0.56,
420
- "learning_rate": 2.5887071502963338e-05,
421
- "loss": 1.4801,
422
- "step": 345
423
- },
424
- {
425
- "epoch": 0.57,
426
- "learning_rate": 2.574780192234264e-05,
427
- "loss": 1.4595,
428
- "step": 350
429
- },
430
- {
431
- "epoch": 0.57,
432
- "learning_rate": 2.5606601717798212e-05,
433
- "loss": 1.4901,
434
- "step": 355
435
- },
436
- {
437
- "epoch": 0.58,
438
- "learning_rate": 2.5463496253073726e-05,
439
- "loss": 1.4939,
440
- "step": 360
441
- },
442
- {
443
- "epoch": 0.59,
444
- "learning_rate": 2.531851123415406e-05,
445
- "loss": 1.4791,
446
- "step": 365
447
- },
448
- {
449
- "epoch": 0.6,
450
- "learning_rate": 2.5171672704647785e-05,
451
- "loss": 1.448,
452
- "step": 370
453
- },
454
- {
455
- "epoch": 0.61,
456
- "learning_rate": 2.502300704110891e-05,
457
- "loss": 1.4857,
458
- "step": 375
459
- },
460
- {
461
- "epoch": 0.61,
462
- "learning_rate": 2.4872540948298913e-05,
463
- "loss": 1.4829,
464
- "step": 380
465
- },
466
- {
467
- "epoch": 0.62,
468
- "learning_rate": 2.472030145438974e-05,
469
- "loss": 1.4786,
470
- "step": 385
471
- },
472
- {
473
- "epoch": 0.63,
474
- "learning_rate": 2.4566315906108772e-05,
475
- "loss": 1.4118,
476
- "step": 390
477
- },
478
- {
479
- "epoch": 0.64,
480
- "learning_rate": 2.4410611963826522e-05,
481
- "loss": 1.429,
482
- "step": 395
483
- },
484
- {
485
- "epoch": 0.65,
486
- "learning_rate": 2.4253217596588036e-05,
487
- "loss": 1.4719,
488
- "step": 400
489
- },
490
- {
491
- "epoch": 0.66,
492
- "learning_rate": 2.4094161077088784e-05,
493
- "loss": 1.5054,
494
- "step": 405
495
- },
496
- {
497
- "epoch": 0.66,
498
- "learning_rate": 2.3933470976596088e-05,
499
- "loss": 1.4493,
500
- "step": 410
501
- },
502
- {
503
- "epoch": 0.67,
504
- "learning_rate": 2.3771176159816846e-05,
505
- "loss": 1.4957,
506
- "step": 415
507
- },
508
- {
509
- "epoch": 0.68,
510
- "learning_rate": 2.360730577971259e-05,
511
- "loss": 1.4764,
512
- "step": 420
513
- },
514
- {
515
- "epoch": 0.69,
516
- "learning_rate": 2.3441889272262742e-05,
517
- "loss": 1.5056,
518
- "step": 425
519
- },
520
- {
521
- "epoch": 0.7,
522
- "learning_rate": 2.3274956351177037e-05,
523
- "loss": 1.4732,
524
- "step": 430
525
- },
526
- {
527
- "epoch": 0.7,
528
- "learning_rate": 2.3106537002558074e-05,
529
- "loss": 1.5047,
530
- "step": 435
531
- },
532
- {
533
- "epoch": 0.71,
534
- "learning_rate": 2.293666147951491e-05,
535
- "loss": 1.5098,
536
- "step": 440
537
- },
538
- {
539
- "epoch": 0.72,
540
- "learning_rate": 2.2765360296728697e-05,
541
- "loss": 1.504,
542
- "step": 445
543
- },
544
- {
545
- "epoch": 0.73,
546
- "learning_rate": 2.259266422497137e-05,
547
- "loss": 1.4742,
548
- "step": 450
549
- },
550
- {
551
- "epoch": 0.74,
552
- "learning_rate": 2.2418604285578273e-05,
553
- "loss": 1.482,
554
- "step": 455
555
- },
556
- {
557
- "epoch": 0.74,
558
- "learning_rate": 2.2243211744875818e-05,
559
- "loss": 1.4702,
560
- "step": 460
561
- },
562
- {
563
- "epoch": 0.75,
564
- "learning_rate": 2.2066518108565137e-05,
565
- "loss": 1.4839,
566
- "step": 465
567
- },
568
- {
569
- "epoch": 0.76,
570
- "learning_rate": 2.18885551160627e-05,
571
- "loss": 1.4942,
572
- "step": 470
573
- },
574
- {
575
- "epoch": 0.77,
576
- "learning_rate": 2.1709354734798998e-05,
577
- "loss": 1.4497,
578
- "step": 475
579
- },
580
- {
581
- "epoch": 0.78,
582
- "learning_rate": 2.152894915447624e-05,
583
- "loss": 1.4927,
584
- "step": 480
585
- },
586
- {
587
- "epoch": 0.78,
588
- "learning_rate": 2.134737078128611e-05,
589
- "loss": 1.4794,
590
- "step": 485
591
- },
592
- {
593
- "epoch": 0.79,
594
- "learning_rate": 2.1164652232088674e-05,
595
- "loss": 1.5034,
596
- "step": 490
597
- },
598
- {
599
- "epoch": 0.8,
600
- "learning_rate": 2.0980826328553416e-05,
601
- "loss": 1.5157,
602
- "step": 495
603
- },
604
- {
605
- "epoch": 0.81,
606
- "learning_rate": 2.0795926091263504e-05,
607
- "loss": 1.4681,
608
- "step": 500
609
- },
610
- {
611
- "epoch": 0.82,
612
- "learning_rate": 2.0609984733784287e-05,
613
- "loss": 1.4732,
614
- "step": 505
615
- },
616
- {
617
- "epoch": 0.83,
618
- "learning_rate": 2.042303565669719e-05,
619
- "loss": 1.5047,
620
- "step": 510
621
- },
622
- {
623
- "epoch": 0.83,
624
- "learning_rate": 2.0235112441599948e-05,
625
- "loss": 1.5093,
626
- "step": 515
627
- },
628
- {
629
- "epoch": 0.84,
630
- "learning_rate": 2.0046248845074373e-05,
631
- "loss": 1.4515,
632
- "step": 520
633
- },
634
- {
635
- "epoch": 0.85,
636
- "learning_rate": 1.9856478792622666e-05,
637
- "loss": 1.5051,
638
- "step": 525
639
- },
640
- {
641
- "epoch": 0.86,
642
- "learning_rate": 1.9665836372573397e-05,
643
- "loss": 1.5073,
644
- "step": 530
645
- },
646
- {
647
- "epoch": 0.87,
648
- "learning_rate": 1.947435582995821e-05,
649
- "loss": 1.4952,
650
- "step": 535
651
- },
652
- {
653
- "epoch": 0.87,
654
- "learning_rate": 1.928207156036043e-05,
655
- "loss": 1.4308,
656
- "step": 540
657
- },
658
- {
659
- "epoch": 0.88,
660
- "learning_rate": 1.9089018103736568e-05,
661
- "loss": 1.4588,
662
- "step": 545
663
- },
664
- {
665
- "epoch": 0.89,
666
- "learning_rate": 1.8895230138211942e-05,
667
- "loss": 1.5477,
668
- "step": 550
669
- },
670
- {
671
- "epoch": 0.9,
672
- "learning_rate": 1.870074247385144e-05,
673
- "loss": 1.4979,
674
- "step": 555
675
- },
676
- {
677
- "epoch": 0.91,
678
- "learning_rate": 1.8505590046406615e-05,
679
- "loss": 1.4487,
680
- "step": 560
681
- },
682
- {
683
- "epoch": 0.91,
684
- "learning_rate": 1.8309807911040186e-05,
685
- "loss": 1.4671,
686
- "step": 565
687
- },
688
- {
689
- "epoch": 0.92,
690
- "learning_rate": 1.8113431236029078e-05,
691
- "loss": 1.4486,
692
- "step": 570
693
- },
694
- {
695
- "epoch": 0.93,
696
- "learning_rate": 1.7916495296447162e-05,
697
- "loss": 1.483,
698
- "step": 575
699
- },
700
- {
701
- "epoch": 0.94,
702
- "learning_rate": 1.771903546782883e-05,
703
- "loss": 1.4896,
704
- "step": 580
705
- },
706
- {
707
- "epoch": 0.95,
708
- "learning_rate": 1.7521087219814454e-05,
709
- "loss": 1.5259,
710
- "step": 585
711
- },
712
- {
713
- "epoch": 0.95,
714
- "learning_rate": 1.7322686109779032e-05,
715
- "loss": 1.4845,
716
- "step": 590
717
- },
718
- {
719
- "epoch": 0.96,
720
- "learning_rate": 1.7123867776445e-05,
721
- "loss": 1.4866,
722
- "step": 595
723
- },
724
- {
725
- "epoch": 0.97,
726
- "learning_rate": 1.692466793348047e-05,
727
- "loss": 1.4968,
728
- "step": 600
729
- },
730
- {
731
- "epoch": 0.98,
732
- "learning_rate": 1.6725122363084004e-05,
733
- "loss": 1.4582,
734
- "step": 605
735
- },
736
- {
737
- "epoch": 0.99,
738
- "learning_rate": 1.6525266909557046e-05,
739
- "loss": 1.4605,
740
- "step": 610
741
- },
742
- {
743
- "epoch": 1.0,
744
- "learning_rate": 1.6325137472865262e-05,
745
- "loss": 1.4391,
746
- "step": 615
747
- },
748
- {
749
- "epoch": 1.0,
750
- "eval_loss": 3.6259799003601074,
751
- "eval_runtime": 967.3528,
752
- "eval_samples_per_second": 8.998,
753
- "eval_steps_per_second": 2.249,
754
- "step": 617
755
- },
756
- {
757
- "epoch": 1.0,
758
- "learning_rate": 1.6124770002189804e-05,
759
- "loss": 1.5992,
760
- "step": 620
761
- },
762
- {
763
- "epoch": 1.01,
764
- "learning_rate": 1.5924200489469782e-05,
765
- "loss": 1.1688,
766
- "step": 625
767
- },
768
- {
769
- "epoch": 1.02,
770
- "learning_rate": 1.572346496293706e-05,
771
- "loss": 1.1778,
772
- "step": 630
773
- },
774
- {
775
- "epoch": 1.03,
776
- "learning_rate": 1.5522599480644496e-05,
777
- "loss": 1.1652,
778
- "step": 635
779
- },
780
- {
781
- "epoch": 1.04,
782
- "learning_rate": 1.532164012398886e-05,
783
- "loss": 1.1344,
784
- "step": 640
785
- },
786
- {
787
- "epoch": 1.05,
788
- "learning_rate": 1.5120622991229545e-05,
789
- "loss": 1.1474,
790
- "step": 645
791
- },
792
- {
793
- "epoch": 1.05,
794
- "learning_rate": 1.4919584191004244e-05,
795
- "loss": 1.1457,
796
- "step": 650
797
- },
798
- {
799
- "epoch": 1.06,
800
- "learning_rate": 1.471855983584276e-05,
801
- "loss": 1.1441,
802
- "step": 655
803
- },
804
- {
805
- "epoch": 1.07,
806
- "learning_rate": 1.4517586035680145e-05,
807
- "loss": 1.1546,
808
- "step": 660
809
- },
810
- {
811
- "epoch": 1.08,
812
- "learning_rate": 1.431669889137027e-05,
813
- "loss": 1.1526,
814
- "step": 665
815
- },
816
- {
817
- "epoch": 1.09,
818
- "learning_rate": 1.4115934488201047e-05,
819
- "loss": 1.1778,
820
- "step": 670
821
- },
822
- {
823
- "epoch": 1.09,
824
- "learning_rate": 1.3915328889412434e-05,
825
- "loss": 1.1468,
826
- "step": 675
827
- },
828
- {
829
- "epoch": 1.1,
830
- "learning_rate": 1.3714918129718418e-05,
831
- "loss": 1.1367,
832
- "step": 680
833
- },
834
- {
835
- "epoch": 1.11,
836
- "learning_rate": 1.3514738208834112e-05,
837
- "loss": 1.1972,
838
- "step": 685
839
- },
840
- {
841
- "epoch": 1.12,
842
- "learning_rate": 1.331482508500912e-05,
843
- "loss": 1.1701,
844
- "step": 690
845
- },
846
- {
847
- "epoch": 1.13,
848
- "learning_rate": 1.31152146685684e-05,
849
- "loss": 1.1911,
850
- "step": 695
851
- },
852
- {
853
- "epoch": 1.13,
854
- "learning_rate": 1.2915942815461677e-05,
855
- "loss": 1.1758,
856
- "step": 700
857
- },
858
- {
859
- "epoch": 1.14,
860
- "learning_rate": 1.2717045320822658e-05,
861
- "loss": 1.1486,
862
- "step": 705
863
- },
864
- {
865
- "epoch": 1.15,
866
- "learning_rate": 1.2518557912539185e-05,
867
- "loss": 1.1502,
868
- "step": 710
869
- },
870
- {
871
- "epoch": 1.16,
872
- "learning_rate": 1.232051624483541e-05,
873
- "loss": 1.1459,
874
- "step": 715
875
- },
876
- {
877
- "epoch": 1.17,
878
- "learning_rate": 1.2122955891867278e-05,
879
- "loss": 1.1546,
880
- "step": 720
881
- },
882
- {
883
- "epoch": 1.17,
884
- "learning_rate": 1.1925912341332324e-05,
885
- "loss": 1.165,
886
- "step": 725
887
- },
888
- {
889
- "epoch": 1.18,
890
- "learning_rate": 1.1729420988095042e-05,
891
- "loss": 1.1548,
892
- "step": 730
893
- },
894
- {
895
- "epoch": 1.19,
896
- "learning_rate": 1.1533517127828926e-05,
897
- "loss": 1.1454,
898
- "step": 735
899
- },
900
- {
901
- "epoch": 1.2,
902
- "learning_rate": 1.1338235950676305e-05,
903
- "loss": 1.19,
904
- "step": 740
905
- },
906
- {
907
- "epoch": 1.21,
908
- "learning_rate": 1.1143612534927153e-05,
909
- "loss": 1.1475,
910
- "step": 745
911
- },
912
- {
913
- "epoch": 1.22,
914
- "learning_rate": 1.0949681840717997e-05,
915
- "loss": 1.1754,
916
- "step": 750
917
- },
918
- {
919
- "epoch": 1.22,
920
- "learning_rate": 1.0756478703752036e-05,
921
- "loss": 1.1041,
922
- "step": 755
923
- },
924
- {
925
- "epoch": 1.23,
926
- "learning_rate": 1.0564037829041609e-05,
927
- "loss": 1.1465,
928
- "step": 760
929
- },
930
- {
931
- "epoch": 1.24,
932
- "learning_rate": 1.037239378467416e-05,
933
- "loss": 1.1704,
934
- "step": 765
935
- },
936
- {
937
- "epoch": 1.25,
938
- "learning_rate": 1.0181580995602766e-05,
939
- "loss": 1.1716,
940
- "step": 770
941
- },
942
- {
943
- "epoch": 1.26,
944
- "learning_rate": 9.991633737462405e-06,
945
- "loss": 1.1902,
946
- "step": 775
947
- },
948
- {
949
- "epoch": 1.26,
950
- "learning_rate": 9.802586130413045e-06,
951
- "loss": 1.1342,
952
- "step": 780
953
- },
954
- {
955
- "epoch": 1.27,
956
- "learning_rate": 9.614472133010623e-06,
957
- "loss": 1.1848,
958
- "step": 785
959
- },
960
- {
961
- "epoch": 1.28,
962
- "learning_rate": 9.42732553610712e-06,
963
- "loss": 1.1652,
964
- "step": 790
965
- },
966
- {
967
- "epoch": 1.29,
968
- "learning_rate": 9.241179956780689e-06,
969
- "loss": 1.1832,
970
- "step": 795
971
- },
972
- {
973
- "epoch": 1.3,
974
- "learning_rate": 9.056068832297041e-06,
975
- "loss": 1.1935,
976
- "step": 800
977
- },
978
- {
979
- "epoch": 1.3,
980
- "learning_rate": 8.872025414103135e-06,
981
- "loss": 1.1714,
982
- "step": 805
983
- },
984
- {
985
- "epoch": 1.31,
986
- "learning_rate": 8.689082761854213e-06,
987
- "loss": 1.1525,
988
- "step": 810
989
- },
990
- {
991
- "epoch": 1.32,
992
- "learning_rate": 8.507273737475307e-06,
993
- "loss": 1.1701,
994
- "step": 815
995
- },
996
- {
997
- "epoch": 1.33,
998
- "learning_rate": 8.326630999258286e-06,
999
- "loss": 1.1761,
1000
- "step": 820
1001
- },
1002
- {
1003
- "epoch": 1.34,
1004
- "learning_rate": 8.14718699599542e-06,
1005
- "loss": 1.1323,
1006
- "step": 825
1007
- },
1008
- {
1009
- "epoch": 1.34,
1010
- "learning_rate": 7.968973961150653e-06,
1011
- "loss": 1.1768,
1012
- "step": 830
1013
- },
1014
- {
1015
- "epoch": 1.35,
1016
- "learning_rate": 7.792023907069486e-06,
1017
- "loss": 1.1487,
1018
- "step": 835
1019
- },
1020
- {
1021
- "epoch": 1.36,
1022
- "learning_rate": 7.616368619228645e-06,
1023
- "loss": 1.1445,
1024
- "step": 840
1025
- },
1026
- {
1027
- "epoch": 1.37,
1028
- "learning_rate": 7.442039650526419e-06,
1029
- "loss": 1.1348,
1030
- "step": 845
1031
- },
1032
- {
1033
- "epoch": 1.38,
1034
- "learning_rate": 7.2690683156148705e-06,
1035
- "loss": 1.1478,
1036
- "step": 850
1037
- },
1038
- {
1039
- "epoch": 1.39,
1040
- "learning_rate": 7.097485685274776e-06,
1041
- "loss": 1.1625,
1042
- "step": 855
1043
- },
1044
- {
1045
- "epoch": 1.39,
1046
- "learning_rate": 6.927322580834376e-06,
1047
- "loss": 1.1331,
1048
- "step": 860
1049
- },
1050
- {
1051
- "epoch": 1.4,
1052
- "learning_rate": 6.758609568632982e-06,
1053
- "loss": 1.1608,
1054
- "step": 865
1055
- },
1056
- {
1057
- "epoch": 1.41,
1058
- "learning_rate": 6.591376954530345e-06,
1059
- "loss": 1.1349,
1060
- "step": 870
1061
- },
1062
- {
1063
- "epoch": 1.42,
1064
- "learning_rate": 6.4256547784628e-06,
1065
- "loss": 1.1569,
1066
- "step": 875
1067
- },
1068
- {
1069
- "epoch": 1.43,
1070
- "learning_rate": 6.261472809047244e-06,
1071
- "loss": 1.152,
1072
- "step": 880
1073
- },
1074
- {
1075
- "epoch": 1.43,
1076
- "learning_rate": 6.098860538233769e-06,
1077
- "loss": 1.1498,
1078
- "step": 885
1079
- },
1080
- {
1081
- "epoch": 1.44,
1082
- "learning_rate": 5.937847176008072e-06,
1083
- "loss": 1.1802,
1084
- "step": 890
1085
- },
1086
- {
1087
- "epoch": 1.45,
1088
- "learning_rate": 5.778461645144438e-06,
1089
- "loss": 1.1717,
1090
- "step": 895
1091
- },
1092
- {
1093
- "epoch": 1.46,
1094
- "learning_rate": 5.6207325760103845e-06,
1095
- "loss": 1.152,
1096
- "step": 900
1097
- },
1098
- {
1099
- "epoch": 1.47,
1100
- "learning_rate": 5.464688301423782e-06,
1101
- "loss": 1.1414,
1102
- "step": 905
1103
- },
1104
- {
1105
- "epoch": 1.47,
1106
- "learning_rate": 5.310356851563427e-06,
1107
- "loss": 1.154,
1108
- "step": 910
1109
- },
1110
- {
1111
- "epoch": 1.48,
1112
- "learning_rate": 5.1577659489340255e-06,
1113
- "loss": 1.1353,
1114
- "step": 915
1115
- },
1116
- {
1117
- "epoch": 1.49,
1118
- "learning_rate": 5.00694300338638e-06,
1119
- "loss": 1.1783,
1120
- "step": 920
1121
- },
1122
- {
1123
- "epoch": 1.5,
1124
- "learning_rate": 4.857915107193783e-06,
1125
- "loss": 1.14,
1126
- "step": 925
1127
- },
1128
- {
1129
- "epoch": 1.51,
1130
- "learning_rate": 4.710709030185422e-06,
1131
- "loss": 1.1755,
1132
- "step": 930
1133
- },
1134
- {
1135
- "epoch": 1.51,
1136
- "learning_rate": 4.565351214937748e-06,
1137
- "loss": 1.1121,
1138
- "step": 935
1139
- },
1140
- {
1141
- "epoch": 1.52,
1142
- "learning_rate": 4.421867772024601e-06,
1143
- "loss": 1.171,
1144
- "step": 940
1145
- },
1146
- {
1147
- "epoch": 1.53,
1148
- "learning_rate": 4.280284475326948e-06,
1149
- "loss": 1.1517,
1150
- "step": 945
1151
- },
1152
- {
1153
- "epoch": 1.54,
1154
- "learning_rate": 4.140626757403176e-06,
1155
- "loss": 1.1694,
1156
- "step": 950
1157
- },
1158
- {
1159
- "epoch": 1.55,
1160
- "learning_rate": 4.002919704920607e-06,
1161
- "loss": 1.1465,
1162
- "step": 955
1163
- },
1164
- {
1165
- "epoch": 1.56,
1166
- "learning_rate": 3.8671880541492236e-06,
1167
- "loss": 1.179,
1168
- "step": 960
1169
- },
1170
- {
1171
- "epoch": 1.56,
1172
- "learning_rate": 3.7334561865182694e-06,
1173
- "loss": 1.1498,
1174
- "step": 965
1175
- },
1176
- {
1177
- "epoch": 1.57,
1178
- "learning_rate": 3.6017481242366503e-06,
1179
- "loss": 1.1438,
1180
- "step": 970
1181
- },
1182
- {
1183
- "epoch": 1.58,
1184
- "learning_rate": 3.472087525977823e-06,
1185
- "loss": 1.1647,
1186
- "step": 975
1187
- },
1188
- {
1189
- "epoch": 1.59,
1190
- "learning_rate": 3.3444976826299754e-06,
1191
- "loss": 1.1475,
1192
- "step": 980
1193
- },
1194
- {
1195
- "epoch": 1.6,
1196
- "learning_rate": 3.219001513112329e-06,
1197
- "loss": 1.15,
1198
- "step": 985
1199
- },
1200
- {
1201
- "epoch": 1.6,
1202
- "learning_rate": 3.0956215602581933e-06,
1203
- "loss": 1.1613,
1204
- "step": 990
1205
- },
1206
- {
1207
- "epoch": 1.61,
1208
- "learning_rate": 2.974379986765622e-06,
1209
- "loss": 1.1672,
1210
- "step": 995
1211
- },
1212
- {
1213
- "epoch": 1.62,
1214
- "learning_rate": 2.855298571216316e-06,
1215
- "loss": 1.1702,
1216
- "step": 1000
1217
- },
1218
- {
1219
- "epoch": 1.63,
1220
- "learning_rate": 2.738398704163561e-06,
1221
- "loss": 1.1634,
1222
- "step": 1005
1223
- },
1224
- {
1225
- "epoch": 1.64,
1226
- "learning_rate": 2.6237013842898533e-06,
1227
- "loss": 1.1756,
1228
- "step": 1010
1229
- },
1230
- {
1231
- "epoch": 1.64,
1232
- "learning_rate": 2.511227214634887e-06,
1233
- "loss": 1.1075,
1234
- "step": 1015
1235
  }
1236
  ],
1237
- "max_steps": 1234,
1238
  "num_train_epochs": 2,
1239
- "total_flos": 4.827958566725878e+17,
1240
  "trial_name": null,
1241
  "trial_params": null
1242
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.10196752819786556,
5
+ "global_step": 126,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.0,
12
+ "learning_rate": 1.3333333333333334e-06,
13
+ "loss": 1.1835,
14
  "step": 5
15
  },
16
  {
17
+ "epoch": 0.01,
18
+ "learning_rate": 2.666666666666667e-06,
19
+ "loss": 1.0039,
20
  "step": 10
21
  },
22
  {
23
+ "epoch": 0.01,
24
+ "learning_rate": 4.000000000000001e-06,
25
+ "loss": 0.8718,
26
  "step": 15
27
  },
28
  {
29
+ "epoch": 0.02,
30
+ "learning_rate": 5.333333333333334e-06,
31
+ "loss": 0.8558,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 0.02,
36
+ "learning_rate": 6.666666666666667e-06,
37
+ "loss": 0.7921,
38
  "step": 25
39
  },
40
  {
41
+ "epoch": 0.02,
42
+ "learning_rate": 8.000000000000001e-06,
43
+ "loss": 0.8194,
44
  "step": 30
45
  },
46
  {
47
+ "epoch": 0.03,
48
+ "learning_rate": 9.333333333333334e-06,
49
+ "loss": 0.8334,
50
  "step": 35
51
  },
52
  {
53
+ "epoch": 0.03,
54
+ "learning_rate": 1.0666666666666667e-05,
55
+ "loss": 0.8208,
56
  "step": 40
57
  },
58
  {
59
+ "epoch": 0.04,
60
+ "learning_rate": 1.2e-05,
61
+ "loss": 0.8417,
62
  "step": 45
63
  },
64
  {
65
+ "epoch": 0.04,
66
+ "learning_rate": 1.3333333333333333e-05,
67
+ "loss": 0.8709,
68
  "step": 50
69
  },
70
  {
71
+ "epoch": 0.04,
72
+ "learning_rate": 1.4666666666666666e-05,
73
+ "loss": 0.7942,
74
  "step": 55
75
  },
76
  {
77
+ "epoch": 0.05,
78
+ "learning_rate": 1.6000000000000003e-05,
79
+ "loss": 0.8464,
80
  "step": 60
81
  },
82
  {
83
+ "epoch": 0.05,
84
+ "learning_rate": 1.7333333333333336e-05,
85
+ "loss": 0.8823,
86
  "step": 65
87
  },
88
  {
89
+ "epoch": 0.06,
90
+ "learning_rate": 1.866666666666667e-05,
91
+ "loss": 0.8749,
92
  "step": 70
93
  },
94
  {
95
+ "epoch": 0.06,
96
+ "learning_rate": 2e-05,
97
+ "loss": 0.833,
98
  "step": 75
99
  },
100
  {
101
+ "epoch": 0.06,
102
+ "learning_rate": 1.999978492141723e-05,
103
+ "loss": 0.8407,
104
  "step": 80
105
  },
106
  {
107
+ "epoch": 0.07,
108
+ "learning_rate": 1.999913969492067e-05,
109
+ "loss": 0.842,
110
  "step": 85
111
  },
112
  {
113
+ "epoch": 0.07,
114
+ "learning_rate": 1.99980643482652e-05,
115
+ "loss": 0.8385,
116
  "step": 90
117
  },
118
  {
119
+ "epoch": 0.08,
120
+ "learning_rate": 1.9996558927707637e-05,
121
+ "loss": 0.8249,
122
  "step": 95
123
  },
124
  {
125
+ "epoch": 0.08,
126
+ "learning_rate": 1.9994623498004717e-05,
127
+ "loss": 0.8424,
128
  "step": 100
129
  },
130
  {
131
+ "epoch": 0.08,
132
+ "learning_rate": 1.9992258142410335e-05,
133
+ "loss": 0.8673,
134
  "step": 105
135
  },
136
  {
137
+ "epoch": 0.09,
138
+ "learning_rate": 1.9989462962671957e-05,
139
+ "loss": 0.8378,
140
  "step": 110
141
  },
142
  {
143
+ "epoch": 0.09,
144
+ "learning_rate": 1.998623807902625e-05,
145
+ "loss": 0.8581,
146
  "step": 115
147
  },
148
  {
149
+ "epoch": 0.1,
150
+ "learning_rate": 1.9982583630193882e-05,
151
+ "loss": 0.8201,
152
  "step": 120
153
  },
154
  {
155
+ "epoch": 0.1,
156
+ "learning_rate": 1.9978499773373596e-05,
157
+ "loss": 0.8574,
158
  "step": 125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  }
160
  ],
161
+ "max_steps": 2470,
162
  "num_train_epochs": 2,
163
+ "total_flos": 2.9702517424128e+16,
164
  "trial_name": null,
165
  "trial_params": null
166
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41b3235764a2f119a2586ca316b39c8e5986e318c5b3665781ee71759bb93cc0
3
- size 4271
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99af292dc978d81b8e09169d9fb8410c030752d9805e10400ee9f76704363117
3
+ size 4335
vocab.json CHANGED
The diff for this file is too large to render. See raw diff