gyr66 committed on
Commit
e83bddc
•
1 Parent(s): 14515df

Remove training state files

Browse files
Files changed (5) hide show
  1. optimizer.pt +0 -3
  2. rng_state.pth +0 -3
  3. scheduler.pt +0 -3
  4. trainer_state.json +0 -667
  5. training_args.bin +0 -3
optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed717cfcb9b86276595d2d947497e8b709cb5d79272a85565f833ddbf291464f
3
- size 2360545210
 
 
 
 
rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bad960e5f27bf370fe9b0d11733039b61e3de26728906f85e05d2a58f7978531
3
- size 14244
 
 
 
 
scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:219f4d64086ccfdb80cef48db3e01581583aed2c709577da652b2e88e2ab0789
3
- size 1064
 
 
 
 
trainer_state.json DELETED
@@ -1,667 +0,0 @@
1
- {
2
- "best_metric": 0.7295733911785972,
3
- "best_model_checkpoint": "Ernie-3.0-large-chinese-finetuned-ner/checkpoint-9072",
4
- "epoch": 36.0,
5
- "eval_steps": 500,
6
- "global_step": 9072,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 1.0,
13
- "learning_rate": 1.9600000000000002e-05,
14
- "loss": 0.7619,
15
- "step": 252
16
- },
17
- {
18
- "epoch": 1.0,
19
- "eval_accuracy": 0.8891147174729265,
20
- "eval_f1": 0.5908584169453734,
21
- "eval_loss": 0.371647447347641,
22
- "eval_precision": 0.5302120848339336,
23
- "eval_recall": 0.6671701913393756,
24
- "eval_runtime": 30.5213,
25
- "eval_samples_per_second": 16.48,
26
- "eval_steps_per_second": 2.064,
27
- "step": 252
28
- },
29
- {
30
- "epoch": 2.0,
31
- "learning_rate": 1.9200000000000003e-05,
32
- "loss": 0.3277,
33
- "step": 504
34
- },
35
- {
36
- "epoch": 2.0,
37
- "eval_accuracy": 0.9043503894250163,
38
- "eval_f1": 0.6514158800666296,
39
- "eval_loss": 0.31230422854423523,
40
- "eval_precision": 0.5827538247566064,
41
- "eval_recall": 0.7384189325276939,
42
- "eval_runtime": 30.3199,
43
- "eval_samples_per_second": 16.59,
44
- "eval_steps_per_second": 2.078,
45
- "step": 504
46
- },
47
- {
48
- "epoch": 3.0,
49
- "learning_rate": 1.88e-05,
50
- "loss": 0.2539,
51
- "step": 756
52
- },
53
- {
54
- "epoch": 3.0,
55
- "eval_accuracy": 0.9070344741986534,
56
- "eval_f1": 0.6798141221806642,
57
- "eval_loss": 0.3259331285953522,
58
- "eval_precision": 0.6182230467944754,
59
- "eval_recall": 0.7550352467270897,
60
- "eval_runtime": 31.8667,
61
- "eval_samples_per_second": 15.785,
62
- "eval_steps_per_second": 1.977,
63
- "step": 756
64
- },
65
- {
66
- "epoch": 4.0,
67
- "learning_rate": 1.8400000000000003e-05,
68
- "loss": 0.209,
69
- "step": 1008
70
- },
71
- {
72
- "epoch": 4.0,
73
- "eval_accuracy": 0.899121854345735,
74
- "eval_f1": 0.6714031971580817,
75
- "eval_loss": 0.3699039816856384,
76
- "eval_precision": 0.6004765687053217,
77
- "eval_recall": 0.7613293051359517,
78
- "eval_runtime": 29.8895,
79
- "eval_samples_per_second": 16.829,
80
- "eval_steps_per_second": 2.108,
81
- "step": 1008
82
- },
83
- {
84
- "epoch": 5.0,
85
- "learning_rate": 1.8e-05,
86
- "loss": 0.1696,
87
- "step": 1260
88
- },
89
- {
90
- "epoch": 5.0,
91
- "eval_accuracy": 0.9081980947652589,
92
- "eval_f1": 0.7022252810277587,
93
- "eval_loss": 0.35198503732681274,
94
- "eval_precision": 0.6449641803624104,
95
- "eval_recall": 0.7706445115810675,
96
- "eval_runtime": 31.5563,
97
- "eval_samples_per_second": 15.94,
98
- "eval_steps_per_second": 1.996,
99
- "step": 1260
100
- },
101
- {
102
- "epoch": 6.0,
103
- "learning_rate": 1.76e-05,
104
- "loss": 0.1394,
105
- "step": 1512
106
- },
107
- {
108
- "epoch": 6.0,
109
- "eval_accuracy": 0.9099047382629473,
110
- "eval_f1": 0.6968929804372841,
111
- "eval_loss": 0.36153343319892883,
112
- "eval_precision": 0.6417973717676981,
113
- "eval_recall": 0.7623363544813696,
114
- "eval_runtime": 30.5397,
115
- "eval_samples_per_second": 16.47,
116
- "eval_steps_per_second": 2.063,
117
- "step": 1512
118
- },
119
- {
120
- "epoch": 7.0,
121
- "learning_rate": 1.72e-05,
122
- "loss": 0.1184,
123
- "step": 1764
124
- },
125
- {
126
- "epoch": 7.0,
127
- "eval_accuracy": 0.9082911844105874,
128
- "eval_f1": 0.7067113318701628,
129
- "eval_loss": 0.37900495529174805,
130
- "eval_precision": 0.6529348986125934,
131
- "eval_recall": 0.7701409869083585,
132
- "eval_runtime": 30.8222,
133
- "eval_samples_per_second": 16.319,
134
- "eval_steps_per_second": 2.044,
135
- "step": 1764
136
- },
137
- {
138
- "epoch": 8.0,
139
- "learning_rate": 1.6800000000000002e-05,
140
- "loss": 0.1011,
141
- "step": 2016
142
- },
143
- {
144
- "epoch": 8.0,
145
- "eval_accuracy": 0.9137214137214137,
146
- "eval_f1": 0.7153439153439154,
147
- "eval_loss": 0.38163959980010986,
148
- "eval_precision": 0.671078755790867,
149
- "eval_recall": 0.7658610271903323,
150
- "eval_runtime": 30.6484,
151
- "eval_samples_per_second": 16.412,
152
- "eval_steps_per_second": 2.056,
153
- "step": 2016
154
- },
155
- {
156
- "epoch": 9.0,
157
- "learning_rate": 1.64e-05,
158
- "loss": 0.0853,
159
- "step": 2268
160
- },
161
- {
162
- "epoch": 9.0,
163
- "eval_accuracy": 0.9104477611940298,
164
- "eval_f1": 0.7113811638233232,
165
- "eval_loss": 0.4221731722354889,
166
- "eval_precision": 0.6637592673353685,
167
- "eval_recall": 0.7663645518630413,
168
- "eval_runtime": 29.8305,
169
- "eval_samples_per_second": 16.862,
170
- "eval_steps_per_second": 2.112,
171
- "step": 2268
172
- },
173
- {
174
- "epoch": 10.0,
175
- "learning_rate": 1.6000000000000003e-05,
176
- "loss": 0.0726,
177
- "step": 2520
178
- },
179
- {
180
- "epoch": 10.0,
181
- "eval_accuracy": 0.9108201197753436,
182
- "eval_f1": 0.7088488276187108,
183
- "eval_loss": 0.45063257217407227,
184
- "eval_precision": 0.6662236987818383,
185
- "eval_recall": 0.75730110775428,
186
- "eval_runtime": 31.119,
187
- "eval_samples_per_second": 16.164,
188
- "eval_steps_per_second": 2.024,
189
- "step": 2520
190
- },
191
- {
192
- "epoch": 11.0,
193
- "learning_rate": 1.5600000000000003e-05,
194
- "loss": 0.0625,
195
- "step": 2772
196
- },
197
- {
198
- "epoch": 11.0,
199
- "eval_accuracy": 0.9118596208148447,
200
- "eval_f1": 0.7197667638483967,
201
- "eval_loss": 0.465919554233551,
202
- "eval_precision": 0.6704323267434282,
203
- "eval_recall": 0.7769385699899295,
204
- "eval_runtime": 30.3792,
205
- "eval_samples_per_second": 16.557,
206
- "eval_steps_per_second": 2.074,
207
- "step": 2772
208
- },
209
- {
210
- "epoch": 12.0,
211
- "learning_rate": 1.5200000000000002e-05,
212
- "loss": 0.0554,
213
- "step": 3024
214
- },
215
- {
216
- "epoch": 12.0,
217
- "eval_accuracy": 0.9138920780711826,
218
- "eval_f1": 0.7228513529622126,
219
- "eval_loss": 0.4619043469429016,
220
- "eval_precision": 0.6864387593389178,
221
- "eval_recall": 0.7633434038267876,
222
- "eval_runtime": 32.0505,
223
- "eval_samples_per_second": 15.694,
224
- "eval_steps_per_second": 1.966,
225
- "step": 3024
226
- },
227
- {
228
- "epoch": 13.0,
229
- "learning_rate": 1.48e-05,
230
- "loss": 0.0468,
231
- "step": 3276
232
- },
233
- {
234
- "epoch": 13.0,
235
- "eval_accuracy": 0.9101840071989326,
236
- "eval_f1": 0.7200470311581422,
237
- "eval_loss": 0.5248007774353027,
238
- "eval_precision": 0.6754908449150673,
239
- "eval_recall": 0.770896273917422,
240
- "eval_runtime": 30.3642,
241
- "eval_samples_per_second": 16.566,
242
- "eval_steps_per_second": 2.075,
243
- "step": 3276
244
- },
245
- {
246
- "epoch": 14.0,
247
- "learning_rate": 1.4400000000000001e-05,
248
- "loss": 0.0416,
249
- "step": 3528
250
- },
251
- {
252
- "epoch": 14.0,
253
- "eval_accuracy": 0.9106649703664629,
254
- "eval_f1": 0.7144713526284701,
255
- "eval_loss": 0.5338897109031677,
256
- "eval_precision": 0.6730469619407968,
257
- "eval_recall": 0.7613293051359517,
258
- "eval_runtime": 30.627,
259
- "eval_samples_per_second": 16.423,
260
- "eval_steps_per_second": 2.057,
261
- "step": 3528
262
- },
263
- {
264
- "epoch": 15.0,
265
- "learning_rate": 1.4e-05,
266
- "loss": 0.0349,
267
- "step": 3780
268
- },
269
- {
270
- "epoch": 15.0,
271
- "eval_accuracy": 0.9078877959474975,
272
- "eval_f1": 0.71319108805847,
273
- "eval_loss": 0.5588390827178955,
274
- "eval_precision": 0.6705830192861894,
275
- "eval_recall": 0.7615810674723061,
276
- "eval_runtime": 30.9024,
277
- "eval_samples_per_second": 16.277,
278
- "eval_steps_per_second": 2.039,
279
- "step": 3780
280
- },
281
- {
282
- "epoch": 16.0,
283
- "learning_rate": 1.3600000000000002e-05,
284
- "loss": 0.0301,
285
- "step": 4032
286
- },
287
- {
288
- "epoch": 16.0,
289
- "eval_accuracy": 0.9083687591150278,
290
- "eval_f1": 0.7168475696083058,
291
- "eval_loss": 0.5909355878829956,
292
- "eval_precision": 0.6745115452930728,
293
- "eval_recall": 0.7648539778449144,
294
- "eval_runtime": 31.5608,
295
- "eval_samples_per_second": 15.938,
296
- "eval_steps_per_second": 1.996,
297
- "step": 4032
298
- },
299
- {
300
- "epoch": 17.0,
301
- "learning_rate": 1.3200000000000002e-05,
302
- "loss": 0.0293,
303
- "step": 4284
304
- },
305
- {
306
- "epoch": 17.0,
307
- "eval_accuracy": 0.9139075930120706,
308
- "eval_f1": 0.7264906201457759,
309
- "eval_loss": 0.557870090007782,
310
- "eval_precision": 0.6913804866954741,
311
- "eval_recall": 0.7653575025176234,
312
- "eval_runtime": 31.1498,
313
- "eval_samples_per_second": 16.148,
314
- "eval_steps_per_second": 2.022,
315
- "step": 4284
316
- },
317
- {
318
- "epoch": 18.0,
319
- "learning_rate": 1.2800000000000001e-05,
320
- "loss": 0.0264,
321
- "step": 4536
322
- },
323
- {
324
- "epoch": 18.0,
325
- "eval_accuracy": 0.9099047382629473,
326
- "eval_f1": 0.7172199665631717,
327
- "eval_loss": 0.5989866256713867,
328
- "eval_precision": 0.682189913675602,
329
- "eval_recall": 0.7560422960725075,
330
- "eval_runtime": 29.8647,
331
- "eval_samples_per_second": 16.843,
332
- "eval_steps_per_second": 2.11,
333
- "step": 4536
334
- },
335
- {
336
- "epoch": 19.0,
337
- "learning_rate": 1.2400000000000002e-05,
338
- "loss": 0.0223,
339
- "step": 4788
340
- },
341
- {
342
- "epoch": 19.0,
343
- "eval_accuracy": 0.9099357681447233,
344
- "eval_f1": 0.718458082062197,
345
- "eval_loss": 0.6070677638053894,
346
- "eval_precision": 0.6773690078037904,
347
- "eval_recall": 0.7648539778449144,
348
- "eval_runtime": 30.5876,
349
- "eval_samples_per_second": 16.445,
350
- "eval_steps_per_second": 2.06,
351
- "step": 4788
352
- },
353
- {
354
- "epoch": 20.0,
355
- "learning_rate": 1.2e-05,
356
- "loss": 0.0195,
357
- "step": 5040
358
- },
359
- {
360
- "epoch": 20.0,
361
- "eval_accuracy": 0.9119992552828373,
362
- "eval_f1": 0.720656439529076,
363
- "eval_loss": 0.6215817928314209,
364
- "eval_precision": 0.68289384719405,
365
- "eval_recall": 0.7628398791540786,
366
- "eval_runtime": 30.3859,
367
- "eval_samples_per_second": 16.554,
368
- "eval_steps_per_second": 2.073,
369
- "step": 5040
370
- },
371
- {
372
- "epoch": 21.0,
373
- "learning_rate": 1.16e-05,
374
- "loss": 0.0181,
375
- "step": 5292
376
- },
377
- {
378
- "epoch": 21.0,
379
- "eval_accuracy": 0.9089428119278865,
380
- "eval_f1": 0.7139341380522678,
381
- "eval_loss": 0.6391084790229797,
382
- "eval_precision": 0.6678360008770007,
383
- "eval_recall": 0.7668680765357503,
384
- "eval_runtime": 31.753,
385
- "eval_samples_per_second": 15.841,
386
- "eval_steps_per_second": 1.984,
387
- "step": 5292
388
- },
389
- {
390
- "epoch": 22.0,
391
- "learning_rate": 1.1200000000000001e-05,
392
- "loss": 0.016,
393
- "step": 5544
394
- },
395
- {
396
- "epoch": 22.0,
397
- "eval_accuracy": 0.9129766965587861,
398
- "eval_f1": 0.7200477326968974,
399
- "eval_loss": 0.6382994055747986,
400
- "eval_precision": 0.684437386569873,
401
- "eval_recall": 0.7595669687814703,
402
- "eval_runtime": 30.7224,
403
- "eval_samples_per_second": 16.372,
404
- "eval_steps_per_second": 2.051,
405
- "step": 5544
406
- },
407
- {
408
- "epoch": 23.0,
409
- "learning_rate": 1.0800000000000002e-05,
410
- "loss": 0.0153,
411
- "step": 5796
412
- },
413
- {
414
- "epoch": 23.0,
415
- "eval_accuracy": 0.9136903838396375,
416
- "eval_f1": 0.7225759942328487,
417
- "eval_loss": 0.6503807902336121,
418
- "eval_precision": 0.6911054929901173,
419
- "eval_recall": 0.7570493454179255,
420
- "eval_runtime": 30.4671,
421
- "eval_samples_per_second": 16.51,
422
- "eval_steps_per_second": 2.068,
423
- "step": 5796
424
- },
425
- {
426
- "epoch": 24.0,
427
- "learning_rate": 1.04e-05,
428
- "loss": 0.0135,
429
- "step": 6048
430
- },
431
- {
432
- "epoch": 24.0,
433
- "eval_accuracy": 0.911083873770441,
434
- "eval_f1": 0.7179244165383247,
435
- "eval_loss": 0.6738658547401428,
436
- "eval_precision": 0.6780040277466994,
437
- "eval_recall": 0.7628398791540786,
438
- "eval_runtime": 31.8247,
439
- "eval_samples_per_second": 15.805,
440
- "eval_steps_per_second": 1.98,
441
- "step": 6048
442
- },
443
- {
444
- "epoch": 25.0,
445
- "learning_rate": 1e-05,
446
- "loss": 0.0103,
447
- "step": 6300
448
- },
449
- {
450
- "epoch": 25.0,
451
- "eval_accuracy": 0.9127284575045769,
452
- "eval_f1": 0.7242580800192239,
453
- "eval_loss": 0.677778422832489,
454
- "eval_precision": 0.6927143185474604,
455
- "eval_recall": 0.7588116817724069,
456
- "eval_runtime": 31.5342,
457
- "eval_samples_per_second": 15.951,
458
- "eval_steps_per_second": 1.998,
459
- "step": 6300
460
- },
461
- {
462
- "epoch": 26.0,
463
- "learning_rate": 9.600000000000001e-06,
464
- "loss": 0.0109,
465
- "step": 6552
466
- },
467
- {
468
- "epoch": 26.0,
469
- "eval_accuracy": 0.9117665311695162,
470
- "eval_f1": 0.7213822894168466,
471
- "eval_loss": 0.6734189987182617,
472
- "eval_precision": 0.6891334250343879,
473
- "eval_recall": 0.756797583081571,
474
- "eval_runtime": 30.9002,
475
- "eval_samples_per_second": 16.278,
476
- "eval_steps_per_second": 2.039,
477
- "step": 6552
478
- },
479
- {
480
- "epoch": 27.0,
481
- "learning_rate": 9.200000000000002e-06,
482
- "loss": 0.0097,
483
- "step": 6804
484
- },
485
- {
486
- "epoch": 27.0,
487
- "eval_accuracy": 0.9124647035094796,
488
- "eval_f1": 0.7231973434535104,
489
- "eval_loss": 0.6837841272354126,
490
- "eval_precision": 0.6836322869955157,
491
- "eval_recall": 0.7676233635448136,
492
- "eval_runtime": 30.4438,
493
- "eval_samples_per_second": 16.522,
494
- "eval_steps_per_second": 2.069,
495
- "step": 6804
496
- },
497
- {
498
- "epoch": 28.0,
499
- "learning_rate": 8.8e-06,
500
- "loss": 0.0092,
501
- "step": 7056
502
- },
503
- {
504
- "epoch": 28.0,
505
- "eval_accuracy": 0.9118130759921804,
506
- "eval_f1": 0.7228887320630439,
507
- "eval_loss": 0.7063636183738708,
508
- "eval_precision": 0.6783664459161148,
509
- "eval_recall": 0.7736656596173213,
510
- "eval_runtime": 32.0989,
511
- "eval_samples_per_second": 15.67,
512
- "eval_steps_per_second": 1.963,
513
- "step": 7056
514
- },
515
- {
516
- "epoch": 29.0,
517
- "learning_rate": 8.400000000000001e-06,
518
- "loss": 0.0079,
519
- "step": 7308
520
- },
521
- {
522
- "epoch": 29.0,
523
- "eval_accuracy": 0.9113941725882024,
524
- "eval_f1": 0.7218511450381679,
525
- "eval_loss": 0.6944219470024109,
526
- "eval_precision": 0.685856754306437,
527
- "eval_recall": 0.7618328298086606,
528
- "eval_runtime": 31.0056,
529
- "eval_samples_per_second": 16.223,
530
- "eval_steps_per_second": 2.032,
531
- "step": 7308
532
- },
533
- {
534
- "epoch": 30.0,
535
- "learning_rate": 8.000000000000001e-06,
536
- "loss": 0.0078,
537
- "step": 7560
538
- },
539
- {
540
- "epoch": 30.0,
541
- "eval_accuracy": 0.9092220808638719,
542
- "eval_f1": 0.717688679245283,
543
- "eval_loss": 0.7390450835227966,
544
- "eval_precision": 0.6750221827861579,
545
- "eval_recall": 0.7661127895266868,
546
- "eval_runtime": 32.0929,
547
- "eval_samples_per_second": 15.673,
548
- "eval_steps_per_second": 1.963,
549
- "step": 7560
550
- },
551
- {
552
- "epoch": 31.0,
553
- "learning_rate": 7.600000000000001e-06,
554
- "loss": 0.0066,
555
- "step": 7812
556
- },
557
- {
558
- "epoch": 31.0,
559
- "eval_accuracy": 0.9112235082384336,
560
- "eval_f1": 0.7232323232323231,
561
- "eval_loss": 0.7528515458106995,
562
- "eval_precision": 0.6848975917173081,
563
- "eval_recall": 0.7661127895266868,
564
- "eval_runtime": 31.1215,
565
- "eval_samples_per_second": 16.162,
566
- "eval_steps_per_second": 2.024,
567
- "step": 7812
568
- },
569
- {
570
- "epoch": 32.0,
571
- "learning_rate": 7.2000000000000005e-06,
572
- "loss": 0.0061,
573
- "step": 8064
574
- },
575
- {
576
- "epoch": 32.0,
577
- "eval_accuracy": 0.9128680919725696,
578
- "eval_f1": 0.7292161520190024,
579
- "eval_loss": 0.7525067925453186,
580
- "eval_precision": 0.6901978417266187,
581
- "eval_recall": 0.7729103726082578,
582
- "eval_runtime": 30.5355,
583
- "eval_samples_per_second": 16.473,
584
- "eval_steps_per_second": 2.063,
585
- "step": 8064
586
- },
587
- {
588
- "epoch": 33.0,
589
- "learning_rate": 6.800000000000001e-06,
590
- "loss": 0.005,
591
- "step": 8316
592
- },
593
- {
594
- "epoch": 33.0,
595
- "eval_accuracy": 0.9131939057312192,
596
- "eval_f1": 0.7289473684210527,
597
- "eval_loss": 0.7354016304016113,
598
- "eval_precision": 0.6943938012762079,
599
- "eval_recall": 0.7671198388721048,
600
- "eval_runtime": 30.6675,
601
- "eval_samples_per_second": 16.402,
602
- "eval_steps_per_second": 2.054,
603
- "step": 8316
604
- },
605
- {
606
- "epoch": 34.0,
607
- "learning_rate": 6.4000000000000006e-06,
608
- "loss": 0.0059,
609
- "step": 8568
610
- },
611
- {
612
- "epoch": 34.0,
613
- "eval_accuracy": 0.9108666645980079,
614
- "eval_f1": 0.723687336659539,
615
- "eval_loss": 0.7652931809425354,
616
- "eval_precision": 0.6851102114260009,
617
- "eval_recall": 0.7668680765357503,
618
- "eval_runtime": 31.9436,
619
- "eval_samples_per_second": 15.747,
620
- "eval_steps_per_second": 1.972,
621
- "step": 8568
622
- },
623
- {
624
- "epoch": 35.0,
625
- "learning_rate": 6e-06,
626
- "loss": 0.0047,
627
- "step": 8820
628
- },
629
- {
630
- "epoch": 35.0,
631
- "eval_accuracy": 0.9116113817606355,
632
- "eval_f1": 0.7277705035114866,
633
- "eval_loss": 0.7705232501029968,
634
- "eval_precision": 0.6902235267554753,
635
- "eval_recall": 0.7696374622356495,
636
- "eval_runtime": 30.572,
637
- "eval_samples_per_second": 16.453,
638
- "eval_steps_per_second": 2.061,
639
- "step": 8820
640
- },
641
- {
642
- "epoch": 36.0,
643
- "learning_rate": 5.600000000000001e-06,
644
- "loss": 0.0049,
645
- "step": 9072
646
- },
647
- {
648
- "epoch": 36.0,
649
- "eval_accuracy": 0.9124336736277034,
650
- "eval_f1": 0.7295733911785972,
651
- "eval_loss": 0.7525166273117065,
652
- "eval_precision": 0.6997226074895978,
653
- "eval_recall": 0.7620845921450151,
654
- "eval_runtime": 30.7353,
655
- "eval_samples_per_second": 16.366,
656
- "eval_steps_per_second": 2.05,
657
- "step": 9072
658
- }
659
- ],
660
- "logging_steps": 500,
661
- "max_steps": 12600,
662
- "num_train_epochs": 50,
663
- "save_steps": 500,
664
- "total_flos": 3.469062692905536e+16,
665
- "trial_name": null,
666
- "trial_params": null
667
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:5f2a53f67b72df3731a22e15fcd641c1c36da21621bbfbcbdfddbde9cf04d603
3
- size 4600