kms-engineer commited on
Commit
a1dd03f
·
1 Parent(s): 765fa9a

Remove training checkpoints to reduce model size

Browse files
checkpoint-1179/config.json DELETED
@@ -1,71 +0,0 @@
1
- {
2
- "architectures": [
3
- "RobertaForSequenceClassification"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
- "classifier_dropout": null,
8
- "dtype": "float32",
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "add_contact",
15
- "1": "edit_phone",
16
- "2": "edit_email",
17
- "3": "edit_address",
18
- "4": "delete_contact",
19
- "5": "list_all_contacts",
20
- "6": "search_contacts",
21
- "7": "add_birthday",
22
- "8": "list_birthdays",
23
- "9": "add_note",
24
- "10": "edit_note",
25
- "11": "delete_note",
26
- "12": "show_notes",
27
- "13": "add_note_tag",
28
- "14": "remove_note_tag",
29
- "15": "search_notes_text",
30
- "16": "search_notes_by_tag",
31
- "17": "help",
32
- "18": "exit",
33
- "19": "hello"
34
- },
35
- "initializer_range": 0.02,
36
- "intermediate_size": 3072,
37
- "label2id": {
38
- "add_birthday": 7,
39
- "add_contact": 0,
40
- "add_note": 9,
41
- "add_note_tag": 13,
42
- "delete_contact": 4,
43
- "delete_note": 11,
44
- "edit_address": 3,
45
- "edit_email": 2,
46
- "edit_note": 10,
47
- "edit_phone": 1,
48
- "exit": 18,
49
- "hello": 19,
50
- "help": 17,
51
- "list_all_contacts": 5,
52
- "list_birthdays": 8,
53
- "remove_note_tag": 14,
54
- "search_contacts": 6,
55
- "search_notes_by_tag": 16,
56
- "search_notes_text": 15,
57
- "show_notes": 12
58
- },
59
- "layer_norm_eps": 1e-05,
60
- "max_position_embeddings": 514,
61
- "model_type": "roberta",
62
- "num_attention_heads": 12,
63
- "num_hidden_layers": 12,
64
- "pad_token_id": 1,
65
- "position_embedding_type": "absolute",
66
- "problem_type": "single_label_classification",
67
- "transformers_version": "4.57.0",
68
- "type_vocab_size": 1,
69
- "use_cache": true,
70
- "vocab_size": 50265
71
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1179/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:43c1a057215ca31f05511e2fca66fe21b08567f621d3cf68599c9f21c43b06a8
3
- size 498668192
 
 
 
 
checkpoint-1179/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fc3886f62b31f55c1d966ff304bb7f67061dbe462e66c4c9754390c06b30cbd
3
- size 997451019
 
 
 
 
checkpoint-1179/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf9c3b20a0a343ce38e5a13fb76bf553acda38dfb893bf7123a2d6ccc5edc6d9
3
- size 14455
 
 
 
 
checkpoint-1179/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ec190660d99873e82bc04f22cd7f50d3496428963a842ad2ae1e9cd4a99a031
3
- size 1465
 
 
 
 
checkpoint-1179/trainer_state.json DELETED
@@ -1,970 +0,0 @@
1
- {
2
- "best_global_step": 917,
3
- "best_metric": 0.980806142034549,
4
- "best_model_checkpoint": "models/intent_classifier/checkpoint-917",
5
- "epoch": 9.0,
6
- "eval_steps": 500,
7
- "global_step": 1179,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.07633587786259542,
14
- "grad_norm": 4.617219924926758,
15
- "learning_rate": 1.8000000000000001e-06,
16
- "loss": 3.0068,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.15267175572519084,
21
- "grad_norm": 2.9202663898468018,
22
- "learning_rate": 3.8000000000000005e-06,
23
- "loss": 2.9968,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.22900763358778625,
28
- "grad_norm": 2.677699327468872,
29
- "learning_rate": 5.8e-06,
30
- "loss": 2.9956,
31
- "step": 30
32
- },
33
- {
34
- "epoch": 0.3053435114503817,
35
- "grad_norm": 3.113600254058838,
36
- "learning_rate": 7.800000000000002e-06,
37
- "loss": 3.0013,
38
- "step": 40
39
- },
40
- {
41
- "epoch": 0.3816793893129771,
42
- "grad_norm": 3.7662277221679688,
43
- "learning_rate": 9.800000000000001e-06,
44
- "loss": 2.9809,
45
- "step": 50
46
- },
47
- {
48
- "epoch": 0.4580152671755725,
49
- "grad_norm": 8.282344818115234,
50
- "learning_rate": 1.18e-05,
51
- "loss": 2.9022,
52
- "step": 60
53
- },
54
- {
55
- "epoch": 0.5343511450381679,
56
- "grad_norm": 8.860713005065918,
57
- "learning_rate": 1.38e-05,
58
- "loss": 2.7668,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.6106870229007634,
63
- "grad_norm": 11.435643196105957,
64
- "learning_rate": 1.58e-05,
65
- "loss": 2.5216,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.6870229007633588,
70
- "grad_norm": 10.48116397857666,
71
- "learning_rate": 1.7800000000000002e-05,
72
- "loss": 2.3281,
73
- "step": 90
74
- },
75
- {
76
- "epoch": 0.7633587786259542,
77
- "grad_norm": 12.855015754699707,
78
- "learning_rate": 1.98e-05,
79
- "loss": 2.0133,
80
- "step": 100
81
- },
82
- {
83
- "epoch": 0.8396946564885496,
84
- "grad_norm": 12.89151668548584,
85
- "learning_rate": 1.985123966942149e-05,
86
- "loss": 1.7684,
87
- "step": 110
88
- },
89
- {
90
- "epoch": 0.916030534351145,
91
- "grad_norm": 11.234882354736328,
92
- "learning_rate": 1.9685950413223144e-05,
93
- "loss": 1.4861,
94
- "step": 120
95
- },
96
- {
97
- "epoch": 0.9923664122137404,
98
- "grad_norm": 12.167614936828613,
99
- "learning_rate": 1.9520661157024795e-05,
100
- "loss": 1.2402,
101
- "step": 130
102
- },
103
- {
104
- "epoch": 1.0,
105
- "eval_accuracy": 0.8944337811900192,
106
- "eval_f1": 0.8911749215123173,
107
- "eval_loss": 0.9475375413894653,
108
- "eval_precision": 0.912477309076372,
109
- "eval_recall": 0.8944337811900192,
110
- "eval_runtime": 3.1553,
111
- "eval_samples_per_second": 165.121,
112
- "eval_steps_per_second": 10.459,
113
- "step": 131
114
- },
115
- {
116
- "epoch": 1.0687022900763359,
117
- "grad_norm": 9.206323623657227,
118
- "learning_rate": 1.9355371900826446e-05,
119
- "loss": 1.0125,
120
- "step": 140
121
- },
122
- {
123
- "epoch": 1.1450381679389312,
124
- "grad_norm": 8.127516746520996,
125
- "learning_rate": 1.91900826446281e-05,
126
- "loss": 0.8771,
127
- "step": 150
128
- },
129
- {
130
- "epoch": 1.2213740458015268,
131
- "grad_norm": 13.526582717895508,
132
- "learning_rate": 1.9024793388429755e-05,
133
- "loss": 0.7546,
134
- "step": 160
135
- },
136
- {
137
- "epoch": 1.297709923664122,
138
- "grad_norm": 9.502181053161621,
139
- "learning_rate": 1.8859504132231407e-05,
140
- "loss": 0.7518,
141
- "step": 170
142
- },
143
- {
144
- "epoch": 1.3740458015267176,
145
- "grad_norm": 4.78341007232666,
146
- "learning_rate": 1.8694214876033058e-05,
147
- "loss": 0.64,
148
- "step": 180
149
- },
150
- {
151
- "epoch": 1.450381679389313,
152
- "grad_norm": 9.189094543457031,
153
- "learning_rate": 1.8528925619834712e-05,
154
- "loss": 0.497,
155
- "step": 190
156
- },
157
- {
158
- "epoch": 1.5267175572519083,
159
- "grad_norm": 14.268597602844238,
160
- "learning_rate": 1.8363636363636367e-05,
161
- "loss": 0.4995,
162
- "step": 200
163
- },
164
- {
165
- "epoch": 1.6030534351145038,
166
- "grad_norm": 9.649062156677246,
167
- "learning_rate": 1.819834710743802e-05,
168
- "loss": 0.4439,
169
- "step": 210
170
- },
171
- {
172
- "epoch": 1.6793893129770994,
173
- "grad_norm": 20.48824119567871,
174
- "learning_rate": 1.803305785123967e-05,
175
- "loss": 0.498,
176
- "step": 220
177
- },
178
- {
179
- "epoch": 1.7557251908396947,
180
- "grad_norm": 12.906113624572754,
181
- "learning_rate": 1.7867768595041324e-05,
182
- "loss": 0.4111,
183
- "step": 230
184
- },
185
- {
186
- "epoch": 1.83206106870229,
187
- "grad_norm": 18.856950759887695,
188
- "learning_rate": 1.770247933884298e-05,
189
- "loss": 0.3567,
190
- "step": 240
191
- },
192
- {
193
- "epoch": 1.9083969465648853,
194
- "grad_norm": 5.464386463165283,
195
- "learning_rate": 1.753719008264463e-05,
196
- "loss": 0.3243,
197
- "step": 250
198
- },
199
- {
200
- "epoch": 1.984732824427481,
201
- "grad_norm": 5.167541027069092,
202
- "learning_rate": 1.737190082644628e-05,
203
- "loss": 0.2887,
204
- "step": 260
205
- },
206
- {
207
- "epoch": 2.0,
208
- "eval_accuracy": 0.9750479846449136,
209
- "eval_f1": 0.9749686805377397,
210
- "eval_loss": 0.2119528353214264,
211
- "eval_precision": 0.976201785633538,
212
- "eval_recall": 0.9750479846449136,
213
- "eval_runtime": 3.0517,
214
- "eval_samples_per_second": 170.723,
215
- "eval_steps_per_second": 10.814,
216
- "step": 262
217
- },
218
- {
219
- "epoch": 2.0610687022900764,
220
- "grad_norm": 2.6736671924591064,
221
- "learning_rate": 1.7206611570247936e-05,
222
- "loss": 0.1954,
223
- "step": 270
224
- },
225
- {
226
- "epoch": 2.1374045801526718,
227
- "grad_norm": 5.401017665863037,
228
- "learning_rate": 1.7041322314049587e-05,
229
- "loss": 0.2031,
230
- "step": 280
231
- },
232
- {
233
- "epoch": 2.213740458015267,
234
- "grad_norm": 13.930877685546875,
235
- "learning_rate": 1.687603305785124e-05,
236
- "loss": 0.1799,
237
- "step": 290
238
- },
239
- {
240
- "epoch": 2.2900763358778624,
241
- "grad_norm": 6.306116104125977,
242
- "learning_rate": 1.6710743801652893e-05,
243
- "loss": 0.1459,
244
- "step": 300
245
- },
246
- {
247
- "epoch": 2.366412213740458,
248
- "grad_norm": 2.7880892753601074,
249
- "learning_rate": 1.6545454545454548e-05,
250
- "loss": 0.1426,
251
- "step": 310
252
- },
253
- {
254
- "epoch": 2.4427480916030535,
255
- "grad_norm": 15.64450740814209,
256
- "learning_rate": 1.63801652892562e-05,
257
- "loss": 0.1184,
258
- "step": 320
259
- },
260
- {
261
- "epoch": 2.519083969465649,
262
- "grad_norm": 1.4264142513275146,
263
- "learning_rate": 1.6214876033057853e-05,
264
- "loss": 0.1372,
265
- "step": 330
266
- },
267
- {
268
- "epoch": 2.595419847328244,
269
- "grad_norm": 14.163614273071289,
270
- "learning_rate": 1.6049586776859505e-05,
271
- "loss": 0.145,
272
- "step": 340
273
- },
274
- {
275
- "epoch": 2.67175572519084,
276
- "grad_norm": 5.825468063354492,
277
- "learning_rate": 1.588429752066116e-05,
278
- "loss": 0.1137,
279
- "step": 350
280
- },
281
- {
282
- "epoch": 2.7480916030534353,
283
- "grad_norm": 0.70721435546875,
284
- "learning_rate": 1.571900826446281e-05,
285
- "loss": 0.0688,
286
- "step": 360
287
- },
288
- {
289
- "epoch": 2.8244274809160306,
290
- "grad_norm": 5.984133720397949,
291
- "learning_rate": 1.5553719008264465e-05,
292
- "loss": 0.1614,
293
- "step": 370
294
- },
295
- {
296
- "epoch": 2.900763358778626,
297
- "grad_norm": 2.9067797660827637,
298
- "learning_rate": 1.5388429752066116e-05,
299
- "loss": 0.1258,
300
- "step": 380
301
- },
302
- {
303
- "epoch": 2.9770992366412212,
304
- "grad_norm": 0.7466038465499878,
305
- "learning_rate": 1.522314049586777e-05,
306
- "loss": 0.1494,
307
- "step": 390
308
- },
309
- {
310
- "epoch": 3.0,
311
- "eval_accuracy": 0.9692898272552783,
312
- "eval_f1": 0.9692039233298336,
313
- "eval_loss": 0.14140835404396057,
314
- "eval_precision": 0.9706301207884909,
315
- "eval_recall": 0.9692898272552783,
316
- "eval_runtime": 2.9931,
317
- "eval_samples_per_second": 174.066,
318
- "eval_steps_per_second": 11.025,
319
- "step": 393
320
- },
321
- {
322
- "epoch": 3.053435114503817,
323
- "grad_norm": 5.72157621383667,
324
- "learning_rate": 1.5057851239669424e-05,
325
- "loss": 0.1056,
326
- "step": 400
327
- },
328
- {
329
- "epoch": 3.1297709923664123,
330
- "grad_norm": 2.024094343185425,
331
- "learning_rate": 1.4892561983471077e-05,
332
- "loss": 0.1402,
333
- "step": 410
334
- },
335
- {
336
- "epoch": 3.2061068702290076,
337
- "grad_norm": 1.147175669670105,
338
- "learning_rate": 1.4727272727272728e-05,
339
- "loss": 0.0474,
340
- "step": 420
341
- },
342
- {
343
- "epoch": 3.282442748091603,
344
- "grad_norm": 4.005652904510498,
345
- "learning_rate": 1.4561983471074381e-05,
346
- "loss": 0.076,
347
- "step": 430
348
- },
349
- {
350
- "epoch": 3.3587786259541983,
351
- "grad_norm": 0.9247184991836548,
352
- "learning_rate": 1.4396694214876035e-05,
353
- "loss": 0.0709,
354
- "step": 440
355
- },
356
- {
357
- "epoch": 3.435114503816794,
358
- "grad_norm": 4.577192306518555,
359
- "learning_rate": 1.4231404958677688e-05,
360
- "loss": 0.0678,
361
- "step": 450
362
- },
363
- {
364
- "epoch": 3.5114503816793894,
365
- "grad_norm": 0.21287904679775238,
366
- "learning_rate": 1.406611570247934e-05,
367
- "loss": 0.0404,
368
- "step": 460
369
- },
370
- {
371
- "epoch": 3.5877862595419847,
372
- "grad_norm": 0.67902010679245,
373
- "learning_rate": 1.3900826446280993e-05,
374
- "loss": 0.0508,
375
- "step": 470
376
- },
377
- {
378
- "epoch": 3.66412213740458,
379
- "grad_norm": 0.8000791072845459,
380
- "learning_rate": 1.3735537190082645e-05,
381
- "loss": 0.0473,
382
- "step": 480
383
- },
384
- {
385
- "epoch": 3.7404580152671754,
386
- "grad_norm": 1.3421847820281982,
387
- "learning_rate": 1.35702479338843e-05,
388
- "loss": 0.0223,
389
- "step": 490
390
- },
391
- {
392
- "epoch": 3.816793893129771,
393
- "grad_norm": 0.182773157954216,
394
- "learning_rate": 1.3404958677685951e-05,
395
- "loss": 0.0196,
396
- "step": 500
397
- },
398
- {
399
- "epoch": 3.8931297709923665,
400
- "grad_norm": 1.4306972026824951,
401
- "learning_rate": 1.3239669421487604e-05,
402
- "loss": 0.0205,
403
- "step": 510
404
- },
405
- {
406
- "epoch": 3.969465648854962,
407
- "grad_norm": 1.896088719367981,
408
- "learning_rate": 1.3074380165289257e-05,
409
- "loss": 0.0212,
410
- "step": 520
411
- },
412
- {
413
- "epoch": 4.0,
414
- "eval_accuracy": 0.9769673704414588,
415
- "eval_f1": 0.9769688162747627,
416
- "eval_loss": 0.12679165601730347,
417
- "eval_precision": 0.9782792844480811,
418
- "eval_recall": 0.9769673704414588,
419
- "eval_runtime": 3.1232,
420
- "eval_samples_per_second": 166.818,
421
- "eval_steps_per_second": 10.566,
422
- "step": 524
423
- },
424
- {
425
- "epoch": 4.0458015267175576,
426
- "grad_norm": 0.14600762724876404,
427
- "learning_rate": 1.2909090909090912e-05,
428
- "loss": 0.0222,
429
- "step": 530
430
- },
431
- {
432
- "epoch": 4.122137404580153,
433
- "grad_norm": 8.074915885925293,
434
- "learning_rate": 1.2743801652892563e-05,
435
- "loss": 0.0542,
436
- "step": 540
437
- },
438
- {
439
- "epoch": 4.198473282442748,
440
- "grad_norm": 0.09765351563692093,
441
- "learning_rate": 1.2578512396694216e-05,
442
- "loss": 0.0394,
443
- "step": 550
444
- },
445
- {
446
- "epoch": 4.2748091603053435,
447
- "grad_norm": 0.33209875226020813,
448
- "learning_rate": 1.2413223140495869e-05,
449
- "loss": 0.0241,
450
- "step": 560
451
- },
452
- {
453
- "epoch": 4.351145038167939,
454
- "grad_norm": 0.5309058427810669,
455
- "learning_rate": 1.2247933884297522e-05,
456
- "loss": 0.0161,
457
- "step": 570
458
- },
459
- {
460
- "epoch": 4.427480916030534,
461
- "grad_norm": 0.1629948765039444,
462
- "learning_rate": 1.2082644628099173e-05,
463
- "loss": 0.0129,
464
- "step": 580
465
- },
466
- {
467
- "epoch": 4.5038167938931295,
468
- "grad_norm": 0.15240447223186493,
469
- "learning_rate": 1.1917355371900828e-05,
470
- "loss": 0.0128,
471
- "step": 590
472
- },
473
- {
474
- "epoch": 4.580152671755725,
475
- "grad_norm": 0.10693137347698212,
476
- "learning_rate": 1.175206611570248e-05,
477
- "loss": 0.0724,
478
- "step": 600
479
- },
480
- {
481
- "epoch": 4.65648854961832,
482
- "grad_norm": 0.8860049843788147,
483
- "learning_rate": 1.1586776859504133e-05,
484
- "loss": 0.013,
485
- "step": 610
486
- },
487
- {
488
- "epoch": 4.732824427480916,
489
- "grad_norm": 1.1124643087387085,
490
- "learning_rate": 1.1421487603305785e-05,
491
- "loss": 0.0228,
492
- "step": 620
493
- },
494
- {
495
- "epoch": 4.809160305343512,
496
- "grad_norm": 16.63216209411621,
497
- "learning_rate": 1.125619834710744e-05,
498
- "loss": 0.1361,
499
- "step": 630
500
- },
501
- {
502
- "epoch": 4.885496183206107,
503
- "grad_norm": 0.22511304914951324,
504
- "learning_rate": 1.1090909090909092e-05,
505
- "loss": 0.0127,
506
- "step": 640
507
- },
508
- {
509
- "epoch": 4.961832061068702,
510
- "grad_norm": 0.2706206142902374,
511
- "learning_rate": 1.0925619834710745e-05,
512
- "loss": 0.0127,
513
- "step": 650
514
- },
515
- {
516
- "epoch": 5.0,
517
- "eval_accuracy": 0.9731285988483686,
518
- "eval_f1": 0.973064976375106,
519
- "eval_loss": 0.14903880655765533,
520
- "eval_precision": 0.9743006090972162,
521
- "eval_recall": 0.9731285988483686,
522
- "eval_runtime": 3.1635,
523
- "eval_samples_per_second": 164.69,
524
- "eval_steps_per_second": 10.431,
525
- "step": 655
526
- },
527
- {
528
- "epoch": 5.038167938931298,
529
- "grad_norm": 26.473268508911133,
530
- "learning_rate": 1.0760330578512396e-05,
531
- "loss": 0.0238,
532
- "step": 660
533
- },
534
- {
535
- "epoch": 5.114503816793893,
536
- "grad_norm": 0.08209118992090225,
537
- "learning_rate": 1.0595041322314051e-05,
538
- "loss": 0.0107,
539
- "step": 670
540
- },
541
- {
542
- "epoch": 5.190839694656488,
543
- "grad_norm": 1.1051641702651978,
544
- "learning_rate": 1.0429752066115704e-05,
545
- "loss": 0.0682,
546
- "step": 680
547
- },
548
- {
549
- "epoch": 5.267175572519084,
550
- "grad_norm": 11.310916900634766,
551
- "learning_rate": 1.0264462809917357e-05,
552
- "loss": 0.0131,
553
- "step": 690
554
- },
555
- {
556
- "epoch": 5.34351145038168,
557
- "grad_norm": 0.09134263545274734,
558
- "learning_rate": 1.0099173553719008e-05,
559
- "loss": 0.0428,
560
- "step": 700
561
- },
562
- {
563
- "epoch": 5.419847328244275,
564
- "grad_norm": 0.08655811846256256,
565
- "learning_rate": 9.933884297520661e-06,
566
- "loss": 0.0246,
567
- "step": 710
568
- },
569
- {
570
- "epoch": 5.4961832061068705,
571
- "grad_norm": 0.16410402953624725,
572
- "learning_rate": 9.768595041322316e-06,
573
- "loss": 0.0094,
574
- "step": 720
575
- },
576
- {
577
- "epoch": 5.572519083969466,
578
- "grad_norm": 1.349546194076538,
579
- "learning_rate": 9.603305785123967e-06,
580
- "loss": 0.045,
581
- "step": 730
582
- },
583
- {
584
- "epoch": 5.648854961832061,
585
- "grad_norm": 0.12257255613803864,
586
- "learning_rate": 9.438016528925621e-06,
587
- "loss": 0.0516,
588
- "step": 740
589
- },
590
- {
591
- "epoch": 5.7251908396946565,
592
- "grad_norm": 0.06910885125398636,
593
- "learning_rate": 9.272727272727273e-06,
594
- "loss": 0.009,
595
- "step": 750
596
- },
597
- {
598
- "epoch": 5.801526717557252,
599
- "grad_norm": 0.056132227182388306,
600
- "learning_rate": 9.107438016528927e-06,
601
- "loss": 0.0107,
602
- "step": 760
603
- },
604
- {
605
- "epoch": 5.877862595419847,
606
- "grad_norm": 0.07667958736419678,
607
- "learning_rate": 8.942148760330578e-06,
608
- "loss": 0.0086,
609
- "step": 770
610
- },
611
- {
612
- "epoch": 5.9541984732824424,
613
- "grad_norm": 0.10609736293554306,
614
- "learning_rate": 8.776859504132233e-06,
615
- "loss": 0.0085,
616
- "step": 780
617
- },
618
- {
619
- "epoch": 6.0,
620
- "eval_accuracy": 0.9788867562380038,
621
- "eval_f1": 0.9789720270641704,
622
- "eval_loss": 0.12155096977949142,
623
- "eval_precision": 0.9801130700504813,
624
- "eval_recall": 0.9788867562380038,
625
- "eval_runtime": 3.2955,
626
- "eval_samples_per_second": 158.096,
627
- "eval_steps_per_second": 10.014,
628
- "step": 786
629
- },
630
- {
631
- "epoch": 6.030534351145038,
632
- "grad_norm": 0.06408526748418808,
633
- "learning_rate": 8.611570247933884e-06,
634
- "loss": 0.0081,
635
- "step": 790
636
- },
637
- {
638
- "epoch": 6.106870229007634,
639
- "grad_norm": 0.07884930074214935,
640
- "learning_rate": 8.446280991735539e-06,
641
- "loss": 0.031,
642
- "step": 800
643
- },
644
- {
645
- "epoch": 6.183206106870229,
646
- "grad_norm": 0.07998275011777878,
647
- "learning_rate": 8.28099173553719e-06,
648
- "loss": 0.0453,
649
- "step": 810
650
- },
651
- {
652
- "epoch": 6.259541984732825,
653
- "grad_norm": 0.22578206658363342,
654
- "learning_rate": 8.115702479338843e-06,
655
- "loss": 0.0078,
656
- "step": 820
657
- },
658
- {
659
- "epoch": 6.33587786259542,
660
- "grad_norm": 0.07642875611782074,
661
- "learning_rate": 7.950413223140496e-06,
662
- "loss": 0.0086,
663
- "step": 830
664
- },
665
- {
666
- "epoch": 6.412213740458015,
667
- "grad_norm": 0.10305721312761307,
668
- "learning_rate": 7.785123966942149e-06,
669
- "loss": 0.0444,
670
- "step": 840
671
- },
672
- {
673
- "epoch": 6.488549618320611,
674
- "grad_norm": 0.0701122134923935,
675
- "learning_rate": 7.619834710743802e-06,
676
- "loss": 0.0077,
677
- "step": 850
678
- },
679
- {
680
- "epoch": 6.564885496183206,
681
- "grad_norm": 0.07119292765855789,
682
- "learning_rate": 7.454545454545456e-06,
683
- "loss": 0.0076,
684
- "step": 860
685
- },
686
- {
687
- "epoch": 6.641221374045801,
688
- "grad_norm": 0.2685672342777252,
689
- "learning_rate": 7.289256198347108e-06,
690
- "loss": 0.0077,
691
- "step": 870
692
- },
693
- {
694
- "epoch": 6.717557251908397,
695
- "grad_norm": 0.0628926083445549,
696
- "learning_rate": 7.1239669421487615e-06,
697
- "loss": 0.0072,
698
- "step": 880
699
- },
700
- {
701
- "epoch": 6.793893129770993,
702
- "grad_norm": 0.06299301236867905,
703
- "learning_rate": 6.9586776859504135e-06,
704
- "loss": 0.0109,
705
- "step": 890
706
- },
707
- {
708
- "epoch": 6.870229007633588,
709
- "grad_norm": 0.06120818480849266,
710
- "learning_rate": 6.793388429752067e-06,
711
- "loss": 0.0069,
712
- "step": 900
713
- },
714
- {
715
- "epoch": 6.9465648854961835,
716
- "grad_norm": 0.08700945228338242,
717
- "learning_rate": 6.628099173553719e-06,
718
- "loss": 0.0073,
719
- "step": 910
720
- },
721
- {
722
- "epoch": 7.0,
723
- "eval_accuracy": 0.980806142034549,
724
- "eval_f1": 0.980791368968265,
725
- "eval_loss": 0.11875477433204651,
726
- "eval_precision": 0.981936841149893,
727
- "eval_recall": 0.980806142034549,
728
- "eval_runtime": 3.1069,
729
- "eval_samples_per_second": 167.692,
730
- "eval_steps_per_second": 10.622,
731
- "step": 917
732
- },
733
- {
734
- "epoch": 7.022900763358779,
735
- "grad_norm": 0.061511170119047165,
736
- "learning_rate": 6.462809917355372e-06,
737
- "loss": 0.0066,
738
- "step": 920
739
- },
740
- {
741
- "epoch": 7.099236641221374,
742
- "grad_norm": 0.06128810718655586,
743
- "learning_rate": 6.297520661157025e-06,
744
- "loss": 0.0064,
745
- "step": 930
746
- },
747
- {
748
- "epoch": 7.175572519083969,
749
- "grad_norm": 0.05454257130622864,
750
- "learning_rate": 6.132231404958678e-06,
751
- "loss": 0.0066,
752
- "step": 940
753
- },
754
- {
755
- "epoch": 7.251908396946565,
756
- "grad_norm": 0.09356739372015,
757
- "learning_rate": 5.966942148760331e-06,
758
- "loss": 0.0065,
759
- "step": 950
760
- },
761
- {
762
- "epoch": 7.32824427480916,
763
- "grad_norm": 0.04699549078941345,
764
- "learning_rate": 5.801652892561984e-06,
765
- "loss": 0.006,
766
- "step": 960
767
- },
768
- {
769
- "epoch": 7.404580152671755,
770
- "grad_norm": 0.04597270488739014,
771
- "learning_rate": 5.636363636363636e-06,
772
- "loss": 0.0063,
773
- "step": 970
774
- },
775
- {
776
- "epoch": 7.480916030534351,
777
- "grad_norm": 0.05777190253138542,
778
- "learning_rate": 5.47107438016529e-06,
779
- "loss": 0.0057,
780
- "step": 980
781
- },
782
- {
783
- "epoch": 7.557251908396947,
784
- "grad_norm": 0.0520237572491169,
785
- "learning_rate": 5.305785123966942e-06,
786
- "loss": 0.006,
787
- "step": 990
788
- },
789
- {
790
- "epoch": 7.633587786259542,
791
- "grad_norm": 0.0427822545170784,
792
- "learning_rate": 5.140495867768596e-06,
793
- "loss": 0.0059,
794
- "step": 1000
795
- },
796
- {
797
- "epoch": 7.709923664122138,
798
- "grad_norm": 0.05699237063527107,
799
- "learning_rate": 4.975206611570249e-06,
800
- "loss": 0.0055,
801
- "step": 1010
802
- },
803
- {
804
- "epoch": 7.786259541984733,
805
- "grad_norm": 0.05885695666074753,
806
- "learning_rate": 4.8099173553719015e-06,
807
- "loss": 0.0258,
808
- "step": 1020
809
- },
810
- {
811
- "epoch": 7.862595419847328,
812
- "grad_norm": 0.05190462991595268,
813
- "learning_rate": 4.6446280991735544e-06,
814
- "loss": 0.0496,
815
- "step": 1030
816
- },
817
- {
818
- "epoch": 7.938931297709924,
819
- "grad_norm": 0.03909669816493988,
820
- "learning_rate": 4.479338842975207e-06,
821
- "loss": 0.0398,
822
- "step": 1040
823
- },
824
- {
825
- "epoch": 8.0,
826
- "eval_accuracy": 0.980806142034549,
827
- "eval_f1": 0.980791368968265,
828
- "eval_loss": 0.12089628726243973,
829
- "eval_precision": 0.981936841149893,
830
- "eval_recall": 0.980806142034549,
831
- "eval_runtime": 3.1129,
832
- "eval_samples_per_second": 167.366,
833
- "eval_steps_per_second": 10.601,
834
- "step": 1048
835
- },
836
- {
837
- "epoch": 8.01526717557252,
838
- "grad_norm": 0.05181822180747986,
839
- "learning_rate": 4.31404958677686e-06,
840
- "loss": 0.0062,
841
- "step": 1050
842
- },
843
- {
844
- "epoch": 8.091603053435115,
845
- "grad_norm": 0.03777517005801201,
846
- "learning_rate": 4.148760330578513e-06,
847
- "loss": 0.0058,
848
- "step": 1060
849
- },
850
- {
851
- "epoch": 8.16793893129771,
852
- "grad_norm": 0.04515732452273369,
853
- "learning_rate": 3.983471074380166e-06,
854
- "loss": 0.0056,
855
- "step": 1070
856
- },
857
- {
858
- "epoch": 8.244274809160306,
859
- "grad_norm": 0.044928282499313354,
860
- "learning_rate": 3.818181818181819e-06,
861
- "loss": 0.0055,
862
- "step": 1080
863
- },
864
- {
865
- "epoch": 8.320610687022901,
866
- "grad_norm": 0.05599347501993179,
867
- "learning_rate": 3.6528925619834715e-06,
868
- "loss": 0.0057,
869
- "step": 1090
870
- },
871
- {
872
- "epoch": 8.396946564885496,
873
- "grad_norm": 1.0466651916503906,
874
- "learning_rate": 3.4876033057851245e-06,
875
- "loss": 0.0384,
876
- "step": 1100
877
- },
878
- {
879
- "epoch": 8.473282442748092,
880
- "grad_norm": 0.05839056894183159,
881
- "learning_rate": 3.3223140495867774e-06,
882
- "loss": 0.0057,
883
- "step": 1110
884
- },
885
- {
886
- "epoch": 8.549618320610687,
887
- "grad_norm": 0.05969908460974693,
888
- "learning_rate": 3.1570247933884303e-06,
889
- "loss": 0.0424,
890
- "step": 1120
891
- },
892
- {
893
- "epoch": 8.625954198473282,
894
- "grad_norm": 0.06252706795930862,
895
- "learning_rate": 2.9917355371900832e-06,
896
- "loss": 0.0174,
897
- "step": 1130
898
- },
899
- {
900
- "epoch": 8.702290076335878,
901
- "grad_norm": 0.1538064330816269,
902
- "learning_rate": 2.8264462809917357e-06,
903
- "loss": 0.0058,
904
- "step": 1140
905
- },
906
- {
907
- "epoch": 8.778625954198473,
908
- "grad_norm": 0.05743182823061943,
909
- "learning_rate": 2.6611570247933886e-06,
910
- "loss": 0.0055,
911
- "step": 1150
912
- },
913
- {
914
- "epoch": 8.854961832061068,
915
- "grad_norm": 0.06665431708097458,
916
- "learning_rate": 2.4958677685950416e-06,
917
- "loss": 0.0057,
918
- "step": 1160
919
- },
920
- {
921
- "epoch": 8.931297709923664,
922
- "grad_norm": 0.07899218052625656,
923
- "learning_rate": 2.3305785123966945e-06,
924
- "loss": 0.0055,
925
- "step": 1170
926
- },
927
- {
928
- "epoch": 9.0,
929
- "eval_accuracy": 0.980806142034549,
930
- "eval_f1": 0.9807999304274743,
931
- "eval_loss": 0.12244618684053421,
932
- "eval_precision": 0.9819563131797131,
933
- "eval_recall": 0.980806142034549,
934
- "eval_runtime": 3.1404,
935
- "eval_samples_per_second": 165.902,
936
- "eval_steps_per_second": 10.508,
937
- "step": 1179
938
- }
939
- ],
940
- "logging_steps": 10,
941
- "max_steps": 1310,
942
- "num_input_tokens_seen": 0,
943
- "num_train_epochs": 10,
944
- "save_steps": 500,
945
- "stateful_callbacks": {
946
- "EarlyStoppingCallback": {
947
- "args": {
948
- "early_stopping_patience": 3,
949
- "early_stopping_threshold": 0.0
950
- },
951
- "attributes": {
952
- "early_stopping_patience_counter": 2
953
- }
954
- },
955
- "TrainerControl": {
956
- "args": {
957
- "should_epoch_stop": false,
958
- "should_evaluate": false,
959
- "should_log": false,
960
- "should_save": true,
961
- "should_training_stop": false
962
- },
963
- "attributes": {}
964
- }
965
- },
966
- "total_flos": 1233335031616512.0,
967
- "train_batch_size": 16,
968
- "trial_name": null,
969
- "trial_params": null
970
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1179/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:21a4ac2a72f23cb69080a4fb3a9a3266e6a76062c2c55904cad3d4237f62c83e
3
- size 5841
 
 
 
 
checkpoint-1310/config.json DELETED
@@ -1,71 +0,0 @@
1
- {
2
- "architectures": [
3
- "RobertaForSequenceClassification"
4
- ],
5
- "attention_probs_dropout_prob": 0.1,
6
- "bos_token_id": 0,
7
- "classifier_dropout": null,
8
- "dtype": "float32",
9
- "eos_token_id": 2,
10
- "hidden_act": "gelu",
11
- "hidden_dropout_prob": 0.1,
12
- "hidden_size": 768,
13
- "id2label": {
14
- "0": "add_contact",
15
- "1": "edit_phone",
16
- "2": "edit_email",
17
- "3": "edit_address",
18
- "4": "delete_contact",
19
- "5": "list_all_contacts",
20
- "6": "search_contacts",
21
- "7": "add_birthday",
22
- "8": "list_birthdays",
23
- "9": "add_note",
24
- "10": "edit_note",
25
- "11": "delete_note",
26
- "12": "show_notes",
27
- "13": "add_note_tag",
28
- "14": "remove_note_tag",
29
- "15": "search_notes_text",
30
- "16": "search_notes_by_tag",
31
- "17": "help",
32
- "18": "exit",
33
- "19": "hello"
34
- },
35
- "initializer_range": 0.02,
36
- "intermediate_size": 3072,
37
- "label2id": {
38
- "add_birthday": 7,
39
- "add_contact": 0,
40
- "add_note": 9,
41
- "add_note_tag": 13,
42
- "delete_contact": 4,
43
- "delete_note": 11,
44
- "edit_address": 3,
45
- "edit_email": 2,
46
- "edit_note": 10,
47
- "edit_phone": 1,
48
- "exit": 18,
49
- "hello": 19,
50
- "help": 17,
51
- "list_all_contacts": 5,
52
- "list_birthdays": 8,
53
- "remove_note_tag": 14,
54
- "search_contacts": 6,
55
- "search_notes_by_tag": 16,
56
- "search_notes_text": 15,
57
- "show_notes": 12
58
- },
59
- "layer_norm_eps": 1e-05,
60
- "max_position_embeddings": 514,
61
- "model_type": "roberta",
62
- "num_attention_heads": 12,
63
- "num_hidden_layers": 12,
64
- "pad_token_id": 1,
65
- "position_embedding_type": "absolute",
66
- "problem_type": "single_label_classification",
67
- "transformers_version": "4.57.0",
68
- "type_vocab_size": 1,
69
- "use_cache": true,
70
- "vocab_size": 50265
71
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1310/model.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c4a7120d267e8b923b5542c1ea6aeba65facf08182ea362827113fe36481f4e4
3
- size 498668192
 
 
 
 
checkpoint-1310/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6450cd41a91b312e87c4978cd7fdaee344ccafc5b63f22d076fc1187307eaf9a
3
- size 997451019
 
 
 
 
checkpoint-1310/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:791590502d32babfec3e01cad84acac1a5c5f69449f6851db53f4aead2041f79
3
- size 14455
 
 
 
 
checkpoint-1310/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef479fcc9aa8f88124c5b577e46a56b83c9f1415e04b0e4ef3b46ce53586f6bb
3
- size 1465
 
 
 
 
checkpoint-1310/trainer_state.json DELETED
@@ -1,1080 +0,0 @@
1
- {
2
- "best_global_step": 1310,
3
- "best_metric": 0.982725527831094,
4
- "best_model_checkpoint": "models/intent_classifier/checkpoint-1310",
5
- "epoch": 10.0,
6
- "eval_steps": 500,
7
- "global_step": 1310,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.07633587786259542,
14
- "grad_norm": 4.617219924926758,
15
- "learning_rate": 1.8000000000000001e-06,
16
- "loss": 3.0068,
17
- "step": 10
18
- },
19
- {
20
- "epoch": 0.15267175572519084,
21
- "grad_norm": 2.9202663898468018,
22
- "learning_rate": 3.8000000000000005e-06,
23
- "loss": 2.9968,
24
- "step": 20
25
- },
26
- {
27
- "epoch": 0.22900763358778625,
28
- "grad_norm": 2.677699327468872,
29
- "learning_rate": 5.8e-06,
30
- "loss": 2.9956,
31
- "step": 30
32
- },
33
- {
34
- "epoch": 0.3053435114503817,
35
- "grad_norm": 3.113600254058838,
36
- "learning_rate": 7.800000000000002e-06,
37
- "loss": 3.0013,
38
- "step": 40
39
- },
40
- {
41
- "epoch": 0.3816793893129771,
42
- "grad_norm": 3.7662277221679688,
43
- "learning_rate": 9.800000000000001e-06,
44
- "loss": 2.9809,
45
- "step": 50
46
- },
47
- {
48
- "epoch": 0.4580152671755725,
49
- "grad_norm": 8.282344818115234,
50
- "learning_rate": 1.18e-05,
51
- "loss": 2.9022,
52
- "step": 60
53
- },
54
- {
55
- "epoch": 0.5343511450381679,
56
- "grad_norm": 8.860713005065918,
57
- "learning_rate": 1.38e-05,
58
- "loss": 2.7668,
59
- "step": 70
60
- },
61
- {
62
- "epoch": 0.6106870229007634,
63
- "grad_norm": 11.435643196105957,
64
- "learning_rate": 1.58e-05,
65
- "loss": 2.5216,
66
- "step": 80
67
- },
68
- {
69
- "epoch": 0.6870229007633588,
70
- "grad_norm": 10.48116397857666,
71
- "learning_rate": 1.7800000000000002e-05,
72
- "loss": 2.3281,
73
- "step": 90
74
- },
75
- {
76
- "epoch": 0.7633587786259542,
77
- "grad_norm": 12.855015754699707,
78
- "learning_rate": 1.98e-05,
79
- "loss": 2.0133,
80
- "step": 100
81
- },
82
- {
83
- "epoch": 0.8396946564885496,
84
- "grad_norm": 12.89151668548584,
85
- "learning_rate": 1.985123966942149e-05,
86
- "loss": 1.7684,
87
- "step": 110
88
- },
89
- {
90
- "epoch": 0.916030534351145,
91
- "grad_norm": 11.234882354736328,
92
- "learning_rate": 1.9685950413223144e-05,
93
- "loss": 1.4861,
94
- "step": 120
95
- },
96
- {
97
- "epoch": 0.9923664122137404,
98
- "grad_norm": 12.167614936828613,
99
- "learning_rate": 1.9520661157024795e-05,
100
- "loss": 1.2402,
101
- "step": 130
102
- },
103
- {
104
- "epoch": 1.0,
105
- "eval_accuracy": 0.8944337811900192,
106
- "eval_f1": 0.8911749215123173,
107
- "eval_loss": 0.9475375413894653,
108
- "eval_precision": 0.912477309076372,
109
- "eval_recall": 0.8944337811900192,
110
- "eval_runtime": 3.1553,
111
- "eval_samples_per_second": 165.121,
112
- "eval_steps_per_second": 10.459,
113
- "step": 131
114
- },
115
- {
116
- "epoch": 1.0687022900763359,
117
- "grad_norm": 9.206323623657227,
118
- "learning_rate": 1.9355371900826446e-05,
119
- "loss": 1.0125,
120
- "step": 140
121
- },
122
- {
123
- "epoch": 1.1450381679389312,
124
- "grad_norm": 8.127516746520996,
125
- "learning_rate": 1.91900826446281e-05,
126
- "loss": 0.8771,
127
- "step": 150
128
- },
129
- {
130
- "epoch": 1.2213740458015268,
131
- "grad_norm": 13.526582717895508,
132
- "learning_rate": 1.9024793388429755e-05,
133
- "loss": 0.7546,
134
- "step": 160
135
- },
136
- {
137
- "epoch": 1.297709923664122,
138
- "grad_norm": 9.502181053161621,
139
- "learning_rate": 1.8859504132231407e-05,
140
- "loss": 0.7518,
141
- "step": 170
142
- },
143
- {
144
- "epoch": 1.3740458015267176,
145
- "grad_norm": 4.78341007232666,
146
- "learning_rate": 1.8694214876033058e-05,
147
- "loss": 0.64,
148
- "step": 180
149
- },
150
- {
151
- "epoch": 1.450381679389313,
152
- "grad_norm": 9.189094543457031,
153
- "learning_rate": 1.8528925619834712e-05,
154
- "loss": 0.497,
155
- "step": 190
156
- },
157
- {
158
- "epoch": 1.5267175572519083,
159
- "grad_norm": 14.268597602844238,
160
- "learning_rate": 1.8363636363636367e-05,
161
- "loss": 0.4995,
162
- "step": 200
163
- },
164
- {
165
- "epoch": 1.6030534351145038,
166
- "grad_norm": 9.649062156677246,
167
- "learning_rate": 1.819834710743802e-05,
168
- "loss": 0.4439,
169
- "step": 210
170
- },
171
- {
172
- "epoch": 1.6793893129770994,
173
- "grad_norm": 20.48824119567871,
174
- "learning_rate": 1.803305785123967e-05,
175
- "loss": 0.498,
176
- "step": 220
177
- },
178
- {
179
- "epoch": 1.7557251908396947,
180
- "grad_norm": 12.906113624572754,
181
- "learning_rate": 1.7867768595041324e-05,
182
- "loss": 0.4111,
183
- "step": 230
184
- },
185
- {
186
- "epoch": 1.83206106870229,
187
- "grad_norm": 18.856950759887695,
188
- "learning_rate": 1.770247933884298e-05,
189
- "loss": 0.3567,
190
- "step": 240
191
- },
192
- {
193
- "epoch": 1.9083969465648853,
194
- "grad_norm": 5.464386463165283,
195
- "learning_rate": 1.753719008264463e-05,
196
- "loss": 0.3243,
197
- "step": 250
198
- },
199
- {
200
- "epoch": 1.984732824427481,
201
- "grad_norm": 5.167541027069092,
202
- "learning_rate": 1.737190082644628e-05,
203
- "loss": 0.2887,
204
- "step": 260
205
- },
206
- {
207
- "epoch": 2.0,
208
- "eval_accuracy": 0.9750479846449136,
209
- "eval_f1": 0.9749686805377397,
210
- "eval_loss": 0.2119528353214264,
211
- "eval_precision": 0.976201785633538,
212
- "eval_recall": 0.9750479846449136,
213
- "eval_runtime": 3.0517,
214
- "eval_samples_per_second": 170.723,
215
- "eval_steps_per_second": 10.814,
216
- "step": 262
217
- },
218
- {
219
- "epoch": 2.0610687022900764,
220
- "grad_norm": 2.6736671924591064,
221
- "learning_rate": 1.7206611570247936e-05,
222
- "loss": 0.1954,
223
- "step": 270
224
- },
225
- {
226
- "epoch": 2.1374045801526718,
227
- "grad_norm": 5.401017665863037,
228
- "learning_rate": 1.7041322314049587e-05,
229
- "loss": 0.2031,
230
- "step": 280
231
- },
232
- {
233
- "epoch": 2.213740458015267,
234
- "grad_norm": 13.930877685546875,
235
- "learning_rate": 1.687603305785124e-05,
236
- "loss": 0.1799,
237
- "step": 290
238
- },
239
- {
240
- "epoch": 2.2900763358778624,
241
- "grad_norm": 6.306116104125977,
242
- "learning_rate": 1.6710743801652893e-05,
243
- "loss": 0.1459,
244
- "step": 300
245
- },
246
- {
247
- "epoch": 2.366412213740458,
248
- "grad_norm": 2.7880892753601074,
249
- "learning_rate": 1.6545454545454548e-05,
250
- "loss": 0.1426,
251
- "step": 310
252
- },
253
- {
254
- "epoch": 2.4427480916030535,
255
- "grad_norm": 15.64450740814209,
256
- "learning_rate": 1.63801652892562e-05,
257
- "loss": 0.1184,
258
- "step": 320
259
- },
260
- {
261
- "epoch": 2.519083969465649,
262
- "grad_norm": 1.4264142513275146,
263
- "learning_rate": 1.6214876033057853e-05,
264
- "loss": 0.1372,
265
- "step": 330
266
- },
267
- {
268
- "epoch": 2.595419847328244,
269
- "grad_norm": 14.163614273071289,
270
- "learning_rate": 1.6049586776859505e-05,
271
- "loss": 0.145,
272
- "step": 340
273
- },
274
- {
275
- "epoch": 2.67175572519084,
276
- "grad_norm": 5.825468063354492,
277
- "learning_rate": 1.588429752066116e-05,
278
- "loss": 0.1137,
279
- "step": 350
280
- },
281
- {
282
- "epoch": 2.7480916030534353,
283
- "grad_norm": 0.70721435546875,
284
- "learning_rate": 1.571900826446281e-05,
285
- "loss": 0.0688,
286
- "step": 360
287
- },
288
- {
289
- "epoch": 2.8244274809160306,
290
- "grad_norm": 5.984133720397949,
291
- "learning_rate": 1.5553719008264465e-05,
292
- "loss": 0.1614,
293
- "step": 370
294
- },
295
- {
296
- "epoch": 2.900763358778626,
297
- "grad_norm": 2.9067797660827637,
298
- "learning_rate": 1.5388429752066116e-05,
299
- "loss": 0.1258,
300
- "step": 380
301
- },
302
- {
303
- "epoch": 2.9770992366412212,
304
- "grad_norm": 0.7466038465499878,
305
- "learning_rate": 1.522314049586777e-05,
306
- "loss": 0.1494,
307
- "step": 390
308
- },
309
- {
310
- "epoch": 3.0,
311
- "eval_accuracy": 0.9692898272552783,
312
- "eval_f1": 0.9692039233298336,
313
- "eval_loss": 0.14140835404396057,
314
- "eval_precision": 0.9706301207884909,
315
- "eval_recall": 0.9692898272552783,
316
- "eval_runtime": 2.9931,
317
- "eval_samples_per_second": 174.066,
318
- "eval_steps_per_second": 11.025,
319
- "step": 393
320
- },
321
- {
322
- "epoch": 3.053435114503817,
323
- "grad_norm": 5.72157621383667,
324
- "learning_rate": 1.5057851239669424e-05,
325
- "loss": 0.1056,
326
- "step": 400
327
- },
328
- {
329
- "epoch": 3.1297709923664123,
330
- "grad_norm": 2.024094343185425,
331
- "learning_rate": 1.4892561983471077e-05,
332
- "loss": 0.1402,
333
- "step": 410
334
- },
335
- {
336
- "epoch": 3.2061068702290076,
337
- "grad_norm": 1.147175669670105,
338
- "learning_rate": 1.4727272727272728e-05,
339
- "loss": 0.0474,
340
- "step": 420
341
- },
342
- {
343
- "epoch": 3.282442748091603,
344
- "grad_norm": 4.005652904510498,
345
- "learning_rate": 1.4561983471074381e-05,
346
- "loss": 0.076,
347
- "step": 430
348
- },
349
- {
350
- "epoch": 3.3587786259541983,
351
- "grad_norm": 0.9247184991836548,
352
- "learning_rate": 1.4396694214876035e-05,
353
- "loss": 0.0709,
354
- "step": 440
355
- },
356
- {
357
- "epoch": 3.435114503816794,
358
- "grad_norm": 4.577192306518555,
359
- "learning_rate": 1.4231404958677688e-05,
360
- "loss": 0.0678,
361
- "step": 450
362
- },
363
- {
364
- "epoch": 3.5114503816793894,
365
- "grad_norm": 0.21287904679775238,
366
- "learning_rate": 1.406611570247934e-05,
367
- "loss": 0.0404,
368
- "step": 460
369
- },
370
- {
371
- "epoch": 3.5877862595419847,
372
- "grad_norm": 0.67902010679245,
373
- "learning_rate": 1.3900826446280993e-05,
374
- "loss": 0.0508,
375
- "step": 470
376
- },
377
- {
378
- "epoch": 3.66412213740458,
379
- "grad_norm": 0.8000791072845459,
380
- "learning_rate": 1.3735537190082645e-05,
381
- "loss": 0.0473,
382
- "step": 480
383
- },
384
- {
385
- "epoch": 3.7404580152671754,
386
- "grad_norm": 1.3421847820281982,
387
- "learning_rate": 1.35702479338843e-05,
388
- "loss": 0.0223,
389
- "step": 490
390
- },
391
- {
392
- "epoch": 3.816793893129771,
393
- "grad_norm": 0.182773157954216,
394
- "learning_rate": 1.3404958677685951e-05,
395
- "loss": 0.0196,
396
- "step": 500
397
- },
398
- {
399
- "epoch": 3.8931297709923665,
400
- "grad_norm": 1.4306972026824951,
401
- "learning_rate": 1.3239669421487604e-05,
402
- "loss": 0.0205,
403
- "step": 510
404
- },
405
- {
406
- "epoch": 3.969465648854962,
407
- "grad_norm": 1.896088719367981,
408
- "learning_rate": 1.3074380165289257e-05,
409
- "loss": 0.0212,
410
- "step": 520
411
- },
412
- {
413
- "epoch": 4.0,
414
- "eval_accuracy": 0.9769673704414588,
415
- "eval_f1": 0.9769688162747627,
416
- "eval_loss": 0.12679165601730347,
417
- "eval_precision": 0.9782792844480811,
418
- "eval_recall": 0.9769673704414588,
419
- "eval_runtime": 3.1232,
420
- "eval_samples_per_second": 166.818,
421
- "eval_steps_per_second": 10.566,
422
- "step": 524
423
- },
424
- {
425
- "epoch": 4.0458015267175576,
426
- "grad_norm": 0.14600762724876404,
427
- "learning_rate": 1.2909090909090912e-05,
428
- "loss": 0.0222,
429
- "step": 530
430
- },
431
- {
432
- "epoch": 4.122137404580153,
433
- "grad_norm": 8.074915885925293,
434
- "learning_rate": 1.2743801652892563e-05,
435
- "loss": 0.0542,
436
- "step": 540
437
- },
438
- {
439
- "epoch": 4.198473282442748,
440
- "grad_norm": 0.09765351563692093,
441
- "learning_rate": 1.2578512396694216e-05,
442
- "loss": 0.0394,
443
- "step": 550
444
- },
445
- {
446
- "epoch": 4.2748091603053435,
447
- "grad_norm": 0.33209875226020813,
448
- "learning_rate": 1.2413223140495869e-05,
449
- "loss": 0.0241,
450
- "step": 560
451
- },
452
- {
453
- "epoch": 4.351145038167939,
454
- "grad_norm": 0.5309058427810669,
455
- "learning_rate": 1.2247933884297522e-05,
456
- "loss": 0.0161,
457
- "step": 570
458
- },
459
- {
460
- "epoch": 4.427480916030534,
461
- "grad_norm": 0.1629948765039444,
462
- "learning_rate": 1.2082644628099173e-05,
463
- "loss": 0.0129,
464
- "step": 580
465
- },
466
- {
467
- "epoch": 4.5038167938931295,
468
- "grad_norm": 0.15240447223186493,
469
- "learning_rate": 1.1917355371900828e-05,
470
- "loss": 0.0128,
471
- "step": 590
472
- },
473
- {
474
- "epoch": 4.580152671755725,
475
- "grad_norm": 0.10693137347698212,
476
- "learning_rate": 1.175206611570248e-05,
477
- "loss": 0.0724,
478
- "step": 600
479
- },
480
- {
481
- "epoch": 4.65648854961832,
482
- "grad_norm": 0.8860049843788147,
483
- "learning_rate": 1.1586776859504133e-05,
484
- "loss": 0.013,
485
- "step": 610
486
- },
487
- {
488
- "epoch": 4.732824427480916,
489
- "grad_norm": 1.1124643087387085,
490
- "learning_rate": 1.1421487603305785e-05,
491
- "loss": 0.0228,
492
- "step": 620
493
- },
494
- {
495
- "epoch": 4.809160305343512,
496
- "grad_norm": 16.63216209411621,
497
- "learning_rate": 1.125619834710744e-05,
498
- "loss": 0.1361,
499
- "step": 630
500
- },
501
- {
502
- "epoch": 4.885496183206107,
503
- "grad_norm": 0.22511304914951324,
504
- "learning_rate": 1.1090909090909092e-05,
505
- "loss": 0.0127,
506
- "step": 640
507
- },
508
- {
509
- "epoch": 4.961832061068702,
510
- "grad_norm": 0.2706206142902374,
511
- "learning_rate": 1.0925619834710745e-05,
512
- "loss": 0.0127,
513
- "step": 650
514
- },
515
- {
516
- "epoch": 5.0,
517
- "eval_accuracy": 0.9731285988483686,
518
- "eval_f1": 0.973064976375106,
519
- "eval_loss": 0.14903880655765533,
520
- "eval_precision": 0.9743006090972162,
521
- "eval_recall": 0.9731285988483686,
522
- "eval_runtime": 3.1635,
523
- "eval_samples_per_second": 164.69,
524
- "eval_steps_per_second": 10.431,
525
- "step": 655
526
- },
527
- {
528
- "epoch": 5.038167938931298,
529
- "grad_norm": 26.473268508911133,
530
- "learning_rate": 1.0760330578512396e-05,
531
- "loss": 0.0238,
532
- "step": 660
533
- },
534
- {
535
- "epoch": 5.114503816793893,
536
- "grad_norm": 0.08209118992090225,
537
- "learning_rate": 1.0595041322314051e-05,
538
- "loss": 0.0107,
539
- "step": 670
540
- },
541
- {
542
- "epoch": 5.190839694656488,
543
- "grad_norm": 1.1051641702651978,
544
- "learning_rate": 1.0429752066115704e-05,
545
- "loss": 0.0682,
546
- "step": 680
547
- },
548
- {
549
- "epoch": 5.267175572519084,
550
- "grad_norm": 11.310916900634766,
551
- "learning_rate": 1.0264462809917357e-05,
552
- "loss": 0.0131,
553
- "step": 690
554
- },
555
- {
556
- "epoch": 5.34351145038168,
557
- "grad_norm": 0.09134263545274734,
558
- "learning_rate": 1.0099173553719008e-05,
559
- "loss": 0.0428,
560
- "step": 700
561
- },
562
- {
563
- "epoch": 5.419847328244275,
564
- "grad_norm": 0.08655811846256256,
565
- "learning_rate": 9.933884297520661e-06,
566
- "loss": 0.0246,
567
- "step": 710
568
- },
569
- {
570
- "epoch": 5.4961832061068705,
571
- "grad_norm": 0.16410402953624725,
572
- "learning_rate": 9.768595041322316e-06,
573
- "loss": 0.0094,
574
- "step": 720
575
- },
576
- {
577
- "epoch": 5.572519083969466,
578
- "grad_norm": 1.349546194076538,
579
- "learning_rate": 9.603305785123967e-06,
580
- "loss": 0.045,
581
- "step": 730
582
- },
583
- {
584
- "epoch": 5.648854961832061,
585
- "grad_norm": 0.12257255613803864,
586
- "learning_rate": 9.438016528925621e-06,
587
- "loss": 0.0516,
588
- "step": 740
589
- },
590
- {
591
- "epoch": 5.7251908396946565,
592
- "grad_norm": 0.06910885125398636,
593
- "learning_rate": 9.272727272727273e-06,
594
- "loss": 0.009,
595
- "step": 750
596
- },
597
- {
598
- "epoch": 5.801526717557252,
599
- "grad_norm": 0.056132227182388306,
600
- "learning_rate": 9.107438016528927e-06,
601
- "loss": 0.0107,
602
- "step": 760
603
- },
604
- {
605
- "epoch": 5.877862595419847,
606
- "grad_norm": 0.07667958736419678,
607
- "learning_rate": 8.942148760330578e-06,
608
- "loss": 0.0086,
609
- "step": 770
610
- },
611
- {
612
- "epoch": 5.9541984732824424,
613
- "grad_norm": 0.10609736293554306,
614
- "learning_rate": 8.776859504132233e-06,
615
- "loss": 0.0085,
616
- "step": 780
617
- },
618
- {
619
- "epoch": 6.0,
620
- "eval_accuracy": 0.9788867562380038,
621
- "eval_f1": 0.9789720270641704,
622
- "eval_loss": 0.12155096977949142,
623
- "eval_precision": 0.9801130700504813,
624
- "eval_recall": 0.9788867562380038,
625
- "eval_runtime": 3.2955,
626
- "eval_samples_per_second": 158.096,
627
- "eval_steps_per_second": 10.014,
628
- "step": 786
629
- },
630
- {
631
- "epoch": 6.030534351145038,
632
- "grad_norm": 0.06408526748418808,
633
- "learning_rate": 8.611570247933884e-06,
634
- "loss": 0.0081,
635
- "step": 790
636
- },
637
- {
638
- "epoch": 6.106870229007634,
639
- "grad_norm": 0.07884930074214935,
640
- "learning_rate": 8.446280991735539e-06,
641
- "loss": 0.031,
642
- "step": 800
643
- },
644
- {
645
- "epoch": 6.183206106870229,
646
- "grad_norm": 0.07998275011777878,
647
- "learning_rate": 8.28099173553719e-06,
648
- "loss": 0.0453,
649
- "step": 810
650
- },
651
- {
652
- "epoch": 6.259541984732825,
653
- "grad_norm": 0.22578206658363342,
654
- "learning_rate": 8.115702479338843e-06,
655
- "loss": 0.0078,
656
- "step": 820
657
- },
658
- {
659
- "epoch": 6.33587786259542,
660
- "grad_norm": 0.07642875611782074,
661
- "learning_rate": 7.950413223140496e-06,
662
- "loss": 0.0086,
663
- "step": 830
664
- },
665
- {
666
- "epoch": 6.412213740458015,
667
- "grad_norm": 0.10305721312761307,
668
- "learning_rate": 7.785123966942149e-06,
669
- "loss": 0.0444,
670
- "step": 840
671
- },
672
- {
673
- "epoch": 6.488549618320611,
674
- "grad_norm": 0.0701122134923935,
675
- "learning_rate": 7.619834710743802e-06,
676
- "loss": 0.0077,
677
- "step": 850
678
- },
679
- {
680
- "epoch": 6.564885496183206,
681
- "grad_norm": 0.07119292765855789,
682
- "learning_rate": 7.454545454545456e-06,
683
- "loss": 0.0076,
684
- "step": 860
685
- },
686
- {
687
- "epoch": 6.641221374045801,
688
- "grad_norm": 0.2685672342777252,
689
- "learning_rate": 7.289256198347108e-06,
690
- "loss": 0.0077,
691
- "step": 870
692
- },
693
- {
694
- "epoch": 6.717557251908397,
695
- "grad_norm": 0.0628926083445549,
696
- "learning_rate": 7.1239669421487615e-06,
697
- "loss": 0.0072,
698
- "step": 880
699
- },
700
- {
701
- "epoch": 6.793893129770993,
702
- "grad_norm": 0.06299301236867905,
703
- "learning_rate": 6.9586776859504135e-06,
704
- "loss": 0.0109,
705
- "step": 890
706
- },
707
- {
708
- "epoch": 6.870229007633588,
709
- "grad_norm": 0.06120818480849266,
710
- "learning_rate": 6.793388429752067e-06,
711
- "loss": 0.0069,
712
- "step": 900
713
- },
714
- {
715
- "epoch": 6.9465648854961835,
716
- "grad_norm": 0.08700945228338242,
717
- "learning_rate": 6.628099173553719e-06,
718
- "loss": 0.0073,
719
- "step": 910
720
- },
721
- {
722
- "epoch": 7.0,
723
- "eval_accuracy": 0.980806142034549,
724
- "eval_f1": 0.980791368968265,
725
- "eval_loss": 0.11875477433204651,
726
- "eval_precision": 0.981936841149893,
727
- "eval_recall": 0.980806142034549,
728
- "eval_runtime": 3.1069,
729
- "eval_samples_per_second": 167.692,
730
- "eval_steps_per_second": 10.622,
731
- "step": 917
732
- },
733
- {
734
- "epoch": 7.022900763358779,
735
- "grad_norm": 0.061511170119047165,
736
- "learning_rate": 6.462809917355372e-06,
737
- "loss": 0.0066,
738
- "step": 920
739
- },
740
- {
741
- "epoch": 7.099236641221374,
742
- "grad_norm": 0.06128810718655586,
743
- "learning_rate": 6.297520661157025e-06,
744
- "loss": 0.0064,
745
- "step": 930
746
- },
747
- {
748
- "epoch": 7.175572519083969,
749
- "grad_norm": 0.05454257130622864,
750
- "learning_rate": 6.132231404958678e-06,
751
- "loss": 0.0066,
752
- "step": 940
753
- },
754
- {
755
- "epoch": 7.251908396946565,
756
- "grad_norm": 0.09356739372015,
757
- "learning_rate": 5.966942148760331e-06,
758
- "loss": 0.0065,
759
- "step": 950
760
- },
761
- {
762
- "epoch": 7.32824427480916,
763
- "grad_norm": 0.04699549078941345,
764
- "learning_rate": 5.801652892561984e-06,
765
- "loss": 0.006,
766
- "step": 960
767
- },
768
- {
769
- "epoch": 7.404580152671755,
770
- "grad_norm": 0.04597270488739014,
771
- "learning_rate": 5.636363636363636e-06,
772
- "loss": 0.0063,
773
- "step": 970
774
- },
775
- {
776
- "epoch": 7.480916030534351,
777
- "grad_norm": 0.05777190253138542,
778
- "learning_rate": 5.47107438016529e-06,
779
- "loss": 0.0057,
780
- "step": 980
781
- },
782
- {
783
- "epoch": 7.557251908396947,
784
- "grad_norm": 0.0520237572491169,
785
- "learning_rate": 5.305785123966942e-06,
786
- "loss": 0.006,
787
- "step": 990
788
- },
789
- {
790
- "epoch": 7.633587786259542,
791
- "grad_norm": 0.0427822545170784,
792
- "learning_rate": 5.140495867768596e-06,
793
- "loss": 0.0059,
794
- "step": 1000
795
- },
796
- {
797
- "epoch": 7.709923664122138,
798
- "grad_norm": 0.05699237063527107,
799
- "learning_rate": 4.975206611570249e-06,
800
- "loss": 0.0055,
801
- "step": 1010
802
- },
803
- {
804
- "epoch": 7.786259541984733,
805
- "grad_norm": 0.05885695666074753,
806
- "learning_rate": 4.8099173553719015e-06,
807
- "loss": 0.0258,
808
- "step": 1020
809
- },
810
- {
811
- "epoch": 7.862595419847328,
812
- "grad_norm": 0.05190462991595268,
813
- "learning_rate": 4.6446280991735544e-06,
814
- "loss": 0.0496,
815
- "step": 1030
816
- },
817
- {
818
- "epoch": 7.938931297709924,
819
- "grad_norm": 0.03909669816493988,
820
- "learning_rate": 4.479338842975207e-06,
821
- "loss": 0.0398,
822
- "step": 1040
823
- },
824
- {
825
- "epoch": 8.0,
826
- "eval_accuracy": 0.980806142034549,
827
- "eval_f1": 0.980791368968265,
828
- "eval_loss": 0.12089628726243973,
829
- "eval_precision": 0.981936841149893,
830
- "eval_recall": 0.980806142034549,
831
- "eval_runtime": 3.1129,
832
- "eval_samples_per_second": 167.366,
833
- "eval_steps_per_second": 10.601,
834
- "step": 1048
835
- },
836
- {
837
- "epoch": 8.01526717557252,
838
- "grad_norm": 0.05181822180747986,
839
- "learning_rate": 4.31404958677686e-06,
840
- "loss": 0.0062,
841
- "step": 1050
842
- },
843
- {
844
- "epoch": 8.091603053435115,
845
- "grad_norm": 0.03777517005801201,
846
- "learning_rate": 4.148760330578513e-06,
847
- "loss": 0.0058,
848
- "step": 1060
849
- },
850
- {
851
- "epoch": 8.16793893129771,
852
- "grad_norm": 0.04515732452273369,
853
- "learning_rate": 3.983471074380166e-06,
854
- "loss": 0.0056,
855
- "step": 1070
856
- },
857
- {
858
- "epoch": 8.244274809160306,
859
- "grad_norm": 0.044928282499313354,
860
- "learning_rate": 3.818181818181819e-06,
861
- "loss": 0.0055,
862
- "step": 1080
863
- },
864
- {
865
- "epoch": 8.320610687022901,
866
- "grad_norm": 0.05599347501993179,
867
- "learning_rate": 3.6528925619834715e-06,
868
- "loss": 0.0057,
869
- "step": 1090
870
- },
871
- {
872
- "epoch": 8.396946564885496,
873
- "grad_norm": 1.0466651916503906,
874
- "learning_rate": 3.4876033057851245e-06,
875
- "loss": 0.0384,
876
- "step": 1100
877
- },
878
- {
879
- "epoch": 8.473282442748092,
880
- "grad_norm": 0.05839056894183159,
881
- "learning_rate": 3.3223140495867774e-06,
882
- "loss": 0.0057,
883
- "step": 1110
884
- },
885
- {
886
- "epoch": 8.549618320610687,
887
- "grad_norm": 0.05969908460974693,
888
- "learning_rate": 3.1570247933884303e-06,
889
- "loss": 0.0424,
890
- "step": 1120
891
- },
892
- {
893
- "epoch": 8.625954198473282,
894
- "grad_norm": 0.06252706795930862,
895
- "learning_rate": 2.9917355371900832e-06,
896
- "loss": 0.0174,
897
- "step": 1130
898
- },
899
- {
900
- "epoch": 8.702290076335878,
901
- "grad_norm": 0.1538064330816269,
902
- "learning_rate": 2.8264462809917357e-06,
903
- "loss": 0.0058,
904
- "step": 1140
905
- },
906
- {
907
- "epoch": 8.778625954198473,
908
- "grad_norm": 0.05743182823061943,
909
- "learning_rate": 2.6611570247933886e-06,
910
- "loss": 0.0055,
911
- "step": 1150
912
- },
913
- {
914
- "epoch": 8.854961832061068,
915
- "grad_norm": 0.06665431708097458,
916
- "learning_rate": 2.4958677685950416e-06,
917
- "loss": 0.0057,
918
- "step": 1160
919
- },
920
- {
921
- "epoch": 8.931297709923664,
922
- "grad_norm": 0.07899218052625656,
923
- "learning_rate": 2.3305785123966945e-06,
924
- "loss": 0.0055,
925
- "step": 1170
926
- },
927
- {
928
- "epoch": 9.0,
929
- "eval_accuracy": 0.980806142034549,
930
- "eval_f1": 0.9807999304274743,
931
- "eval_loss": 0.12244618684053421,
932
- "eval_precision": 0.9819563131797131,
933
- "eval_recall": 0.980806142034549,
934
- "eval_runtime": 3.1404,
935
- "eval_samples_per_second": 165.902,
936
- "eval_steps_per_second": 10.508,
937
- "step": 1179
938
- },
939
- {
940
- "epoch": 9.007633587786259,
941
- "grad_norm": 0.03778070956468582,
942
- "learning_rate": 2.1652892561983474e-06,
943
- "loss": 0.0054,
944
- "step": 1180
945
- },
946
- {
947
- "epoch": 9.083969465648854,
948
- "grad_norm": 0.0397595539689064,
949
- "learning_rate": 2.0000000000000003e-06,
950
- "loss": 0.0074,
951
- "step": 1190
952
- },
953
- {
954
- "epoch": 9.16030534351145,
955
- "grad_norm": 0.03877999261021614,
956
- "learning_rate": 1.8347107438016533e-06,
957
- "loss": 0.0053,
958
- "step": 1200
959
- },
960
- {
961
- "epoch": 9.236641221374045,
962
- "grad_norm": 0.05299071595072746,
963
- "learning_rate": 1.669421487603306e-06,
964
- "loss": 0.0052,
965
- "step": 1210
966
- },
967
- {
968
- "epoch": 9.312977099236642,
969
- "grad_norm": 0.08447615802288055,
970
- "learning_rate": 1.5041322314049589e-06,
971
- "loss": 0.0053,
972
- "step": 1220
973
- },
974
- {
975
- "epoch": 9.389312977099237,
976
- "grad_norm": 0.05665115639567375,
977
- "learning_rate": 1.3388429752066118e-06,
978
- "loss": 0.0053,
979
- "step": 1230
980
- },
981
- {
982
- "epoch": 9.465648854961833,
983
- "grad_norm": 0.04538382589817047,
984
- "learning_rate": 1.1735537190082645e-06,
985
- "loss": 0.0055,
986
- "step": 1240
987
- },
988
- {
989
- "epoch": 9.541984732824428,
990
- "grad_norm": 0.04416332021355629,
991
- "learning_rate": 1.0082644628099174e-06,
992
- "loss": 0.0051,
993
- "step": 1250
994
- },
995
- {
996
- "epoch": 9.618320610687023,
997
- "grad_norm": 0.03218664601445198,
998
- "learning_rate": 8.429752066115703e-07,
999
- "loss": 0.0054,
1000
- "step": 1260
1001
- },
1002
- {
1003
- "epoch": 9.694656488549619,
1004
- "grad_norm": 0.05276583135128021,
1005
- "learning_rate": 6.776859504132232e-07,
1006
- "loss": 0.0052,
1007
- "step": 1270
1008
- },
1009
- {
1010
- "epoch": 9.770992366412214,
1011
- "grad_norm": 0.03935326635837555,
1012
- "learning_rate": 5.123966942148761e-07,
1013
- "loss": 0.0057,
1014
- "step": 1280
1015
- },
1016
- {
1017
- "epoch": 9.84732824427481,
1018
- "grad_norm": 0.05612946301698685,
1019
- "learning_rate": 3.4710743801652896e-07,
1020
- "loss": 0.0353,
1021
- "step": 1290
1022
- },
1023
- {
1024
- "epoch": 9.923664122137405,
1025
- "grad_norm": 0.03918612375855446,
1026
- "learning_rate": 1.8181818181818183e-07,
1027
- "loss": 0.005,
1028
- "step": 1300
1029
- },
1030
- {
1031
- "epoch": 10.0,
1032
- "grad_norm": 0.12464316934347153,
1033
- "learning_rate": 1.652892561983471e-08,
1034
- "loss": 0.0425,
1035
- "step": 1310
1036
- },
1037
- {
1038
- "epoch": 10.0,
1039
- "eval_accuracy": 0.982725527831094,
1040
- "eval_f1": 0.9827168072229782,
1041
- "eval_loss": 0.12131477892398834,
1042
- "eval_precision": 0.9837922474198865,
1043
- "eval_recall": 0.982725527831094,
1044
- "eval_runtime": 3.118,
1045
- "eval_samples_per_second": 167.095,
1046
- "eval_steps_per_second": 10.584,
1047
- "step": 1310
1048
- }
1049
- ],
1050
- "logging_steps": 10,
1051
- "max_steps": 1310,
1052
- "num_input_tokens_seen": 0,
1053
- "num_train_epochs": 10,
1054
- "save_steps": 500,
1055
- "stateful_callbacks": {
1056
- "EarlyStoppingCallback": {
1057
- "args": {
1058
- "early_stopping_patience": 3,
1059
- "early_stopping_threshold": 0.0
1060
- },
1061
- "attributes": {
1062
- "early_stopping_patience_counter": 0
1063
- }
1064
- },
1065
- "TrainerControl": {
1066
- "args": {
1067
- "should_epoch_stop": false,
1068
- "should_evaluate": false,
1069
- "should_log": false,
1070
- "should_save": true,
1071
- "should_training_stop": true
1072
- },
1073
- "attributes": {}
1074
- }
1075
- },
1076
- "total_flos": 1370372257351680.0,
1077
- "train_batch_size": 16,
1078
- "trial_name": null,
1079
- "trial_params": null
1080
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-1310/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:21a4ac2a72f23cb69080a4fb3a9a3266e6a76062c2c55904cad3d4237f62c83e
3
- size 5841