dmahata commited on
Commit
7e5f2d9
1 Parent(s): 5cf7d6e

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +475 -0
trainer_state.json ADDED
@@ -0,0 +1,475 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 2500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.4,
12
+ "learning_rate": 2.88e-05,
13
+ "loss": 0.3202,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.4,
18
+ "eval_accuracy": 0.8813009961455675,
19
+ "eval_f1": 0.16261325703385787,
20
+ "eval_loss": 0.24328218400478363,
21
+ "eval_precision": 0.2519394163280384,
22
+ "eval_recall": 0.12004928709734201,
23
+ "eval_runtime": 4.054,
24
+ "eval_samples_per_second": 123.334,
25
+ "eval_steps_per_second": 30.833,
26
+ "step": 100
27
+ },
28
+ {
29
+ "epoch": 0.8,
30
+ "learning_rate": 2.7600000000000003e-05,
31
+ "loss": 0.23,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.8,
36
+ "eval_accuracy": 0.9034639835811182,
37
+ "eval_f1": 0.49651100375738055,
38
+ "eval_loss": 0.21028906106948853,
39
+ "eval_precision": 0.5048208113516464,
40
+ "eval_recall": 0.48847033972892095,
41
+ "eval_runtime": 4.2572,
42
+ "eval_samples_per_second": 117.449,
43
+ "eval_steps_per_second": 29.362,
44
+ "step": 200
45
+ },
46
+ {
47
+ "epoch": 1.2,
48
+ "learning_rate": 2.64e-05,
49
+ "loss": 0.2013,
50
+ "step": 300
51
+ },
52
+ {
53
+ "epoch": 1.2,
54
+ "eval_accuracy": 0.8792611503228713,
55
+ "eval_f1": 0.5627305035874741,
56
+ "eval_loss": 0.2621181607246399,
57
+ "eval_precision": 0.4545060658578856,
58
+ "eval_recall": 0.7386023587396585,
59
+ "eval_runtime": 4.1357,
60
+ "eval_samples_per_second": 120.897,
61
+ "eval_steps_per_second": 30.224,
62
+ "step": 300
63
+ },
64
+ {
65
+ "epoch": 1.6,
66
+ "learning_rate": 2.52e-05,
67
+ "loss": 0.1874,
68
+ "step": 400
69
+ },
70
+ {
71
+ "epoch": 1.6,
72
+ "eval_accuracy": 0.893452470340892,
73
+ "eval_f1": 0.5619149696320114,
74
+ "eval_loss": 0.2326020449399948,
75
+ "eval_precision": 0.47293721433726243,
76
+ "eval_recall": 0.6921316669600422,
77
+ "eval_runtime": 4.241,
78
+ "eval_samples_per_second": 117.897,
79
+ "eval_steps_per_second": 29.474,
80
+ "step": 400
81
+ },
82
+ {
83
+ "epoch": 2.0,
84
+ "learning_rate": 2.4e-05,
85
+ "loss": 0.1847,
86
+ "step": 500
87
+ },
88
+ {
89
+ "epoch": 2.0,
90
+ "eval_accuracy": 0.9060920058066777,
91
+ "eval_f1": 0.5619039721369932,
92
+ "eval_loss": 0.20794397592544556,
93
+ "eval_precision": 0.5312010034493572,
94
+ "eval_recall": 0.5963738778384088,
95
+ "eval_runtime": 4.124,
96
+ "eval_samples_per_second": 121.24,
97
+ "eval_steps_per_second": 30.31,
98
+ "step": 500
99
+ },
100
+ {
101
+ "epoch": 2.4,
102
+ "learning_rate": 2.2800000000000002e-05,
103
+ "loss": 0.1567,
104
+ "step": 600
105
+ },
106
+ {
107
+ "epoch": 2.4,
108
+ "eval_accuracy": 0.9071932722631025,
109
+ "eval_f1": 0.503008186211658,
110
+ "eval_loss": 0.23015139997005463,
111
+ "eval_precision": 0.5720053835800808,
112
+ "eval_recall": 0.4488646365076571,
113
+ "eval_runtime": 4.1201,
114
+ "eval_samples_per_second": 121.355,
115
+ "eval_steps_per_second": 30.339,
116
+ "step": 600
117
+ },
118
+ {
119
+ "epoch": 2.8,
120
+ "learning_rate": 2.16e-05,
121
+ "loss": 0.1484,
122
+ "step": 700
123
+ },
124
+ {
125
+ "epoch": 2.8,
126
+ "eval_accuracy": 0.9038269009360765,
127
+ "eval_f1": 0.5580642412882338,
128
+ "eval_loss": 0.22997109591960907,
129
+ "eval_precision": 0.540785997357992,
130
+ "eval_recall": 0.5764830135539518,
131
+ "eval_runtime": 4.134,
132
+ "eval_samples_per_second": 120.948,
133
+ "eval_steps_per_second": 30.237,
134
+ "step": 700
135
+ },
136
+ {
137
+ "epoch": 3.2,
138
+ "learning_rate": 2.04e-05,
139
+ "loss": 0.1388,
140
+ "step": 800
141
+ },
142
+ {
143
+ "epoch": 3.2,
144
+ "eval_accuracy": 0.9075561896180607,
145
+ "eval_f1": 0.5364304509572634,
146
+ "eval_loss": 0.2365296632051468,
147
+ "eval_precision": 0.5535580524344569,
148
+ "eval_recall": 0.5203309276535821,
149
+ "eval_runtime": 4.1587,
150
+ "eval_samples_per_second": 120.229,
151
+ "eval_steps_per_second": 30.057,
152
+ "step": 800
153
+ },
154
+ {
155
+ "epoch": 3.6,
156
+ "learning_rate": 1.9200000000000003e-05,
157
+ "loss": 0.1191,
158
+ "step": 900
159
+ },
160
+ {
161
+ "epoch": 3.6,
162
+ "eval_accuracy": 0.9067302397757421,
163
+ "eval_f1": 0.574726200505476,
164
+ "eval_loss": 0.26086461544036865,
165
+ "eval_precision": 0.5511391177896268,
166
+ "eval_recall": 0.6004224608343601,
167
+ "eval_runtime": 4.1544,
168
+ "eval_samples_per_second": 120.354,
169
+ "eval_steps_per_second": 30.088,
170
+ "step": 900
171
+ },
172
+ {
173
+ "epoch": 4.0,
174
+ "learning_rate": 1.8e-05,
175
+ "loss": 0.1193,
176
+ "step": 1000
177
+ },
178
+ {
179
+ "epoch": 4.0,
180
+ "eval_accuracy": 0.9059543474996246,
181
+ "eval_f1": 0.5809305373525557,
182
+ "eval_loss": 0.25283825397491455,
183
+ "eval_precision": 0.543281752719473,
184
+ "eval_recall": 0.6241858827671185,
185
+ "eval_runtime": 4.1474,
186
+ "eval_samples_per_second": 120.557,
187
+ "eval_steps_per_second": 30.139,
188
+ "step": 1000
189
+ },
190
+ {
191
+ "epoch": 4.4,
192
+ "learning_rate": 1.6800000000000002e-05,
193
+ "loss": 0.088,
194
+ "step": 1100
195
+ },
196
+ {
197
+ "epoch": 4.4,
198
+ "eval_accuracy": 0.9037142714121239,
199
+ "eval_f1": 0.5845009103142563,
200
+ "eval_loss": 0.2839806079864502,
201
+ "eval_precision": 0.5310701956271576,
202
+ "eval_recall": 0.6498855835240275,
203
+ "eval_runtime": 4.1556,
204
+ "eval_samples_per_second": 120.318,
205
+ "eval_steps_per_second": 30.08,
206
+ "step": 1100
207
+ },
208
+ {
209
+ "epoch": 4.8,
210
+ "learning_rate": 1.56e-05,
211
+ "loss": 0.0924,
212
+ "step": 1200
213
+ },
214
+ {
215
+ "epoch": 4.8,
216
+ "eval_accuracy": 0.9085197977674325,
217
+ "eval_f1": 0.5776627856834843,
218
+ "eval_loss": 0.27629220485687256,
219
+ "eval_precision": 0.5662833953331079,
220
+ "eval_recall": 0.5895088892800563,
221
+ "eval_runtime": 4.1675,
222
+ "eval_samples_per_second": 119.975,
223
+ "eval_steps_per_second": 29.994,
224
+ "step": 1200
225
+ },
226
+ {
227
+ "epoch": 5.2,
228
+ "learning_rate": 1.44e-05,
229
+ "loss": 0.0834,
230
+ "step": 1300
231
+ },
232
+ {
233
+ "epoch": 5.2,
234
+ "eval_accuracy": 0.9037267858036743,
235
+ "eval_f1": 0.5866475003992974,
236
+ "eval_loss": 0.332010954618454,
237
+ "eval_precision": 0.5369098085075281,
238
+ "eval_recall": 0.6465411019186763,
239
+ "eval_runtime": 4.1738,
240
+ "eval_samples_per_second": 119.795,
241
+ "eval_steps_per_second": 29.949,
242
+ "step": 1300
243
+ },
244
+ {
245
+ "epoch": 5.6,
246
+ "learning_rate": 1.32e-05,
247
+ "loss": 0.0654,
248
+ "step": 1400
249
+ },
250
+ {
251
+ "epoch": 5.6,
252
+ "eval_accuracy": 0.9057416028432698,
253
+ "eval_f1": 0.574710687542546,
254
+ "eval_loss": 0.32423922419548035,
255
+ "eval_precision": 0.5562510294844342,
256
+ "eval_recall": 0.5944375990142581,
257
+ "eval_runtime": 4.1539,
258
+ "eval_samples_per_second": 120.368,
259
+ "eval_steps_per_second": 30.092,
260
+ "step": 1400
261
+ },
262
+ {
263
+ "epoch": 6.0,
264
+ "learning_rate": 1.2e-05,
265
+ "loss": 0.0689,
266
+ "step": 1500
267
+ },
268
+ {
269
+ "epoch": 6.0,
270
+ "eval_accuracy": 0.9046403363868448,
271
+ "eval_f1": 0.5581112750629285,
272
+ "eval_loss": 0.31789475679397583,
273
+ "eval_precision": 0.550513698630137,
274
+ "eval_recall": 0.5659214926949481,
275
+ "eval_runtime": 4.1716,
276
+ "eval_samples_per_second": 119.859,
277
+ "eval_steps_per_second": 29.965,
278
+ "step": 1500
279
+ },
280
+ {
281
+ "epoch": 6.4,
282
+ "learning_rate": 1.08e-05,
283
+ "loss": 0.0498,
284
+ "step": 1600
285
+ },
286
+ {
287
+ "epoch": 6.4,
288
+ "eval_accuracy": 0.9053661710967613,
289
+ "eval_f1": 0.5820808768579258,
290
+ "eval_loss": 0.38915345072746277,
291
+ "eval_precision": 0.5509273813266269,
292
+ "eval_recall": 0.6169688435134659,
293
+ "eval_runtime": 4.1814,
294
+ "eval_samples_per_second": 119.577,
295
+ "eval_steps_per_second": 29.894,
296
+ "step": 1600
297
+ },
298
+ {
299
+ "epoch": 6.8,
300
+ "learning_rate": 9.600000000000001e-06,
301
+ "loss": 0.0528,
302
+ "step": 1700
303
+ },
304
+ {
305
+ "epoch": 6.8,
306
+ "eval_accuracy": 0.9048155378685488,
307
+ "eval_f1": 0.5776866283839212,
308
+ "eval_loss": 0.3601633608341217,
309
+ "eval_precision": 0.5409433092640958,
310
+ "eval_recall": 0.619785249075867,
311
+ "eval_runtime": 4.1473,
312
+ "eval_samples_per_second": 120.56,
313
+ "eval_steps_per_second": 30.14,
314
+ "step": 1700
315
+ },
316
+ {
317
+ "epoch": 7.2,
318
+ "learning_rate": 8.400000000000001e-06,
319
+ "loss": 0.0474,
320
+ "step": 1800
321
+ },
322
+ {
323
+ "epoch": 7.2,
324
+ "eval_accuracy": 0.9040396455924313,
325
+ "eval_f1": 0.5793253173012691,
326
+ "eval_loss": 0.39758625626564026,
327
+ "eval_precision": 0.5510722795869738,
328
+ "eval_recall": 0.6106319309980637,
329
+ "eval_runtime": 4.1737,
330
+ "eval_samples_per_second": 119.798,
331
+ "eval_steps_per_second": 29.949,
332
+ "step": 1800
333
+ },
334
+ {
335
+ "epoch": 7.6,
336
+ "learning_rate": 7.2e-06,
337
+ "loss": 0.039,
338
+ "step": 1900
339
+ },
340
+ {
341
+ "epoch": 7.6,
342
+ "eval_accuracy": 0.9035766131050709,
343
+ "eval_f1": 0.5778368499750789,
344
+ "eval_loss": 0.4138449728488922,
345
+ "eval_precision": 0.5471134182790625,
346
+ "eval_recall": 0.6122161591269143,
347
+ "eval_runtime": 4.1525,
348
+ "eval_samples_per_second": 120.408,
349
+ "eval_steps_per_second": 30.102,
350
+ "step": 1900
351
+ },
352
+ {
353
+ "epoch": 8.0,
354
+ "learning_rate": 6e-06,
355
+ "loss": 0.0446,
356
+ "step": 2000
357
+ },
358
+ {
359
+ "epoch": 8.0,
360
+ "eval_accuracy": 0.9039520448515793,
361
+ "eval_f1": 0.5882447535579319,
362
+ "eval_loss": 0.408151775598526,
363
+ "eval_precision": 0.5414446417998816,
364
+ "eval_recall": 0.6439007217039253,
365
+ "eval_runtime": 4.1562,
366
+ "eval_samples_per_second": 120.303,
367
+ "eval_steps_per_second": 30.076,
368
+ "step": 2000
369
+ },
370
+ {
371
+ "epoch": 8.4,
372
+ "learning_rate": 4.800000000000001e-06,
373
+ "loss": 0.0333,
374
+ "step": 2100
375
+ },
376
+ {
377
+ "epoch": 8.4,
378
+ "eval_accuracy": 0.9046528507783952,
379
+ "eval_f1": 0.5720617062984743,
380
+ "eval_loss": 0.4318484365940094,
381
+ "eval_precision": 0.5545274289491078,
382
+ "eval_recall": 0.5907410667136067,
383
+ "eval_runtime": 4.1724,
384
+ "eval_samples_per_second": 119.834,
385
+ "eval_steps_per_second": 29.959,
386
+ "step": 2100
387
+ },
388
+ {
389
+ "epoch": 8.8,
390
+ "learning_rate": 3.6e-06,
391
+ "loss": 0.0327,
392
+ "step": 2200
393
+ },
394
+ {
395
+ "epoch": 8.8,
396
+ "eval_accuracy": 0.9054913150122641,
397
+ "eval_f1": 0.5734657499363381,
398
+ "eval_loss": 0.4232546091079712,
399
+ "eval_precision": 0.5537704918032786,
400
+ "eval_recall": 0.5946136243619081,
401
+ "eval_runtime": 4.1536,
402
+ "eval_samples_per_second": 120.378,
403
+ "eval_steps_per_second": 30.095,
404
+ "step": 2200
405
+ },
406
+ {
407
+ "epoch": 9.2,
408
+ "learning_rate": 2.4000000000000003e-06,
409
+ "loss": 0.03,
410
+ "step": 2300
411
+ },
412
+ {
413
+ "epoch": 9.2,
414
+ "eval_accuracy": 0.9049782249587025,
415
+ "eval_f1": 0.5769523005487548,
416
+ "eval_loss": 0.44003215432167053,
417
+ "eval_precision": 0.5543478260869565,
418
+ "eval_recall": 0.6014786129202605,
419
+ "eval_runtime": 4.2605,
420
+ "eval_samples_per_second": 117.358,
421
+ "eval_steps_per_second": 29.339,
422
+ "step": 2300
423
+ },
424
+ {
425
+ "epoch": 9.6,
426
+ "learning_rate": 1.2000000000000002e-06,
427
+ "loss": 0.0286,
428
+ "step": 2400
429
+ },
430
+ {
431
+ "epoch": 9.6,
432
+ "eval_accuracy": 0.9048280522600991,
433
+ "eval_f1": 0.5807528586929305,
434
+ "eval_loss": 0.4442707598209381,
435
+ "eval_precision": 0.5522222222222222,
436
+ "eval_recall": 0.6123921844745643,
437
+ "eval_runtime": 4.3652,
438
+ "eval_samples_per_second": 114.542,
439
+ "eval_steps_per_second": 28.636,
440
+ "step": 2400
441
+ },
442
+ {
443
+ "epoch": 10.0,
444
+ "learning_rate": 0.0,
445
+ "loss": 0.0261,
446
+ "step": 2500
447
+ },
448
+ {
449
+ "epoch": 10.0,
450
+ "eval_accuracy": 0.9050407969164539,
451
+ "eval_f1": 0.5811535881958416,
452
+ "eval_loss": 0.4490407407283783,
453
+ "eval_precision": 0.5548263166319833,
454
+ "eval_recall": 0.6101038549551135,
455
+ "eval_runtime": 4.2364,
456
+ "eval_samples_per_second": 118.025,
457
+ "eval_steps_per_second": 29.506,
458
+ "step": 2500
459
+ },
460
+ {
461
+ "epoch": 10.0,
462
+ "step": 2500,
463
+ "total_flos": 2612991191040000.0,
464
+ "train_loss": 0.10352115373611451,
465
+ "train_runtime": 364.2147,
466
+ "train_samples_per_second": 27.456,
467
+ "train_steps_per_second": 6.864
468
+ }
469
+ ],
470
+ "max_steps": 2500,
471
+ "num_train_epochs": 10,
472
+ "total_flos": 2612991191040000.0,
473
+ "trial_name": null,
474
+ "trial_params": null
475
+ }