zswwsz commited on
Commit
5e4c509
1 Parent(s): e404d7d

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. model/trainer_state.json +478 -0
model/trainer_state.json ADDED
@@ -0,0 +1,478 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.1016548871994019,
3
+ "best_model_checkpoint": "./morror_art/result/reliable_emotion_result_5_24/checkpoint-3500",
4
+ "epoch": 24.475524475524477,
5
+ "global_step": 3500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.7,
12
+ "eval_accuracy": 0.42132269099201825,
13
+ "eval_f1": 0.10717554161400515,
14
+ "eval_loss": 1.556437373161316,
15
+ "eval_precision": 0.12050089346754063,
16
+ "eval_recall": 0.16979244830587129,
17
+ "eval_runtime": 4.4465,
18
+ "eval_samples_per_second": 394.471,
19
+ "eval_steps_per_second": 12.369,
20
+ "step": 100
21
+ },
22
+ {
23
+ "epoch": 1.4,
24
+ "eval_accuracy": 0.427594070695553,
25
+ "eval_f1": 0.12040810633250204,
26
+ "eval_loss": 1.4812891483306885,
27
+ "eval_precision": 0.2905383917464059,
28
+ "eval_recall": 0.177815771059219,
29
+ "eval_runtime": 4.4673,
30
+ "eval_samples_per_second": 392.635,
31
+ "eval_steps_per_second": 12.312,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 2.1,
36
+ "eval_accuracy": 0.4372862029646522,
37
+ "eval_f1": 0.14546683883549175,
38
+ "eval_loss": 1.4237111806869507,
39
+ "eval_precision": 0.42208362471970823,
40
+ "eval_recall": 0.19276600654371012,
41
+ "eval_runtime": 4.4781,
42
+ "eval_samples_per_second": 391.684,
43
+ "eval_steps_per_second": 12.282,
44
+ "step": 300
45
+ },
46
+ {
47
+ "epoch": 2.8,
48
+ "eval_accuracy": 0.4697833523375142,
49
+ "eval_f1": 0.210481031576959,
50
+ "eval_loss": 1.371217966079712,
51
+ "eval_precision": 0.5062004262035059,
52
+ "eval_recall": 0.24142832815118861,
53
+ "eval_runtime": 4.4826,
54
+ "eval_samples_per_second": 391.288,
55
+ "eval_steps_per_second": 12.27,
56
+ "step": 400
57
+ },
58
+ {
59
+ "epoch": 3.5,
60
+ "learning_rate": 8.601398601398601e-07,
61
+ "loss": 1.5085,
62
+ "step": 500
63
+ },
64
+ {
65
+ "epoch": 3.5,
66
+ "eval_accuracy": 0.5096921322690992,
67
+ "eval_f1": 0.2991932681262163,
68
+ "eval_loss": 1.3205866813659668,
69
+ "eval_precision": 0.5396425394462333,
70
+ "eval_recall": 0.31188637043469,
71
+ "eval_runtime": 4.4769,
72
+ "eval_samples_per_second": 391.786,
73
+ "eval_steps_per_second": 12.285,
74
+ "step": 500
75
+ },
76
+ {
77
+ "epoch": 4.2,
78
+ "eval_accuracy": 0.5313568985176739,
79
+ "eval_f1": 0.3432001047531137,
80
+ "eval_loss": 1.2814286947250366,
81
+ "eval_precision": 0.5476304744481882,
82
+ "eval_recall": 0.3460957578100579,
83
+ "eval_runtime": 4.4777,
84
+ "eval_samples_per_second": 391.715,
85
+ "eval_steps_per_second": 12.283,
86
+ "step": 600
87
+ },
88
+ {
89
+ "epoch": 4.9,
90
+ "eval_accuracy": 0.5478905359179019,
91
+ "eval_f1": 0.36364585622645745,
92
+ "eval_loss": 1.244052767753601,
93
+ "eval_precision": 0.5760825095110559,
94
+ "eval_recall": 0.3636528820286021,
95
+ "eval_runtime": 4.4848,
96
+ "eval_samples_per_second": 391.1,
97
+ "eval_steps_per_second": 12.264,
98
+ "step": 700
99
+ },
100
+ {
101
+ "epoch": 5.59,
102
+ "eval_accuracy": 0.564994298745724,
103
+ "eval_f1": 0.404480047810517,
104
+ "eval_loss": 1.2141515016555786,
105
+ "eval_precision": 0.5735029461920788,
106
+ "eval_recall": 0.4043742479016447,
107
+ "eval_runtime": 4.4772,
108
+ "eval_samples_per_second": 391.765,
109
+ "eval_steps_per_second": 12.285,
110
+ "step": 800
111
+ },
112
+ {
113
+ "epoch": 6.29,
114
+ "eval_accuracy": 0.5712656784492588,
115
+ "eval_f1": 0.4208653100294366,
116
+ "eval_loss": 1.1893807649612427,
117
+ "eval_precision": 0.5743175749828859,
118
+ "eval_recall": 0.4133799742552944,
119
+ "eval_runtime": 4.4767,
120
+ "eval_samples_per_second": 391.81,
121
+ "eval_steps_per_second": 12.286,
122
+ "step": 900
123
+ },
124
+ {
125
+ "epoch": 6.99,
126
+ "learning_rate": 7.202797202797203e-07,
127
+ "loss": 1.2175,
128
+ "step": 1000
129
+ },
130
+ {
131
+ "epoch": 6.99,
132
+ "eval_accuracy": 0.580387685290764,
133
+ "eval_f1": 0.4371991202067779,
134
+ "eval_loss": 1.1725378036499023,
135
+ "eval_precision": 0.5842634392918301,
136
+ "eval_recall": 0.4298726634765053,
137
+ "eval_runtime": 4.4764,
138
+ "eval_samples_per_second": 391.835,
139
+ "eval_steps_per_second": 12.287,
140
+ "step": 1000
141
+ },
142
+ {
143
+ "epoch": 7.69,
144
+ "eval_accuracy": 0.5809578107183581,
145
+ "eval_f1": 0.4408274702869188,
146
+ "eval_loss": 1.1620036363601685,
147
+ "eval_precision": 0.5854335983757126,
148
+ "eval_recall": 0.43482698804326453,
149
+ "eval_runtime": 4.486,
150
+ "eval_samples_per_second": 390.994,
151
+ "eval_steps_per_second": 12.26,
152
+ "step": 1100
153
+ },
154
+ {
155
+ "epoch": 8.39,
156
+ "eval_accuracy": 0.5838084378563284,
157
+ "eval_f1": 0.4472584192373395,
158
+ "eval_loss": 1.149905800819397,
159
+ "eval_precision": 0.595369432980278,
160
+ "eval_recall": 0.4366262493617213,
161
+ "eval_runtime": 4.4863,
162
+ "eval_samples_per_second": 390.966,
163
+ "eval_steps_per_second": 12.259,
164
+ "step": 1200
165
+ },
166
+ {
167
+ "epoch": 9.09,
168
+ "eval_accuracy": 0.5838084378563284,
169
+ "eval_f1": 0.46451227241636794,
170
+ "eval_loss": 1.1461801528930664,
171
+ "eval_precision": 0.5876375812965753,
172
+ "eval_recall": 0.4579534952687372,
173
+ "eval_runtime": 4.4856,
174
+ "eval_samples_per_second": 391.032,
175
+ "eval_steps_per_second": 12.262,
176
+ "step": 1300
177
+ },
178
+ {
179
+ "epoch": 9.79,
180
+ "eval_accuracy": 0.5923603192702395,
181
+ "eval_f1": 0.4734477369487841,
182
+ "eval_loss": 1.1318447589874268,
183
+ "eval_precision": 0.5840368383919669,
184
+ "eval_recall": 0.45527029567907323,
185
+ "eval_runtime": 4.4831,
186
+ "eval_samples_per_second": 391.251,
187
+ "eval_steps_per_second": 12.268,
188
+ "step": 1400
189
+ },
190
+ {
191
+ "epoch": 10.49,
192
+ "learning_rate": 5.804195804195804e-07,
193
+ "loss": 1.0943,
194
+ "step": 1500
195
+ },
196
+ {
197
+ "epoch": 10.49,
198
+ "eval_accuracy": 0.5866590649942988,
199
+ "eval_f1": 0.4727808386064695,
200
+ "eval_loss": 1.129315972328186,
201
+ "eval_precision": 0.5761186775852539,
202
+ "eval_recall": 0.46110880687723116,
203
+ "eval_runtime": 4.4785,
204
+ "eval_samples_per_second": 391.652,
205
+ "eval_steps_per_second": 12.281,
206
+ "step": 1500
207
+ },
208
+ {
209
+ "epoch": 11.19,
210
+ "eval_accuracy": 0.5935005701254276,
211
+ "eval_f1": 0.47528702119550764,
212
+ "eval_loss": 1.123066782951355,
213
+ "eval_precision": 0.5787843440593811,
214
+ "eval_recall": 0.4609284974767651,
215
+ "eval_runtime": 4.4827,
216
+ "eval_samples_per_second": 391.283,
217
+ "eval_steps_per_second": 12.269,
218
+ "step": 1600
219
+ },
220
+ {
221
+ "epoch": 11.89,
222
+ "eval_accuracy": 0.5946408209806158,
223
+ "eval_f1": 0.47912626791425633,
224
+ "eval_loss": 1.1193214654922485,
225
+ "eval_precision": 0.5890459278861226,
226
+ "eval_recall": 0.4627384901015755,
227
+ "eval_runtime": 4.4898,
228
+ "eval_samples_per_second": 390.659,
229
+ "eval_steps_per_second": 12.25,
230
+ "step": 1700
231
+ },
232
+ {
233
+ "epoch": 12.59,
234
+ "eval_accuracy": 0.5940706955530216,
235
+ "eval_f1": 0.482317247304243,
236
+ "eval_loss": 1.1152013540267944,
237
+ "eval_precision": 0.586478359238947,
238
+ "eval_recall": 0.4654431113148698,
239
+ "eval_runtime": 4.4731,
240
+ "eval_samples_per_second": 392.124,
241
+ "eval_steps_per_second": 12.296,
242
+ "step": 1800
243
+ },
244
+ {
245
+ "epoch": 13.29,
246
+ "eval_accuracy": 0.5963511972633979,
247
+ "eval_f1": 0.4920536270108653,
248
+ "eval_loss": 1.1146811246871948,
249
+ "eval_precision": 0.5965239670211226,
250
+ "eval_recall": 0.47540949699570006,
251
+ "eval_runtime": 4.4839,
252
+ "eval_samples_per_second": 391.176,
253
+ "eval_steps_per_second": 12.266,
254
+ "step": 1900
255
+ },
256
+ {
257
+ "epoch": 13.99,
258
+ "learning_rate": 4.4055944055944054e-07,
259
+ "loss": 1.0322,
260
+ "step": 2000
261
+ },
262
+ {
263
+ "epoch": 13.99,
264
+ "eval_accuracy": 0.5997719498289624,
265
+ "eval_f1": 0.5052664281764545,
266
+ "eval_loss": 1.1116981506347656,
267
+ "eval_precision": 0.5812726767544368,
268
+ "eval_recall": 0.4881922967280751,
269
+ "eval_runtime": 4.4778,
270
+ "eval_samples_per_second": 391.711,
271
+ "eval_steps_per_second": 12.283,
272
+ "step": 2000
273
+ },
274
+ {
275
+ "epoch": 14.69,
276
+ "eval_accuracy": 0.5980615735461802,
277
+ "eval_f1": 0.4950733822245435,
278
+ "eval_loss": 1.1113407611846924,
279
+ "eval_precision": 0.5921759445453222,
280
+ "eval_recall": 0.476526522934686,
281
+ "eval_runtime": 4.4867,
282
+ "eval_samples_per_second": 390.933,
283
+ "eval_steps_per_second": 12.258,
284
+ "step": 2100
285
+ },
286
+ {
287
+ "epoch": 15.38,
288
+ "eval_accuracy": 0.6031927023945268,
289
+ "eval_f1": 0.5080542080295547,
290
+ "eval_loss": 1.106334924697876,
291
+ "eval_precision": 0.5878492556075186,
292
+ "eval_recall": 0.4882540980391026,
293
+ "eval_runtime": 4.4804,
294
+ "eval_samples_per_second": 391.485,
295
+ "eval_steps_per_second": 12.276,
296
+ "step": 2200
297
+ },
298
+ {
299
+ "epoch": 16.08,
300
+ "eval_accuracy": 0.5986316989737742,
301
+ "eval_f1": 0.5064417139041212,
302
+ "eval_loss": 1.1066973209381104,
303
+ "eval_precision": 0.5812971550501614,
304
+ "eval_recall": 0.48766556813559586,
305
+ "eval_runtime": 4.4833,
306
+ "eval_samples_per_second": 391.232,
307
+ "eval_steps_per_second": 12.268,
308
+ "step": 2300
309
+ },
310
+ {
311
+ "epoch": 16.78,
312
+ "eval_accuracy": 0.5992018244013683,
313
+ "eval_f1": 0.5144759419665945,
314
+ "eval_loss": 1.1088467836380005,
315
+ "eval_precision": 0.5842555668149519,
316
+ "eval_recall": 0.4977201010470285,
317
+ "eval_runtime": 4.485,
318
+ "eval_samples_per_second": 391.086,
319
+ "eval_steps_per_second": 12.263,
320
+ "step": 2400
321
+ },
322
+ {
323
+ "epoch": 17.48,
324
+ "learning_rate": 3.0069930069930065e-07,
325
+ "loss": 0.9904,
326
+ "step": 2500
327
+ },
328
+ {
329
+ "epoch": 17.48,
330
+ "eval_accuracy": 0.604903078677309,
331
+ "eval_f1": 0.5138417068927378,
332
+ "eval_loss": 1.103458285331726,
333
+ "eval_precision": 0.5926788541581886,
334
+ "eval_recall": 0.49161294220421387,
335
+ "eval_runtime": 4.4916,
336
+ "eval_samples_per_second": 390.504,
337
+ "eval_steps_per_second": 12.245,
338
+ "step": 2500
339
+ },
340
+ {
341
+ "epoch": 18.18,
342
+ "eval_accuracy": 0.6009122006841505,
343
+ "eval_f1": 0.5128255668408088,
344
+ "eval_loss": 1.1030323505401611,
345
+ "eval_precision": 0.5854725243604096,
346
+ "eval_recall": 0.49273911177944535,
347
+ "eval_runtime": 4.4865,
348
+ "eval_samples_per_second": 390.952,
349
+ "eval_steps_per_second": 12.259,
350
+ "step": 2600
351
+ },
352
+ {
353
+ "epoch": 18.88,
354
+ "eval_accuracy": 0.6043329532497149,
355
+ "eval_f1": 0.5182170922509549,
356
+ "eval_loss": 1.103196144104004,
357
+ "eval_precision": 0.5849346509995733,
358
+ "eval_recall": 0.5000349447222631,
359
+ "eval_runtime": 4.4934,
360
+ "eval_samples_per_second": 390.352,
361
+ "eval_steps_per_second": 12.24,
362
+ "step": 2700
363
+ },
364
+ {
365
+ "epoch": 19.58,
366
+ "eval_accuracy": 0.6037628278221209,
367
+ "eval_f1": 0.5202164822232764,
368
+ "eval_loss": 1.101340889930725,
369
+ "eval_precision": 0.5858419257130305,
370
+ "eval_recall": 0.4998947792675687,
371
+ "eval_runtime": 4.5047,
372
+ "eval_samples_per_second": 389.368,
373
+ "eval_steps_per_second": 12.209,
374
+ "step": 2800
375
+ },
376
+ {
377
+ "epoch": 20.28,
378
+ "eval_accuracy": 0.5980615735461802,
379
+ "eval_f1": 0.5123717547824674,
380
+ "eval_loss": 1.1047625541687012,
381
+ "eval_precision": 0.578761691251246,
382
+ "eval_recall": 0.49570507510335854,
383
+ "eval_runtime": 4.4747,
384
+ "eval_samples_per_second": 391.982,
385
+ "eval_steps_per_second": 12.291,
386
+ "step": 2900
387
+ },
388
+ {
389
+ "epoch": 20.98,
390
+ "learning_rate": 1.6083916083916083e-07,
391
+ "loss": 0.9668,
392
+ "step": 3000
393
+ },
394
+ {
395
+ "epoch": 20.98,
396
+ "eval_accuracy": 0.604903078677309,
397
+ "eval_f1": 0.5202602503811038,
398
+ "eval_loss": 1.101715087890625,
399
+ "eval_precision": 0.5850990723622214,
400
+ "eval_recall": 0.5003182700966798,
401
+ "eval_runtime": 4.4687,
402
+ "eval_samples_per_second": 392.504,
403
+ "eval_steps_per_second": 12.308,
404
+ "step": 3000
405
+ },
406
+ {
407
+ "epoch": 21.68,
408
+ "eval_accuracy": 0.5969213226909921,
409
+ "eval_f1": 0.5087091015932135,
410
+ "eval_loss": 1.1041690111160278,
411
+ "eval_precision": 0.5776317246221271,
412
+ "eval_recall": 0.49085484392653084,
413
+ "eval_runtime": 4.4913,
414
+ "eval_samples_per_second": 390.529,
415
+ "eval_steps_per_second": 12.246,
416
+ "step": 3100
417
+ },
418
+ {
419
+ "epoch": 22.38,
420
+ "eval_accuracy": 0.5974914481185861,
421
+ "eval_f1": 0.5102567318224768,
422
+ "eval_loss": 1.1038755178451538,
423
+ "eval_precision": 0.5773669841286153,
424
+ "eval_recall": 0.492870631713158,
425
+ "eval_runtime": 4.4956,
426
+ "eval_samples_per_second": 390.157,
427
+ "eval_steps_per_second": 12.234,
428
+ "step": 3200
429
+ },
430
+ {
431
+ "epoch": 23.08,
432
+ "eval_accuracy": 0.6026225769669328,
433
+ "eval_f1": 0.517406518972602,
434
+ "eval_loss": 1.1019535064697266,
435
+ "eval_precision": 0.5835642472341532,
436
+ "eval_recall": 0.49834149433743596,
437
+ "eval_runtime": 4.5136,
438
+ "eval_samples_per_second": 388.606,
439
+ "eval_steps_per_second": 12.185,
440
+ "step": 3300
441
+ },
442
+ {
443
+ "epoch": 23.78,
444
+ "eval_accuracy": 0.6026225769669328,
445
+ "eval_f1": 0.519434801852095,
446
+ "eval_loss": 1.1018511056900024,
447
+ "eval_precision": 0.5850961696766732,
448
+ "eval_recall": 0.5000828056101978,
449
+ "eval_runtime": 4.4873,
450
+ "eval_samples_per_second": 390.877,
451
+ "eval_steps_per_second": 12.257,
452
+ "step": 3400
453
+ },
454
+ {
455
+ "epoch": 24.48,
456
+ "learning_rate": 2.0979020979020978e-08,
457
+ "loss": 0.9478,
458
+ "step": 3500
459
+ },
460
+ {
461
+ "epoch": 24.48,
462
+ "eval_accuracy": 0.6043329532497149,
463
+ "eval_f1": 0.5206267636935633,
464
+ "eval_loss": 1.1016548871994019,
465
+ "eval_precision": 0.5862327135886009,
466
+ "eval_recall": 0.5013396297726423,
467
+ "eval_runtime": 4.49,
468
+ "eval_samples_per_second": 390.642,
469
+ "eval_steps_per_second": 12.249,
470
+ "step": 3500
471
+ }
472
+ ],
473
+ "max_steps": 3575,
474
+ "num_train_epochs": 25,
475
+ "total_flos": 1.118525266040832e+16,
476
+ "trial_name": null,
477
+ "trial_params": null
478
+ }