oMateos2020 commited on
Commit
a6b3be7
1 Parent(s): 452d75e

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +1080 -0
trainer_state.json ADDED
@@ -0,0 +1,1080 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 31.7626,
3
+ "best_model_checkpoint": "/content/drive/Shareddrives/UCM_SHARED/TFM_ESG/Ejemplo Oficial de T5-Summarization de HF/t5-small_adafactor/checkpoint-8100",
4
+ "epoch": 0.9880028228652082,
5
+ "global_step": 8400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.02,
12
+ "learning_rate": 0.0009823570924488356,
13
+ "loss": 2.9361,
14
+ "step": 150
15
+ },
16
+ {
17
+ "epoch": 0.02,
18
+ "eval_gen_len": 18.8845,
19
+ "eval_loss": 2.6215693950653076,
20
+ "eval_rouge1": 26.8542,
21
+ "eval_rouge2": 6.8667,
22
+ "eval_rougeL": 21.1484,
23
+ "eval_rougeLsum": 21.1563,
24
+ "eval_runtime": 342.0879,
25
+ "eval_samples_per_second": 33.126,
26
+ "eval_steps_per_second": 1.383,
27
+ "step": 150
28
+ },
29
+ {
30
+ "epoch": 0.04,
31
+ "learning_rate": 0.0009648318042813455,
32
+ "loss": 2.8543,
33
+ "step": 300
34
+ },
35
+ {
36
+ "epoch": 0.04,
37
+ "eval_gen_len": 18.8097,
38
+ "eval_loss": 2.5835769176483154,
39
+ "eval_rouge1": 27.2234,
40
+ "eval_rouge2": 7.1753,
41
+ "eval_rougeL": 21.5276,
42
+ "eval_rougeLsum": 21.5308,
43
+ "eval_runtime": 342.5643,
44
+ "eval_samples_per_second": 33.08,
45
+ "eval_steps_per_second": 1.381,
46
+ "step": 300
47
+ },
48
+ {
49
+ "epoch": 0.05,
50
+ "learning_rate": 0.0009471888967301812,
51
+ "loss": 2.814,
52
+ "step": 450
53
+ },
54
+ {
55
+ "epoch": 0.05,
56
+ "eval_gen_len": 18.8294,
57
+ "eval_loss": 2.564615488052368,
58
+ "eval_rouge1": 28.1695,
59
+ "eval_rouge2": 7.7873,
60
+ "eval_rougeL": 22.2229,
61
+ "eval_rougeLsum": 22.2251,
62
+ "eval_runtime": 341.6846,
63
+ "eval_samples_per_second": 33.165,
64
+ "eval_steps_per_second": 1.384,
65
+ "step": 450
66
+ },
67
+ {
68
+ "epoch": 0.07,
69
+ "learning_rate": 0.0009295459891790166,
70
+ "loss": 2.7861,
71
+ "step": 600
72
+ },
73
+ {
74
+ "epoch": 0.07,
75
+ "eval_gen_len": 18.7867,
76
+ "eval_loss": 2.5409207344055176,
77
+ "eval_rouge1": 28.5349,
78
+ "eval_rouge2": 7.9732,
79
+ "eval_rougeL": 22.6959,
80
+ "eval_rougeLsum": 22.7078,
81
+ "eval_runtime": 341.6917,
82
+ "eval_samples_per_second": 33.164,
83
+ "eval_steps_per_second": 1.384,
84
+ "step": 600
85
+ },
86
+ {
87
+ "epoch": 0.09,
88
+ "learning_rate": 0.0009119030816278523,
89
+ "loss": 2.76,
90
+ "step": 750
91
+ },
92
+ {
93
+ "epoch": 0.09,
94
+ "eval_gen_len": 18.7526,
95
+ "eval_loss": 2.5160539150238037,
96
+ "eval_rouge1": 28.5629,
97
+ "eval_rouge2": 7.9485,
98
+ "eval_rougeL": 22.6545,
99
+ "eval_rougeLsum": 22.6617,
100
+ "eval_runtime": 341.8755,
101
+ "eval_samples_per_second": 33.147,
102
+ "eval_steps_per_second": 1.384,
103
+ "step": 750
104
+ },
105
+ {
106
+ "epoch": 0.11,
107
+ "learning_rate": 0.0008942601740766878,
108
+ "loss": 2.7545,
109
+ "step": 900
110
+ },
111
+ {
112
+ "epoch": 0.11,
113
+ "eval_gen_len": 18.7481,
114
+ "eval_loss": 2.5028903484344482,
115
+ "eval_rouge1": 29.1851,
116
+ "eval_rouge2": 8.3586,
117
+ "eval_rougeL": 23.1781,
118
+ "eval_rougeLsum": 23.1815,
119
+ "eval_runtime": 341.7767,
120
+ "eval_samples_per_second": 33.156,
121
+ "eval_steps_per_second": 1.384,
122
+ "step": 900
123
+ },
124
+ {
125
+ "epoch": 0.12,
126
+ "learning_rate": 0.0008767348859091978,
127
+ "loss": 2.7294,
128
+ "step": 1050
129
+ },
130
+ {
131
+ "epoch": 0.12,
132
+ "eval_gen_len": 18.6948,
133
+ "eval_loss": 2.4874510765075684,
134
+ "eval_rouge1": 28.7611,
135
+ "eval_rouge2": 8.0309,
136
+ "eval_rougeL": 22.8289,
137
+ "eval_rougeLsum": 22.8364,
138
+ "eval_runtime": 341.7005,
139
+ "eval_samples_per_second": 33.164,
140
+ "eval_steps_per_second": 1.384,
141
+ "step": 1050
142
+ },
143
+ {
144
+ "epoch": 0.14,
145
+ "learning_rate": 0.0008590919783580335,
146
+ "loss": 2.7005,
147
+ "step": 1200
148
+ },
149
+ {
150
+ "epoch": 0.14,
151
+ "eval_gen_len": 18.7843,
152
+ "eval_loss": 2.4769885540008545,
153
+ "eval_rouge1": 29.1072,
154
+ "eval_rouge2": 8.2713,
155
+ "eval_rougeL": 23.0568,
156
+ "eval_rougeLsum": 23.0602,
157
+ "eval_runtime": 341.9693,
158
+ "eval_samples_per_second": 33.137,
159
+ "eval_steps_per_second": 1.383,
160
+ "step": 1200
161
+ },
162
+ {
163
+ "epoch": 0.16,
164
+ "learning_rate": 0.0008414490708068689,
165
+ "loss": 2.7057,
166
+ "step": 1350
167
+ },
168
+ {
169
+ "epoch": 0.16,
170
+ "eval_gen_len": 18.6973,
171
+ "eval_loss": 2.46754789352417,
172
+ "eval_rouge1": 29.0607,
173
+ "eval_rouge2": 8.4308,
174
+ "eval_rougeL": 23.1094,
175
+ "eval_rougeLsum": 23.1125,
176
+ "eval_runtime": 341.5228,
177
+ "eval_samples_per_second": 33.181,
178
+ "eval_steps_per_second": 1.385,
179
+ "step": 1350
180
+ },
181
+ {
182
+ "epoch": 0.18,
183
+ "learning_rate": 0.0008238061632557046,
184
+ "loss": 2.6779,
185
+ "step": 1500
186
+ },
187
+ {
188
+ "epoch": 0.18,
189
+ "eval_gen_len": 18.7927,
190
+ "eval_loss": 2.4461910724639893,
191
+ "eval_rouge1": 29.4815,
192
+ "eval_rouge2": 8.6203,
193
+ "eval_rougeL": 23.3742,
194
+ "eval_rougeLsum": 23.3745,
195
+ "eval_runtime": 341.6546,
196
+ "eval_samples_per_second": 33.168,
197
+ "eval_steps_per_second": 1.384,
198
+ "step": 1500
199
+ },
200
+ {
201
+ "epoch": 0.19,
202
+ "learning_rate": 0.00080616325570454,
203
+ "loss": 2.6944,
204
+ "step": 1650
205
+ },
206
+ {
207
+ "epoch": 0.19,
208
+ "eval_gen_len": 18.8219,
209
+ "eval_loss": 2.4397881031036377,
210
+ "eval_rouge1": 29.3817,
211
+ "eval_rouge2": 8.5247,
212
+ "eval_rougeL": 23.3088,
213
+ "eval_rougeLsum": 23.3137,
214
+ "eval_runtime": 341.8781,
215
+ "eval_samples_per_second": 33.146,
216
+ "eval_steps_per_second": 1.384,
217
+ "step": 1650
218
+ },
219
+ {
220
+ "epoch": 0.21,
221
+ "learning_rate": 0.0007885203481533757,
222
+ "loss": 2.6589,
223
+ "step": 1800
224
+ },
225
+ {
226
+ "epoch": 0.21,
227
+ "eval_gen_len": 18.7917,
228
+ "eval_loss": 2.429356813430786,
229
+ "eval_rouge1": 29.1914,
230
+ "eval_rouge2": 8.5565,
231
+ "eval_rougeL": 23.2017,
232
+ "eval_rougeLsum": 23.2039,
233
+ "eval_runtime": 341.5952,
234
+ "eval_samples_per_second": 33.174,
235
+ "eval_steps_per_second": 1.385,
236
+ "step": 1800
237
+ },
238
+ {
239
+ "epoch": 0.23,
240
+ "learning_rate": 0.0007708774406022112,
241
+ "loss": 2.6473,
242
+ "step": 1950
243
+ },
244
+ {
245
+ "epoch": 0.23,
246
+ "eval_gen_len": 18.7605,
247
+ "eval_loss": 2.417249917984009,
248
+ "eval_rouge1": 29.9482,
249
+ "eval_rouge2": 8.8321,
250
+ "eval_rougeL": 23.7848,
251
+ "eval_rougeLsum": 23.7954,
252
+ "eval_runtime": 341.816,
253
+ "eval_samples_per_second": 33.152,
254
+ "eval_steps_per_second": 1.384,
255
+ "step": 1950
256
+ },
257
+ {
258
+ "epoch": 0.25,
259
+ "learning_rate": 0.0007532345330510469,
260
+ "loss": 2.6498,
261
+ "step": 2100
262
+ },
263
+ {
264
+ "epoch": 0.25,
265
+ "eval_gen_len": 18.8079,
266
+ "eval_loss": 2.4158124923706055,
267
+ "eval_rouge1": 29.5843,
268
+ "eval_rouge2": 8.9229,
269
+ "eval_rougeL": 23.6193,
270
+ "eval_rougeLsum": 23.6255,
271
+ "eval_runtime": 341.8291,
272
+ "eval_samples_per_second": 33.151,
273
+ "eval_steps_per_second": 1.384,
274
+ "step": 2100
275
+ },
276
+ {
277
+ "epoch": 0.26,
278
+ "learning_rate": 0.0007355916254998823,
279
+ "loss": 2.6321,
280
+ "step": 2250
281
+ },
282
+ {
283
+ "epoch": 0.26,
284
+ "eval_gen_len": 18.7639,
285
+ "eval_loss": 2.4022669792175293,
286
+ "eval_rouge1": 29.9147,
287
+ "eval_rouge2": 9.0085,
288
+ "eval_rougeL": 23.8569,
289
+ "eval_rougeLsum": 23.8709,
290
+ "eval_runtime": 341.59,
291
+ "eval_samples_per_second": 33.174,
292
+ "eval_steps_per_second": 1.385,
293
+ "step": 2250
294
+ },
295
+ {
296
+ "epoch": 0.28,
297
+ "learning_rate": 0.000717948717948718,
298
+ "loss": 2.614,
299
+ "step": 2400
300
+ },
301
+ {
302
+ "epoch": 0.28,
303
+ "eval_gen_len": 18.6922,
304
+ "eval_loss": 2.3971035480499268,
305
+ "eval_rouge1": 29.711,
306
+ "eval_rouge2": 9.0017,
307
+ "eval_rougeL": 23.761,
308
+ "eval_rougeLsum": 23.7742,
309
+ "eval_runtime": 341.4293,
310
+ "eval_samples_per_second": 33.19,
311
+ "eval_steps_per_second": 1.385,
312
+ "step": 2400
313
+ },
314
+ {
315
+ "epoch": 0.3,
316
+ "learning_rate": 0.0007003058103975535,
317
+ "loss": 2.6212,
318
+ "step": 2550
319
+ },
320
+ {
321
+ "epoch": 0.3,
322
+ "eval_gen_len": 18.7082,
323
+ "eval_loss": 2.390623092651367,
324
+ "eval_rouge1": 29.498,
325
+ "eval_rouge2": 8.6483,
326
+ "eval_rougeL": 23.5006,
327
+ "eval_rougeLsum": 23.5112,
328
+ "eval_runtime": 341.8905,
329
+ "eval_samples_per_second": 33.145,
330
+ "eval_steps_per_second": 1.383,
331
+ "step": 2550
332
+ },
333
+ {
334
+ "epoch": 0.32,
335
+ "learning_rate": 0.0006826629028463891,
336
+ "loss": 2.6151,
337
+ "step": 2700
338
+ },
339
+ {
340
+ "epoch": 0.32,
341
+ "eval_gen_len": 18.8923,
342
+ "eval_loss": 2.3796188831329346,
343
+ "eval_rouge1": 29.8502,
344
+ "eval_rouge2": 8.9427,
345
+ "eval_rougeL": 23.6665,
346
+ "eval_rougeLsum": 23.6781,
347
+ "eval_runtime": 341.6707,
348
+ "eval_samples_per_second": 33.166,
349
+ "eval_steps_per_second": 1.384,
350
+ "step": 2700
351
+ },
352
+ {
353
+ "epoch": 0.34,
354
+ "learning_rate": 0.0006650199952952247,
355
+ "loss": 2.6123,
356
+ "step": 2850
357
+ },
358
+ {
359
+ "epoch": 0.34,
360
+ "eval_gen_len": 18.7541,
361
+ "eval_loss": 2.371220588684082,
362
+ "eval_rouge1": 30.0211,
363
+ "eval_rouge2": 9.2412,
364
+ "eval_rougeL": 24.0388,
365
+ "eval_rougeLsum": 24.045,
366
+ "eval_runtime": 341.6315,
367
+ "eval_samples_per_second": 33.17,
368
+ "eval_steps_per_second": 1.385,
369
+ "step": 2850
370
+ },
371
+ {
372
+ "epoch": 0.35,
373
+ "learning_rate": 0.0006473770877440603,
374
+ "loss": 2.5928,
375
+ "step": 3000
376
+ },
377
+ {
378
+ "epoch": 0.35,
379
+ "eval_gen_len": 18.7963,
380
+ "eval_loss": 2.359456777572632,
381
+ "eval_rouge1": 30.3185,
382
+ "eval_rouge2": 9.2796,
383
+ "eval_rougeL": 24.2004,
384
+ "eval_rougeLsum": 24.2101,
385
+ "eval_runtime": 341.8833,
386
+ "eval_samples_per_second": 33.146,
387
+ "eval_steps_per_second": 1.384,
388
+ "step": 3000
389
+ },
390
+ {
391
+ "epoch": 0.37,
392
+ "learning_rate": 0.0006297341801928958,
393
+ "loss": 2.5746,
394
+ "step": 3150
395
+ },
396
+ {
397
+ "epoch": 0.37,
398
+ "eval_gen_len": 18.8229,
399
+ "eval_loss": 2.366935968399048,
400
+ "eval_rouge1": 29.8303,
401
+ "eval_rouge2": 9.1665,
402
+ "eval_rougeL": 23.8553,
403
+ "eval_rougeLsum": 23.8672,
404
+ "eval_runtime": 341.7148,
405
+ "eval_samples_per_second": 33.162,
406
+ "eval_steps_per_second": 1.384,
407
+ "step": 3150
408
+ },
409
+ {
410
+ "epoch": 0.39,
411
+ "learning_rate": 0.0006120912726417314,
412
+ "loss": 2.5619,
413
+ "step": 3300
414
+ },
415
+ {
416
+ "epoch": 0.39,
417
+ "eval_gen_len": 18.8087,
418
+ "eval_loss": 2.3527944087982178,
419
+ "eval_rouge1": 30.4684,
420
+ "eval_rouge2": 9.4451,
421
+ "eval_rougeL": 24.2867,
422
+ "eval_rougeLsum": 24.2939,
423
+ "eval_runtime": 341.8038,
424
+ "eval_samples_per_second": 33.154,
425
+ "eval_steps_per_second": 1.384,
426
+ "step": 3300
427
+ },
428
+ {
429
+ "epoch": 0.41,
430
+ "learning_rate": 0.000594448365090567,
431
+ "loss": 2.5677,
432
+ "step": 3450
433
+ },
434
+ {
435
+ "epoch": 0.41,
436
+ "eval_gen_len": 18.8491,
437
+ "eval_loss": 2.343312978744507,
438
+ "eval_rouge1": 30.496,
439
+ "eval_rouge2": 9.4356,
440
+ "eval_rougeL": 24.31,
441
+ "eval_rougeLsum": 24.3125,
442
+ "eval_runtime": 342.0548,
443
+ "eval_samples_per_second": 33.129,
444
+ "eval_steps_per_second": 1.383,
445
+ "step": 3450
446
+ },
447
+ {
448
+ "epoch": 0.42,
449
+ "learning_rate": 0.0005768054575394025,
450
+ "loss": 2.5456,
451
+ "step": 3600
452
+ },
453
+ {
454
+ "epoch": 0.42,
455
+ "eval_gen_len": 18.809,
456
+ "eval_loss": 2.3360698223114014,
457
+ "eval_rouge1": 30.5752,
458
+ "eval_rouge2": 9.4803,
459
+ "eval_rougeL": 24.3462,
460
+ "eval_rougeLsum": 24.3559,
461
+ "eval_runtime": 341.7402,
462
+ "eval_samples_per_second": 33.16,
463
+ "eval_steps_per_second": 1.384,
464
+ "step": 3600
465
+ },
466
+ {
467
+ "epoch": 0.44,
468
+ "learning_rate": 0.0005591625499882381,
469
+ "loss": 2.5669,
470
+ "step": 3750
471
+ },
472
+ {
473
+ "epoch": 0.44,
474
+ "eval_gen_len": 18.8287,
475
+ "eval_loss": 2.327768325805664,
476
+ "eval_rouge1": 30.0561,
477
+ "eval_rouge2": 9.2402,
478
+ "eval_rougeL": 24.0535,
479
+ "eval_rougeLsum": 24.0713,
480
+ "eval_runtime": 341.71,
481
+ "eval_samples_per_second": 33.163,
482
+ "eval_steps_per_second": 1.384,
483
+ "step": 3750
484
+ },
485
+ {
486
+ "epoch": 0.46,
487
+ "learning_rate": 0.0005415196424370737,
488
+ "loss": 2.5446,
489
+ "step": 3900
490
+ },
491
+ {
492
+ "epoch": 0.46,
493
+ "eval_gen_len": 18.8118,
494
+ "eval_loss": 2.3254404067993164,
495
+ "eval_rouge1": 30.3142,
496
+ "eval_rouge2": 9.4929,
497
+ "eval_rougeL": 24.335,
498
+ "eval_rougeLsum": 24.3432,
499
+ "eval_runtime": 341.8271,
500
+ "eval_samples_per_second": 33.151,
501
+ "eval_steps_per_second": 1.384,
502
+ "step": 3900
503
+ },
504
+ {
505
+ "epoch": 0.48,
506
+ "learning_rate": 0.0005238767348859092,
507
+ "loss": 2.5536,
508
+ "step": 4050
509
+ },
510
+ {
511
+ "epoch": 0.48,
512
+ "eval_gen_len": 18.9035,
513
+ "eval_loss": 2.3251070976257324,
514
+ "eval_rouge1": 30.2523,
515
+ "eval_rouge2": 9.3005,
516
+ "eval_rougeL": 24.163,
517
+ "eval_rougeLsum": 24.1711,
518
+ "eval_runtime": 341.8151,
519
+ "eval_samples_per_second": 33.152,
520
+ "eval_steps_per_second": 1.384,
521
+ "step": 4050
522
+ },
523
+ {
524
+ "epoch": 0.49,
525
+ "learning_rate": 0.0005062338273347448,
526
+ "loss": 2.5154,
527
+ "step": 4200
528
+ },
529
+ {
530
+ "epoch": 0.49,
531
+ "eval_gen_len": 18.7458,
532
+ "eval_loss": 2.313904047012329,
533
+ "eval_rouge1": 30.7718,
534
+ "eval_rouge2": 9.8676,
535
+ "eval_rougeL": 24.7111,
536
+ "eval_rougeLsum": 24.7266,
537
+ "eval_runtime": 341.6852,
538
+ "eval_samples_per_second": 33.165,
539
+ "eval_steps_per_second": 1.384,
540
+ "step": 4200
541
+ },
542
+ {
543
+ "epoch": 0.51,
544
+ "learning_rate": 0.0004885909197835804,
545
+ "loss": 2.5176,
546
+ "step": 4350
547
+ },
548
+ {
549
+ "epoch": 0.51,
550
+ "eval_gen_len": 18.7983,
551
+ "eval_loss": 2.309589385986328,
552
+ "eval_rouge1": 30.5617,
553
+ "eval_rouge2": 9.6562,
554
+ "eval_rougeL": 24.4754,
555
+ "eval_rougeLsum": 24.4862,
556
+ "eval_runtime": 341.8284,
557
+ "eval_samples_per_second": 33.151,
558
+ "eval_steps_per_second": 1.384,
559
+ "step": 4350
560
+ },
561
+ {
562
+ "epoch": 0.53,
563
+ "learning_rate": 0.0004709480122324159,
564
+ "loss": 2.5307,
565
+ "step": 4500
566
+ },
567
+ {
568
+ "epoch": 0.53,
569
+ "eval_gen_len": 18.8217,
570
+ "eval_loss": 2.3089160919189453,
571
+ "eval_rouge1": 30.601,
572
+ "eval_rouge2": 9.6672,
573
+ "eval_rougeL": 24.4465,
574
+ "eval_rougeLsum": 24.4567,
575
+ "eval_runtime": 341.6213,
576
+ "eval_samples_per_second": 33.171,
577
+ "eval_steps_per_second": 1.385,
578
+ "step": 4500
579
+ },
580
+ {
581
+ "epoch": 0.55,
582
+ "learning_rate": 0.00045330510468125144,
583
+ "loss": 2.515,
584
+ "step": 4650
585
+ },
586
+ {
587
+ "epoch": 0.55,
588
+ "eval_gen_len": 18.8096,
589
+ "eval_loss": 2.2991859912872314,
590
+ "eval_rouge1": 31.1807,
591
+ "eval_rouge2": 10.0105,
592
+ "eval_rougeL": 24.9947,
593
+ "eval_rougeLsum": 25.0096,
594
+ "eval_runtime": 341.7452,
595
+ "eval_samples_per_second": 33.159,
596
+ "eval_steps_per_second": 1.384,
597
+ "step": 4650
598
+ },
599
+ {
600
+ "epoch": 0.56,
601
+ "learning_rate": 0.000435662197130087,
602
+ "loss": 2.5168,
603
+ "step": 4800
604
+ },
605
+ {
606
+ "epoch": 0.56,
607
+ "eval_gen_len": 18.8254,
608
+ "eval_loss": 2.292003631591797,
609
+ "eval_rouge1": 30.8999,
610
+ "eval_rouge2": 9.8832,
611
+ "eval_rougeL": 24.7713,
612
+ "eval_rougeLsum": 24.7823,
613
+ "eval_runtime": 341.8542,
614
+ "eval_samples_per_second": 33.149,
615
+ "eval_steps_per_second": 1.384,
616
+ "step": 4800
617
+ },
618
+ {
619
+ "epoch": 0.58,
620
+ "learning_rate": 0.0004180192895789226,
621
+ "loss": 2.5021,
622
+ "step": 4950
623
+ },
624
+ {
625
+ "epoch": 0.58,
626
+ "eval_gen_len": 18.8787,
627
+ "eval_loss": 2.285507917404175,
628
+ "eval_rouge1": 31.0569,
629
+ "eval_rouge2": 9.8752,
630
+ "eval_rougeL": 24.791,
631
+ "eval_rougeLsum": 24.8033,
632
+ "eval_runtime": 341.7747,
633
+ "eval_samples_per_second": 33.156,
634
+ "eval_steps_per_second": 1.384,
635
+ "step": 4950
636
+ },
637
+ {
638
+ "epoch": 0.6,
639
+ "learning_rate": 0.00040037638202775815,
640
+ "loss": 2.501,
641
+ "step": 5100
642
+ },
643
+ {
644
+ "epoch": 0.6,
645
+ "eval_gen_len": 18.8315,
646
+ "eval_loss": 2.2868235111236572,
647
+ "eval_rouge1": 30.7132,
648
+ "eval_rouge2": 9.8654,
649
+ "eval_rougeL": 24.6084,
650
+ "eval_rougeLsum": 24.6244,
651
+ "eval_runtime": 341.9154,
652
+ "eval_samples_per_second": 33.143,
653
+ "eval_steps_per_second": 1.383,
654
+ "step": 5100
655
+ },
656
+ {
657
+ "epoch": 0.62,
658
+ "learning_rate": 0.0003827334744765937,
659
+ "loss": 2.4849,
660
+ "step": 5250
661
+ },
662
+ {
663
+ "epoch": 0.62,
664
+ "eval_gen_len": 18.8196,
665
+ "eval_loss": 2.2783043384552,
666
+ "eval_rouge1": 31.3434,
667
+ "eval_rouge2": 10.206,
668
+ "eval_rougeL": 25.0954,
669
+ "eval_rougeLsum": 25.1114,
670
+ "eval_runtime": 341.9076,
671
+ "eval_samples_per_second": 33.143,
672
+ "eval_steps_per_second": 1.383,
673
+ "step": 5250
674
+ },
675
+ {
676
+ "epoch": 0.64,
677
+ "learning_rate": 0.0003650905669254293,
678
+ "loss": 2.4939,
679
+ "step": 5400
680
+ },
681
+ {
682
+ "epoch": 0.64,
683
+ "eval_gen_len": 18.8579,
684
+ "eval_loss": 2.275907039642334,
685
+ "eval_rouge1": 31.1467,
686
+ "eval_rouge2": 10.0457,
687
+ "eval_rougeL": 24.964,
688
+ "eval_rougeLsum": 24.9793,
689
+ "eval_runtime": 341.6475,
690
+ "eval_samples_per_second": 33.169,
691
+ "eval_steps_per_second": 1.384,
692
+ "step": 5400
693
+ },
694
+ {
695
+ "epoch": 0.65,
696
+ "learning_rate": 0.00034744765937426485,
697
+ "loss": 2.4624,
698
+ "step": 5550
699
+ },
700
+ {
701
+ "epoch": 0.65,
702
+ "eval_gen_len": 18.8098,
703
+ "eval_loss": 2.2712931632995605,
704
+ "eval_rouge1": 31.4288,
705
+ "eval_rouge2": 10.1719,
706
+ "eval_rougeL": 25.1096,
707
+ "eval_rougeLsum": 25.1276,
708
+ "eval_runtime": 341.723,
709
+ "eval_samples_per_second": 33.161,
710
+ "eval_steps_per_second": 1.384,
711
+ "step": 5550
712
+ },
713
+ {
714
+ "epoch": 0.67,
715
+ "learning_rate": 0.0003298047518231004,
716
+ "loss": 2.456,
717
+ "step": 5700
718
+ },
719
+ {
720
+ "epoch": 0.67,
721
+ "eval_gen_len": 18.8262,
722
+ "eval_loss": 2.2674217224121094,
723
+ "eval_rouge1": 31.1515,
724
+ "eval_rouge2": 10.1208,
725
+ "eval_rougeL": 25.0456,
726
+ "eval_rougeLsum": 25.0532,
727
+ "eval_runtime": 341.9008,
728
+ "eval_samples_per_second": 33.144,
729
+ "eval_steps_per_second": 1.383,
730
+ "step": 5700
731
+ },
732
+ {
733
+ "epoch": 0.69,
734
+ "learning_rate": 0.000312161844271936,
735
+ "loss": 2.4667,
736
+ "step": 5850
737
+ },
738
+ {
739
+ "epoch": 0.69,
740
+ "eval_gen_len": 18.7816,
741
+ "eval_loss": 2.262035608291626,
742
+ "eval_rouge1": 31.3741,
743
+ "eval_rouge2": 10.1733,
744
+ "eval_rougeL": 25.1421,
745
+ "eval_rougeLsum": 25.159,
746
+ "eval_runtime": 341.6066,
747
+ "eval_samples_per_second": 33.173,
748
+ "eval_steps_per_second": 1.385,
749
+ "step": 5850
750
+ },
751
+ {
752
+ "epoch": 0.71,
753
+ "learning_rate": 0.000294636556104446,
754
+ "loss": 2.4658,
755
+ "step": 6000
756
+ },
757
+ {
758
+ "epoch": 0.71,
759
+ "eval_gen_len": 18.8,
760
+ "eval_loss": 2.258843183517456,
761
+ "eval_rouge1": 31.3913,
762
+ "eval_rouge2": 10.2645,
763
+ "eval_rougeL": 25.1746,
764
+ "eval_rougeLsum": 25.188,
765
+ "eval_runtime": 341.9247,
766
+ "eval_samples_per_second": 33.142,
767
+ "eval_steps_per_second": 1.383,
768
+ "step": 6000
769
+ },
770
+ {
771
+ "epoch": 0.72,
772
+ "learning_rate": 0.00027699364855328156,
773
+ "loss": 2.4943,
774
+ "step": 6150
775
+ },
776
+ {
777
+ "epoch": 0.72,
778
+ "eval_gen_len": 18.8074,
779
+ "eval_loss": 2.2533156871795654,
780
+ "eval_rouge1": 31.3905,
781
+ "eval_rouge2": 10.164,
782
+ "eval_rougeL": 25.093,
783
+ "eval_rougeLsum": 25.107,
784
+ "eval_runtime": 341.8317,
785
+ "eval_samples_per_second": 33.151,
786
+ "eval_steps_per_second": 1.384,
787
+ "step": 6150
788
+ },
789
+ {
790
+ "epoch": 0.74,
791
+ "learning_rate": 0.00025935074100211713,
792
+ "loss": 2.473,
793
+ "step": 6300
794
+ },
795
+ {
796
+ "epoch": 0.74,
797
+ "eval_gen_len": 18.7439,
798
+ "eval_loss": 2.2551848888397217,
799
+ "eval_rouge1": 31.1105,
800
+ "eval_rouge2": 10.1939,
801
+ "eval_rougeL": 24.9214,
802
+ "eval_rougeLsum": 24.9321,
803
+ "eval_runtime": 341.6034,
804
+ "eval_samples_per_second": 33.173,
805
+ "eval_steps_per_second": 1.385,
806
+ "step": 6300
807
+ },
808
+ {
809
+ "epoch": 0.76,
810
+ "learning_rate": 0.00024170783345095273,
811
+ "loss": 2.4687,
812
+ "step": 6450
813
+ },
814
+ {
815
+ "epoch": 0.76,
816
+ "eval_gen_len": 18.7709,
817
+ "eval_loss": 2.247098684310913,
818
+ "eval_rouge1": 31.3387,
819
+ "eval_rouge2": 10.2446,
820
+ "eval_rougeL": 25.0746,
821
+ "eval_rougeLsum": 25.0936,
822
+ "eval_runtime": 341.7347,
823
+ "eval_samples_per_second": 33.16,
824
+ "eval_steps_per_second": 1.384,
825
+ "step": 6450
826
+ },
827
+ {
828
+ "epoch": 0.78,
829
+ "learning_rate": 0.0002240649258997883,
830
+ "loss": 2.4286,
831
+ "step": 6600
832
+ },
833
+ {
834
+ "epoch": 0.78,
835
+ "eval_gen_len": 18.8313,
836
+ "eval_loss": 2.2467916011810303,
837
+ "eval_rouge1": 31.1953,
838
+ "eval_rouge2": 10.1277,
839
+ "eval_rougeL": 24.9535,
840
+ "eval_rougeLsum": 24.9714,
841
+ "eval_runtime": 341.8485,
842
+ "eval_samples_per_second": 33.149,
843
+ "eval_steps_per_second": 1.384,
844
+ "step": 6600
845
+ },
846
+ {
847
+ "epoch": 0.79,
848
+ "learning_rate": 0.00020642201834862386,
849
+ "loss": 2.4492,
850
+ "step": 6750
851
+ },
852
+ {
853
+ "epoch": 0.79,
854
+ "eval_gen_len": 18.7977,
855
+ "eval_loss": 2.2422139644622803,
856
+ "eval_rouge1": 31.7203,
857
+ "eval_rouge2": 10.4934,
858
+ "eval_rougeL": 25.3862,
859
+ "eval_rougeLsum": 25.3946,
860
+ "eval_runtime": 341.6909,
861
+ "eval_samples_per_second": 33.164,
862
+ "eval_steps_per_second": 1.384,
863
+ "step": 6750
864
+ },
865
+ {
866
+ "epoch": 0.81,
867
+ "learning_rate": 0.00018877911079745943,
868
+ "loss": 2.4427,
869
+ "step": 6900
870
+ },
871
+ {
872
+ "epoch": 0.81,
873
+ "eval_gen_len": 18.8144,
874
+ "eval_loss": 2.2402756214141846,
875
+ "eval_rouge1": 31.5498,
876
+ "eval_rouge2": 10.4086,
877
+ "eval_rougeL": 25.2384,
878
+ "eval_rougeLsum": 25.2593,
879
+ "eval_runtime": 341.7269,
880
+ "eval_samples_per_second": 33.161,
881
+ "eval_steps_per_second": 1.384,
882
+ "step": 6900
883
+ },
884
+ {
885
+ "epoch": 0.83,
886
+ "learning_rate": 0.000171136203246295,
887
+ "loss": 2.4641,
888
+ "step": 7050
889
+ },
890
+ {
891
+ "epoch": 0.83,
892
+ "eval_gen_len": 18.7966,
893
+ "eval_loss": 2.2365546226501465,
894
+ "eval_rouge1": 31.4038,
895
+ "eval_rouge2": 10.3691,
896
+ "eval_rougeL": 25.1786,
897
+ "eval_rougeLsum": 25.195,
898
+ "eval_runtime": 341.8284,
899
+ "eval_samples_per_second": 33.151,
900
+ "eval_steps_per_second": 1.384,
901
+ "step": 7050
902
+ },
903
+ {
904
+ "epoch": 0.85,
905
+ "learning_rate": 0.00015349329569513056,
906
+ "loss": 2.4276,
907
+ "step": 7200
908
+ },
909
+ {
910
+ "epoch": 0.85,
911
+ "eval_gen_len": 18.7789,
912
+ "eval_loss": 2.2344412803649902,
913
+ "eval_rouge1": 31.5076,
914
+ "eval_rouge2": 10.4403,
915
+ "eval_rougeL": 25.299,
916
+ "eval_rougeLsum": 25.3201,
917
+ "eval_runtime": 342.6233,
918
+ "eval_samples_per_second": 33.074,
919
+ "eval_steps_per_second": 1.381,
920
+ "step": 7200
921
+ },
922
+ {
923
+ "epoch": 0.86,
924
+ "learning_rate": 0.00013585038814396613,
925
+ "loss": 2.4402,
926
+ "step": 7350
927
+ },
928
+ {
929
+ "epoch": 0.86,
930
+ "eval_gen_len": 18.7713,
931
+ "eval_loss": 2.2317440509796143,
932
+ "eval_rouge1": 31.5216,
933
+ "eval_rouge2": 10.3901,
934
+ "eval_rougeL": 25.276,
935
+ "eval_rougeLsum": 25.2943,
936
+ "eval_runtime": 345.4971,
937
+ "eval_samples_per_second": 32.799,
938
+ "eval_steps_per_second": 1.369,
939
+ "step": 7350
940
+ },
941
+ {
942
+ "epoch": 0.88,
943
+ "learning_rate": 0.00011820748059280171,
944
+ "loss": 2.44,
945
+ "step": 7500
946
+ },
947
+ {
948
+ "epoch": 0.88,
949
+ "eval_gen_len": 18.7671,
950
+ "eval_loss": 2.2292771339416504,
951
+ "eval_rouge1": 31.4244,
952
+ "eval_rouge2": 10.4211,
953
+ "eval_rougeL": 25.2592,
954
+ "eval_rougeLsum": 25.2735,
955
+ "eval_runtime": 342.0133,
956
+ "eval_samples_per_second": 33.133,
957
+ "eval_steps_per_second": 1.383,
958
+ "step": 7500
959
+ },
960
+ {
961
+ "epoch": 0.9,
962
+ "learning_rate": 0.00010056457304163728,
963
+ "loss": 2.4251,
964
+ "step": 7650
965
+ },
966
+ {
967
+ "epoch": 0.9,
968
+ "eval_gen_len": 18.7972,
969
+ "eval_loss": 2.226907968521118,
970
+ "eval_rouge1": 31.4887,
971
+ "eval_rouge2": 10.3959,
972
+ "eval_rougeL": 25.2335,
973
+ "eval_rougeLsum": 25.2545,
974
+ "eval_runtime": 341.7402,
975
+ "eval_samples_per_second": 33.16,
976
+ "eval_steps_per_second": 1.384,
977
+ "step": 7650
978
+ },
979
+ {
980
+ "epoch": 0.92,
981
+ "learning_rate": 8.292166549047284e-05,
982
+ "loss": 2.456,
983
+ "step": 7800
984
+ },
985
+ {
986
+ "epoch": 0.92,
987
+ "eval_gen_len": 18.786,
988
+ "eval_loss": 2.224229097366333,
989
+ "eval_rouge1": 31.4508,
990
+ "eval_rouge2": 10.4079,
991
+ "eval_rougeL": 25.2328,
992
+ "eval_rougeLsum": 25.2564,
993
+ "eval_runtime": 341.8379,
994
+ "eval_samples_per_second": 33.15,
995
+ "eval_steps_per_second": 1.384,
996
+ "step": 7800
997
+ },
998
+ {
999
+ "epoch": 0.94,
1000
+ "learning_rate": 6.527875793930841e-05,
1001
+ "loss": 2.4181,
1002
+ "step": 7950
1003
+ },
1004
+ {
1005
+ "epoch": 0.94,
1006
+ "eval_gen_len": 18.8012,
1007
+ "eval_loss": 2.2223522663116455,
1008
+ "eval_rouge1": 31.6181,
1009
+ "eval_rouge2": 10.5558,
1010
+ "eval_rougeL": 25.3867,
1011
+ "eval_rougeLsum": 25.4042,
1012
+ "eval_runtime": 342.1259,
1013
+ "eval_samples_per_second": 33.122,
1014
+ "eval_steps_per_second": 1.383,
1015
+ "step": 7950
1016
+ },
1017
+ {
1018
+ "epoch": 0.95,
1019
+ "learning_rate": 4.763585038814397e-05,
1020
+ "loss": 2.4288,
1021
+ "step": 8100
1022
+ },
1023
+ {
1024
+ "epoch": 0.95,
1025
+ "eval_gen_len": 18.7953,
1026
+ "eval_loss": 2.22170352935791,
1027
+ "eval_rouge1": 31.7626,
1028
+ "eval_rouge2": 10.6059,
1029
+ "eval_rougeL": 25.4827,
1030
+ "eval_rougeLsum": 25.4958,
1031
+ "eval_runtime": 341.8728,
1032
+ "eval_samples_per_second": 33.147,
1033
+ "eval_steps_per_second": 1.384,
1034
+ "step": 8100
1035
+ },
1036
+ {
1037
+ "epoch": 0.97,
1038
+ "learning_rate": 2.9992942836979537e-05,
1039
+ "loss": 2.4327,
1040
+ "step": 8250
1041
+ },
1042
+ {
1043
+ "epoch": 0.97,
1044
+ "eval_gen_len": 18.7827,
1045
+ "eval_loss": 2.220174789428711,
1046
+ "eval_rouge1": 31.6839,
1047
+ "eval_rouge2": 10.5615,
1048
+ "eval_rougeL": 25.4137,
1049
+ "eval_rougeLsum": 25.433,
1050
+ "eval_runtime": 342.1089,
1051
+ "eval_samples_per_second": 33.124,
1052
+ "eval_steps_per_second": 1.383,
1053
+ "step": 8250
1054
+ },
1055
+ {
1056
+ "epoch": 0.99,
1057
+ "learning_rate": 1.2350035285815103e-05,
1058
+ "loss": 2.4118,
1059
+ "step": 8400
1060
+ },
1061
+ {
1062
+ "epoch": 0.99,
1063
+ "eval_gen_len": 18.7979,
1064
+ "eval_loss": 2.2196593284606934,
1065
+ "eval_rouge1": 31.6519,
1066
+ "eval_rouge2": 10.4949,
1067
+ "eval_rougeL": 25.3751,
1068
+ "eval_rougeLsum": 25.3984,
1069
+ "eval_runtime": 342.1132,
1070
+ "eval_samples_per_second": 33.124,
1071
+ "eval_steps_per_second": 1.383,
1072
+ "step": 8400
1073
+ }
1074
+ ],
1075
+ "max_steps": 8502,
1076
+ "num_train_epochs": 1,
1077
+ "total_flos": 4.261943991730176e+16,
1078
+ "trial_name": null,
1079
+ "trial_params": null
1080
+ }