GlycerinLOL committed on
Commit
3706827
1 Parent(s): 5dfc59b

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +6 -1
  2. train_results.json +7 -0
  3. trainer_state.json +382 -0
all_results.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "predict_f1": 0.9117,
3
  "predict_gen_len": 27.57534850891124,
4
  "predict_loss": 1.5995206832885742,
@@ -11,5 +12,9 @@
11
  "predict_runtime": 1531.3156,
12
  "predict_samples": 11334,
13
  "predict_samples_per_second": 7.401,
14
- "predict_steps_per_second": 0.463
 
 
 
 
15
  }
 
1
  {
2
+ "epoch": 16.0,
3
  "predict_f1": 0.9117,
4
  "predict_gen_len": 27.57534850891124,
5
  "predict_loss": 1.5995206832885742,
 
12
  "predict_runtime": 1531.3156,
13
  "predict_samples": 11334,
14
  "predict_samples_per_second": 7.401,
15
+ "predict_steps_per_second": 0.463,
16
+ "train_loss": 0.8733468595713434,
17
+ "train_runtime": 22758.1802,
18
+ "train_samples_per_second": 35.152,
19
+ "train_steps_per_second": 0.366
20
  }
train_results.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 16.0,
3
+ "train_loss": 0.8733468595713434,
4
+ "train_runtime": 22758.1802,
5
+ "train_samples_per_second": 35.152,
6
+ "train_steps_per_second": 0.366
7
+ }
trainer_state.json ADDED
@@ -0,0 +1,382 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 16.0,
5
+ "eval_steps": 500,
6
+ "global_step": 8336,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.96,
13
+ "learning_rate": 1.9360204734484968e-05,
14
+ "loss": 2.0443,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 1.0,
19
+ "eval_f1": 0.9049,
20
+ "eval_gen_len": 28.363272727272726,
21
+ "eval_loss": 1.7046316862106323,
22
+ "eval_precision": 0.9041,
23
+ "eval_recall": 0.9061,
24
+ "eval_rouge1": 0.4488,
25
+ "eval_rouge2": 0.203,
26
+ "eval_rougeL": 0.3633,
27
+ "eval_rougeLsum": 0.3633,
28
+ "eval_runtime": 577.3748,
29
+ "eval_samples_per_second": 4.763,
30
+ "eval_steps_per_second": 0.298,
31
+ "step": 521
32
+ },
33
+ {
34
+ "epoch": 1.92,
35
+ "learning_rate": 1.872040946896993e-05,
36
+ "loss": 1.7826,
37
+ "step": 1000
38
+ },
39
+ {
40
+ "epoch": 2.0,
41
+ "eval_f1": 0.9072,
42
+ "eval_gen_len": 28.19490909090909,
43
+ "eval_loss": 1.6347475051879883,
44
+ "eval_precision": 0.9062,
45
+ "eval_recall": 0.9085,
46
+ "eval_rouge1": 0.4616,
47
+ "eval_rouge2": 0.2133,
48
+ "eval_rougeL": 0.3761,
49
+ "eval_rougeLsum": 0.3758,
50
+ "eval_runtime": 524.6485,
51
+ "eval_samples_per_second": 5.242,
52
+ "eval_steps_per_second": 0.328,
53
+ "step": 1042
54
+ },
55
+ {
56
+ "epoch": 2.88,
57
+ "learning_rate": 1.8080614203454897e-05,
58
+ "loss": 1.7134,
59
+ "step": 1500
60
+ },
61
+ {
62
+ "epoch": 3.0,
63
+ "eval_f1": 0.9084,
64
+ "eval_gen_len": 28.521818181818183,
65
+ "eval_loss": 1.5991039276123047,
66
+ "eval_precision": 0.9072,
67
+ "eval_recall": 0.91,
68
+ "eval_rouge1": 0.4683,
69
+ "eval_rouge2": 0.2186,
70
+ "eval_rougeL": 0.3824,
71
+ "eval_rougeLsum": 0.3822,
72
+ "eval_runtime": 539.0316,
73
+ "eval_samples_per_second": 5.102,
74
+ "eval_steps_per_second": 0.319,
75
+ "step": 1563
76
+ },
77
+ {
78
+ "epoch": 3.84,
79
+ "learning_rate": 1.744081893793986e-05,
80
+ "loss": 1.6664,
81
+ "step": 2000
82
+ },
83
+ {
84
+ "epoch": 4.0,
85
+ "eval_f1": 0.9096,
86
+ "eval_gen_len": 28.24981818181818,
87
+ "eval_loss": 1.5767467021942139,
88
+ "eval_precision": 0.9087,
89
+ "eval_recall": 0.9109,
90
+ "eval_rouge1": 0.4738,
91
+ "eval_rouge2": 0.2233,
92
+ "eval_rougeL": 0.3878,
93
+ "eval_rougeLsum": 0.3876,
94
+ "eval_runtime": 529.9968,
95
+ "eval_samples_per_second": 5.189,
96
+ "eval_steps_per_second": 0.325,
97
+ "step": 2084
98
+ },
99
+ {
100
+ "epoch": 4.8,
101
+ "learning_rate": 1.6801023672424827e-05,
102
+ "loss": 1.6296,
103
+ "step": 2500
104
+ },
105
+ {
106
+ "epoch": 5.0,
107
+ "eval_f1": 0.9103,
108
+ "eval_gen_len": 28.239636363636365,
109
+ "eval_loss": 1.5595422983169556,
110
+ "eval_precision": 0.9093,
111
+ "eval_recall": 0.9117,
112
+ "eval_rouge1": 0.4775,
113
+ "eval_rouge2": 0.2265,
114
+ "eval_rougeL": 0.3911,
115
+ "eval_rougeLsum": 0.391,
116
+ "eval_runtime": 526.5193,
117
+ "eval_samples_per_second": 5.223,
118
+ "eval_steps_per_second": 0.327,
119
+ "step": 2605
120
+ },
121
+ {
122
+ "epoch": 5.76,
123
+ "learning_rate": 1.616122840690979e-05,
124
+ "loss": 1.5984,
125
+ "step": 3000
126
+ },
127
+ {
128
+ "epoch": 6.0,
129
+ "eval_f1": 0.9109,
130
+ "eval_gen_len": 28.28,
131
+ "eval_loss": 1.5468252897262573,
132
+ "eval_precision": 0.9098,
133
+ "eval_recall": 0.9124,
134
+ "eval_rouge1": 0.4805,
135
+ "eval_rouge2": 0.2284,
136
+ "eval_rougeL": 0.3941,
137
+ "eval_rougeLsum": 0.3938,
138
+ "eval_runtime": 512.3397,
139
+ "eval_samples_per_second": 5.368,
140
+ "eval_steps_per_second": 0.336,
141
+ "step": 3126
142
+ },
143
+ {
144
+ "epoch": 6.72,
145
+ "learning_rate": 1.5521433141394756e-05,
146
+ "loss": 1.5738,
147
+ "step": 3500
148
+ },
149
+ {
150
+ "epoch": 7.0,
151
+ "eval_f1": 0.9113,
152
+ "eval_gen_len": 27.837818181818182,
153
+ "eval_loss": 1.5370196104049683,
154
+ "eval_precision": 0.9105,
155
+ "eval_recall": 0.9124,
156
+ "eval_rouge1": 0.4807,
157
+ "eval_rouge2": 0.2296,
158
+ "eval_rougeL": 0.3945,
159
+ "eval_rougeLsum": 0.3946,
160
+ "eval_runtime": 509.6023,
161
+ "eval_samples_per_second": 5.396,
162
+ "eval_steps_per_second": 0.338,
163
+ "step": 3647
164
+ },
165
+ {
166
+ "epoch": 7.68,
167
+ "learning_rate": 1.0403071017274472e-05,
168
+ "loss": 1.5476,
169
+ "step": 4000
170
+ },
171
+ {
172
+ "epoch": 8.0,
173
+ "eval_f1": 0.9114,
174
+ "eval_gen_len": 27.736363636363638,
175
+ "eval_loss": 1.530755639076233,
176
+ "eval_precision": 0.9108,
177
+ "eval_recall": 0.9125,
178
+ "eval_rouge1": 0.4823,
179
+ "eval_rouge2": 0.2315,
180
+ "eval_rougeL": 0.3963,
181
+ "eval_rougeLsum": 0.3965,
182
+ "eval_runtime": 510.2185,
183
+ "eval_samples_per_second": 5.39,
184
+ "eval_steps_per_second": 0.337,
185
+ "step": 4168
186
+ },
187
+ {
188
+ "epoch": 8.64,
189
+ "learning_rate": 9.203454894433782e-06,
190
+ "loss": 1.535,
191
+ "step": 4500
192
+ },
193
+ {
194
+ "epoch": 9.0,
195
+ "eval_f1": 0.9116,
196
+ "eval_gen_len": 27.653454545454544,
197
+ "eval_loss": 1.5260871648788452,
198
+ "eval_precision": 0.911,
199
+ "eval_recall": 0.9125,
200
+ "eval_rouge1": 0.4829,
201
+ "eval_rouge2": 0.2309,
202
+ "eval_rougeL": 0.3974,
203
+ "eval_rougeLsum": 0.3974,
204
+ "eval_runtime": 503.2649,
205
+ "eval_samples_per_second": 5.464,
206
+ "eval_steps_per_second": 0.342,
207
+ "step": 4689
208
+ },
209
+ {
210
+ "epoch": 9.6,
211
+ "learning_rate": 8.003838771593091e-06,
212
+ "loss": 1.52,
213
+ "step": 5000
214
+ },
215
+ {
216
+ "epoch": 10.0,
217
+ "eval_f1": 0.9117,
218
+ "eval_gen_len": 27.816,
219
+ "eval_loss": 1.52312433719635,
220
+ "eval_precision": 0.911,
221
+ "eval_recall": 0.9128,
222
+ "eval_rouge1": 0.4847,
223
+ "eval_rouge2": 0.2332,
224
+ "eval_rougeL": 0.3992,
225
+ "eval_rougeLsum": 0.3993,
226
+ "eval_runtime": 522.9989,
227
+ "eval_samples_per_second": 5.258,
228
+ "eval_steps_per_second": 0.329,
229
+ "step": 5210
230
+ },
231
+ {
232
+ "epoch": 10.56,
233
+ "learning_rate": 6.8042226487524e-06,
234
+ "loss": 1.5145,
235
+ "step": 5500
236
+ },
237
+ {
238
+ "epoch": 11.0,
239
+ "eval_f1": 0.9121,
240
+ "eval_gen_len": 27.360363636363637,
241
+ "eval_loss": 1.519996166229248,
242
+ "eval_precision": 0.9119,
243
+ "eval_recall": 0.9127,
244
+ "eval_rouge1": 0.4851,
245
+ "eval_rouge2": 0.2339,
246
+ "eval_rougeL": 0.4004,
247
+ "eval_rougeLsum": 0.4006,
248
+ "eval_runtime": 501.564,
249
+ "eval_samples_per_second": 5.483,
250
+ "eval_steps_per_second": 0.343,
251
+ "step": 5731
252
+ },
253
+ {
254
+ "epoch": 11.52,
255
+ "learning_rate": 5.6046065259117085e-06,
256
+ "loss": 1.5028,
257
+ "step": 6000
258
+ },
259
+ {
260
+ "epoch": 12.0,
261
+ "eval_f1": 0.9122,
262
+ "eval_gen_len": 27.462545454545456,
263
+ "eval_loss": 1.5178437232971191,
264
+ "eval_precision": 0.9118,
265
+ "eval_recall": 0.9129,
266
+ "eval_rouge1": 0.4858,
267
+ "eval_rouge2": 0.2345,
268
+ "eval_rougeL": 0.4001,
269
+ "eval_rougeLsum": 0.4002,
270
+ "eval_runtime": 501.8356,
271
+ "eval_samples_per_second": 5.48,
272
+ "eval_steps_per_second": 0.343,
273
+ "step": 6252
274
+ },
275
+ {
276
+ "epoch": 12.48,
277
+ "learning_rate": 4.404990403071018e-06,
278
+ "loss": 1.4946,
279
+ "step": 6500
280
+ },
281
+ {
282
+ "epoch": 13.0,
283
+ "eval_f1": 0.9121,
284
+ "eval_gen_len": 27.67890909090909,
285
+ "eval_loss": 1.5164216756820679,
286
+ "eval_precision": 0.9115,
287
+ "eval_recall": 0.9131,
288
+ "eval_rouge1": 0.4859,
289
+ "eval_rouge2": 0.2341,
290
+ "eval_rougeL": 0.4004,
291
+ "eval_rougeLsum": 0.4005,
292
+ "eval_runtime": 506.9944,
293
+ "eval_samples_per_second": 5.424,
294
+ "eval_steps_per_second": 0.339,
295
+ "step": 6773
296
+ },
297
+ {
298
+ "epoch": 13.44,
299
+ "learning_rate": 3.2053742802303266e-06,
300
+ "loss": 1.4877,
301
+ "step": 7000
302
+ },
303
+ {
304
+ "epoch": 14.0,
305
+ "eval_f1": 0.9123,
306
+ "eval_gen_len": 27.580363636363636,
307
+ "eval_loss": 1.515085220336914,
308
+ "eval_precision": 0.9119,
309
+ "eval_recall": 0.9131,
310
+ "eval_rouge1": 0.4868,
311
+ "eval_rouge2": 0.235,
312
+ "eval_rougeL": 0.4013,
313
+ "eval_rougeLsum": 0.4013,
314
+ "eval_runtime": 510.129,
315
+ "eval_samples_per_second": 5.391,
316
+ "eval_steps_per_second": 0.337,
317
+ "step": 7294
318
+ },
319
+ {
320
+ "epoch": 14.4,
321
+ "learning_rate": 2.0057581573896352e-06,
322
+ "loss": 1.4855,
323
+ "step": 7500
324
+ },
325
+ {
326
+ "epoch": 15.0,
327
+ "eval_f1": 0.9122,
328
+ "eval_gen_len": 27.584363636363637,
329
+ "eval_loss": 1.5146222114562988,
330
+ "eval_precision": 0.9117,
331
+ "eval_recall": 0.9131,
332
+ "eval_rouge1": 0.4863,
333
+ "eval_rouge2": 0.2349,
334
+ "eval_rougeL": 0.4014,
335
+ "eval_rougeLsum": 0.4016,
336
+ "eval_runtime": 507.3504,
337
+ "eval_samples_per_second": 5.42,
338
+ "eval_steps_per_second": 0.339,
339
+ "step": 7815
340
+ },
341
+ {
342
+ "epoch": 15.36,
343
+ "learning_rate": 8.061420345489445e-07,
344
+ "loss": 1.4782,
345
+ "step": 8000
346
+ },
347
+ {
348
+ "epoch": 16.0,
349
+ "eval_f1": 0.9122,
350
+ "eval_gen_len": 27.571636363636365,
351
+ "eval_loss": 1.514625906944275,
352
+ "eval_precision": 0.9118,
353
+ "eval_recall": 0.9131,
354
+ "eval_rouge1": 0.4863,
355
+ "eval_rouge2": 0.2348,
356
+ "eval_rougeL": 0.4011,
357
+ "eval_rougeLsum": 0.4012,
358
+ "eval_runtime": 505.7467,
359
+ "eval_samples_per_second": 5.438,
360
+ "eval_steps_per_second": 0.34,
361
+ "step": 8336
362
+ },
363
+ {
364
+ "epoch": 16.0,
365
+ "step": 8336,
366
+ "total_flos": 1.1557816346520453e+18,
367
+ "train_loss": 0.8733468595713434,
368
+ "train_runtime": 22758.1802,
369
+ "train_samples_per_second": 35.152,
370
+ "train_steps_per_second": 0.366
371
+ }
372
+ ],
373
+ "logging_steps": 500,
374
+ "max_steps": 8336,
375
+ "num_input_tokens_seen": 0,
376
+ "num_train_epochs": 16,
377
+ "save_steps": 500,
378
+ "total_flos": 1.1557816346520453e+18,
379
+ "train_batch_size": 24,
380
+ "trial_name": null,
381
+ "trial_params": null
382
+ }