kaifanli commited on
Commit
40292f2
1 Parent(s): 685f1bd

Training in progress, epoch 1

Browse files
all_results.json CHANGED
@@ -1,28 +1,28 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_gen_len": 19.080696202531644,
4
- "eval_loss": 2.338231325149536,
5
- "eval_rouge1": 3.8334,
6
- "eval_rouge2": 0.7391,
7
- "eval_rougeL": 2.6123,
8
- "eval_rougeLsum": 3.4838,
9
- "eval_runtime": 447.0392,
10
  "eval_samples": 632,
11
- "eval_samples_per_second": 1.414,
12
- "eval_steps_per_second": 1.414,
13
- "predict_gen_len": 56.19065420560748,
14
- "predict_loss": 2.9006059169769287,
15
- "predict_rouge1": 15.964,
16
- "predict_rouge2": 2.625,
17
- "predict_rougeL": 10.5843,
18
- "predict_rougeLsum": 13.4933,
19
- "predict_runtime": 286.0012,
20
  "predict_samples": 535,
21
- "predict_samples_per_second": 1.871,
22
- "predict_steps_per_second": 1.871,
23
- "train_loss": 1.6083203901324357,
24
- "train_runtime": 3729.4608,
25
  "train_samples": 4332,
26
- "train_samples_per_second": 5.808,
27
- "train_steps_per_second": 5.808
28
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_gen_len": 57.35284810126582,
4
+ "eval_loss": 3.128607988357544,
5
+ "eval_rouge1": 13.7182,
6
+ "eval_rouge2": 2.311,
7
+ "eval_rougeL": 9.1726,
8
+ "eval_rougeLsum": 11.5058,
9
+ "eval_runtime": 359.9186,
10
  "eval_samples": 632,
11
+ "eval_samples_per_second": 1.756,
12
+ "eval_steps_per_second": 1.756,
13
+ "predict_gen_len": 52.28971962616822,
14
+ "predict_loss": 3.2279164791107178,
15
+ "predict_rouge1": 16.0977,
16
+ "predict_rouge2": 2.9966,
17
+ "predict_rougeL": 10.7567,
18
+ "predict_rougeLsum": 12.5377,
19
+ "predict_runtime": 273.2778,
20
  "predict_samples": 535,
21
+ "predict_samples_per_second": 1.958,
22
+ "predict_steps_per_second": 1.958,
23
+ "train_loss": 0.23670565184904047,
24
+ "train_runtime": 3830.6771,
25
  "train_samples": 4332,
26
+ "train_samples_per_second": 5.654,
27
+ "train_steps_per_second": 5.654
28
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_gen_len": 19.080696202531644,
4
- "eval_loss": 2.338231325149536,
5
- "eval_rouge1": 3.8334,
6
- "eval_rouge2": 0.7391,
7
- "eval_rougeL": 2.6123,
8
- "eval_rougeLsum": 3.4838,
9
- "eval_runtime": 447.0392,
10
  "eval_samples": 632,
11
- "eval_samples_per_second": 1.414,
12
- "eval_steps_per_second": 1.414
13
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_gen_len": 57.35284810126582,
4
+ "eval_loss": 3.128607988357544,
5
+ "eval_rouge1": 13.7182,
6
+ "eval_rouge2": 2.311,
7
+ "eval_rougeL": 9.1726,
8
+ "eval_rougeLsum": 11.5058,
9
+ "eval_runtime": 359.9186,
10
  "eval_samples": 632,
11
+ "eval_samples_per_second": 1.756,
12
+ "eval_steps_per_second": 1.756
13
  }
generated_predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
predict_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "predict_gen_len": 56.19065420560748,
3
- "predict_loss": 2.9006059169769287,
4
- "predict_rouge1": 15.964,
5
- "predict_rouge2": 2.625,
6
- "predict_rougeL": 10.5843,
7
- "predict_rougeLsum": 13.4933,
8
- "predict_runtime": 286.0012,
9
  "predict_samples": 535,
10
- "predict_samples_per_second": 1.871,
11
- "predict_steps_per_second": 1.871
12
  }
 
1
  {
2
+ "predict_gen_len": 52.28971962616822,
3
+ "predict_loss": 3.2279164791107178,
4
+ "predict_rouge1": 16.0977,
5
+ "predict_rouge2": 2.9966,
6
+ "predict_rougeL": 10.7567,
7
+ "predict_rougeLsum": 12.5377,
8
+ "predict_runtime": 273.2778,
9
  "predict_samples": 535,
10
+ "predict_samples_per_second": 1.958,
11
+ "predict_steps_per_second": 1.958
12
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5b02c795fa44cef351d2b611c9e58c3c68f4d185a0ae4c411e25de0ec551cc7
3
  size 501807853
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:172991dc78c9918db2529f7adc381600a30e38b8aec3163b28636fb580629e59
3
  size 501807853
runs/Mar22_12-27-47_kogecha/events.out.tfevents.1711082385.kogecha ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:195f4236267b24b54e5763b09fad05e35237ee01c5f4f42d56115a70237c9de8
3
+ size 575
runs/Mar26_19-29-34_kogecha/events.out.tfevents.1711448988.kogecha ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8a3e81acffc3b7e3a1d8bede7cc9f6b98afafc216ade878e4afd75083aa7664
3
+ size 7434
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 1.6083203901324357,
4
- "train_runtime": 3729.4608,
5
  "train_samples": 4332,
6
- "train_samples_per_second": 5.808,
7
- "train_steps_per_second": 5.808
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.23670565184904047,
4
+ "train_runtime": 3830.6771,
5
  "train_samples": 4332,
6
+ "train_samples_per_second": 5.654,
7
+ "train_steps_per_second": 5.654
8
  }
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "best_metric": 3.8334,
3
- "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-21660",
4
  "epoch": 5.0,
5
  "global_step": 21660,
6
  "is_hyper_param_search": false,
@@ -9,335 +9,335 @@
9
  "log_history": [
10
  {
11
  "epoch": 0.12,
12
- "learning_rate": 2.930747922437673e-06,
13
- "loss": 3.4399,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.23,
18
- "learning_rate": 2.8614958448753465e-06,
19
- "loss": 2.7955,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 0.35,
24
- "learning_rate": 2.7922437673130195e-06,
25
- "loss": 2.5456,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 0.46,
30
- "learning_rate": 2.7229916897506925e-06,
31
- "loss": 2.3587,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 0.58,
36
- "learning_rate": 2.6537396121883655e-06,
37
- "loss": 2.2348,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 0.69,
42
- "learning_rate": 2.584487534626039e-06,
43
- "loss": 2.1504,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.81,
48
- "learning_rate": 2.515235457063712e-06,
49
- "loss": 2.0208,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 0.92,
54
- "learning_rate": 2.445983379501385e-06,
55
- "loss": 1.9773,
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 1.0,
60
- "eval_gen_len": 9.685126582278482,
61
- "eval_loss": 2.306642532348633,
62
- "eval_rouge1": 2.1349,
63
- "eval_rouge2": 0.4575,
64
- "eval_rougeL": 1.5719,
65
- "eval_rougeLsum": 1.9249,
66
- "eval_runtime": 120.637,
67
- "eval_samples_per_second": 5.239,
68
- "eval_steps_per_second": 5.239,
69
  "step": 4332
70
  },
71
  {
72
  "epoch": 1.04,
73
- "learning_rate": 2.376731301939058e-06,
74
- "loss": 1.871,
75
  "step": 4500
76
  },
77
  {
78
  "epoch": 1.15,
79
- "learning_rate": 2.3074792243767314e-06,
80
- "loss": 1.8827,
81
  "step": 5000
82
  },
83
  {
84
  "epoch": 1.27,
85
- "learning_rate": 2.2382271468144044e-06,
86
- "loss": 1.7772,
87
  "step": 5500
88
  },
89
  {
90
  "epoch": 1.39,
91
- "learning_rate": 2.1689750692520774e-06,
92
- "loss": 1.7125,
93
  "step": 6000
94
  },
95
  {
96
  "epoch": 1.5,
97
- "learning_rate": 2.099722991689751e-06,
98
- "loss": 1.6817,
99
  "step": 6500
100
  },
101
  {
102
  "epoch": 1.62,
103
- "learning_rate": 2.030470914127424e-06,
104
- "loss": 1.6964,
105
  "step": 7000
106
  },
107
  {
108
  "epoch": 1.73,
109
- "learning_rate": 1.961218836565097e-06,
110
- "loss": 1.6218,
111
  "step": 7500
112
  },
113
  {
114
  "epoch": 1.85,
115
- "learning_rate": 1.89196675900277e-06,
116
- "loss": 1.5563,
117
  "step": 8000
118
  },
119
  {
120
  "epoch": 1.96,
121
- "learning_rate": 1.822714681440443e-06,
122
- "loss": 1.5625,
123
  "step": 8500
124
  },
125
  {
126
  "epoch": 2.0,
127
- "eval_gen_len": 14.810126582278482,
128
- "eval_loss": 2.293116807937622,
129
- "eval_rouge1": 3.6283,
130
- "eval_rouge2": 0.7297,
131
- "eval_rougeL": 2.5484,
132
- "eval_rougeLsum": 3.285,
133
- "eval_runtime": 289.1187,
134
- "eval_samples_per_second": 2.186,
135
- "eval_steps_per_second": 2.186,
136
  "step": 8664
137
  },
138
  {
139
  "epoch": 2.08,
140
- "learning_rate": 1.7534626038781163e-06,
141
- "loss": 1.515,
142
  "step": 9000
143
  },
144
  {
145
  "epoch": 2.19,
146
- "learning_rate": 1.6842105263157895e-06,
147
- "loss": 1.5016,
148
  "step": 9500
149
  },
150
  {
151
  "epoch": 2.31,
152
- "learning_rate": 1.6149584487534625e-06,
153
- "loss": 1.4582,
154
  "step": 10000
155
  },
156
  {
157
  "epoch": 2.42,
158
- "learning_rate": 1.5457063711911357e-06,
159
- "loss": 1.4346,
160
  "step": 10500
161
  },
162
  {
163
  "epoch": 2.54,
164
- "learning_rate": 1.476454293628809e-06,
165
- "loss": 1.4243,
166
  "step": 11000
167
  },
168
  {
169
  "epoch": 2.65,
170
- "learning_rate": 1.4072022160664822e-06,
171
- "loss": 1.447,
172
  "step": 11500
173
  },
174
  {
175
  "epoch": 2.77,
176
- "learning_rate": 1.3379501385041552e-06,
177
- "loss": 1.4036,
178
  "step": 12000
179
  },
180
  {
181
  "epoch": 2.89,
182
- "learning_rate": 1.2686980609418284e-06,
183
- "loss": 1.3739,
184
  "step": 12500
185
  },
186
  {
187
  "epoch": 3.0,
188
- "eval_gen_len": 12.979430379746836,
189
- "eval_loss": 2.3153417110443115,
190
- "eval_rouge1": 2.6835,
191
- "eval_rouge2": 0.5213,
192
- "eval_rougeL": 1.9015,
193
- "eval_rougeLsum": 2.5034,
194
- "eval_runtime": 348.2183,
195
- "eval_samples_per_second": 1.815,
196
- "eval_steps_per_second": 1.815,
197
  "step": 12996
198
  },
199
  {
200
  "epoch": 3.0,
201
- "learning_rate": 1.1994459833795014e-06,
202
- "loss": 1.3701,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 3.12,
207
- "learning_rate": 1.1301939058171746e-06,
208
- "loss": 1.3214,
209
  "step": 13500
210
  },
211
  {
212
  "epoch": 3.23,
213
- "learning_rate": 1.0609418282548476e-06,
214
- "loss": 1.3884,
215
  "step": 14000
216
  },
217
  {
218
  "epoch": 3.35,
219
- "learning_rate": 9.916897506925209e-07,
220
- "loss": 1.3147,
221
  "step": 14500
222
  },
223
  {
224
  "epoch": 3.46,
225
- "learning_rate": 9.22437673130194e-07,
226
- "loss": 1.2957,
227
  "step": 15000
228
  },
229
  {
230
  "epoch": 3.58,
231
- "learning_rate": 8.531855955678671e-07,
232
- "loss": 1.304,
233
  "step": 15500
234
  },
235
  {
236
  "epoch": 3.69,
237
- "learning_rate": 7.839335180055402e-07,
238
- "loss": 1.2635,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 3.81,
243
- "learning_rate": 7.146814404432133e-07,
244
- "loss": 1.2657,
245
  "step": 16500
246
  },
247
  {
248
  "epoch": 3.92,
249
- "learning_rate": 6.454293628808864e-07,
250
- "loss": 1.2579,
251
  "step": 17000
252
  },
253
  {
254
  "epoch": 4.0,
255
- "eval_gen_len": 17.72151898734177,
256
- "eval_loss": 2.337430477142334,
257
- "eval_rouge1": 3.4587,
258
- "eval_rouge2": 0.6968,
259
- "eval_rougeL": 2.3777,
260
- "eval_rougeLsum": 3.1843,
261
- "eval_runtime": 479.8504,
262
- "eval_samples_per_second": 1.317,
263
- "eval_steps_per_second": 1.317,
264
  "step": 17328
265
  },
266
  {
267
  "epoch": 4.04,
268
- "learning_rate": 5.761772853185595e-07,
269
- "loss": 1.2155,
270
  "step": 17500
271
  },
272
  {
273
  "epoch": 4.16,
274
- "learning_rate": 5.069252077562327e-07,
275
- "loss": 1.2263,
276
  "step": 18000
277
  },
278
  {
279
  "epoch": 4.27,
280
- "learning_rate": 4.376731301939058e-07,
281
- "loss": 1.2494,
282
  "step": 18500
283
  },
284
  {
285
  "epoch": 4.39,
286
- "learning_rate": 3.684210526315789e-07,
287
- "loss": 1.2308,
288
  "step": 19000
289
  },
290
  {
291
  "epoch": 4.5,
292
- "learning_rate": 2.991689750692521e-07,
293
- "loss": 1.2468,
294
  "step": 19500
295
  },
296
  {
297
  "epoch": 4.62,
298
- "learning_rate": 2.2991689750692521e-07,
299
- "loss": 1.2399,
300
  "step": 20000
301
  },
302
  {
303
  "epoch": 4.73,
304
- "learning_rate": 1.6066481994459835e-07,
305
- "loss": 1.1831,
306
  "step": 20500
307
  },
308
  {
309
  "epoch": 4.85,
310
- "learning_rate": 9.141274238227148e-08,
311
- "loss": 1.2622,
312
  "step": 21000
313
  },
314
  {
315
  "epoch": 4.96,
316
- "learning_rate": 2.2160664819944597e-08,
317
- "loss": 1.2145,
318
  "step": 21500
319
  },
320
  {
321
  "epoch": 5.0,
322
- "eval_gen_len": 19.080696202531644,
323
- "eval_loss": 2.338231325149536,
324
- "eval_rouge1": 3.8334,
325
- "eval_rouge2": 0.7391,
326
- "eval_rougeL": 2.6123,
327
- "eval_rougeLsum": 3.4838,
328
- "eval_runtime": 447.0874,
329
- "eval_samples_per_second": 1.414,
330
- "eval_steps_per_second": 1.414,
331
  "step": 21660
332
  },
333
  {
334
  "epoch": 5.0,
335
  "step": 21660,
336
  "total_flos": 3568971203850240.0,
337
- "train_loss": 1.6083203901324357,
338
- "train_runtime": 3729.4608,
339
- "train_samples_per_second": 5.808,
340
- "train_steps_per_second": 5.808
341
  }
342
  ],
343
  "max_steps": 21660,
 
1
  {
2
+ "best_metric": 13.7182,
3
+ "best_model_checkpoint": "saved/tobyoki-pairwise/bart-base-japanese/BaseModel/checkpoint-8664",
4
  "epoch": 5.0,
5
  "global_step": 21660,
6
  "is_hyper_param_search": false,
 
9
  "log_history": [
10
  {
11
  "epoch": 0.12,
12
+ "learning_rate": 4.8845798707294554e-05,
13
+ "loss": 2.1628,
14
  "step": 500
15
  },
16
  {
17
  "epoch": 0.23,
18
+ "learning_rate": 4.7691597414589107e-05,
19
+ "loss": 1.3515,
20
  "step": 1000
21
  },
22
  {
23
  "epoch": 0.35,
24
+ "learning_rate": 4.653739612188366e-05,
25
+ "loss": 0.9577,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 0.46,
30
+ "learning_rate": 4.538319482917821e-05,
31
+ "loss": 0.7042,
32
  "step": 2000
33
  },
34
  {
35
  "epoch": 0.58,
36
+ "learning_rate": 4.422899353647276e-05,
37
+ "loss": 0.5935,
38
  "step": 2500
39
  },
40
  {
41
  "epoch": 0.69,
42
+ "learning_rate": 4.3074792243767315e-05,
43
+ "loss": 0.4658,
44
  "step": 3000
45
  },
46
  {
47
  "epoch": 0.81,
48
+ "learning_rate": 4.192059095106187e-05,
49
+ "loss": 0.3854,
50
  "step": 3500
51
  },
52
  {
53
  "epoch": 0.92,
54
+ "learning_rate": 4.076638965835642e-05,
55
+ "loss": 0.2994,
56
  "step": 4000
57
  },
58
  {
59
  "epoch": 1.0,
60
+ "eval_gen_len": 55.063291139240505,
61
+ "eval_loss": 2.7883288860321045,
62
+ "eval_rouge1": 11.1611,
63
+ "eval_rouge2": 1.7768,
64
+ "eval_rougeL": 7.5158,
65
+ "eval_rougeLsum": 9.6222,
66
+ "eval_runtime": 352.9978,
67
+ "eval_samples_per_second": 1.79,
68
+ "eval_steps_per_second": 1.79,
69
  "step": 4332
70
  },
71
  {
72
  "epoch": 1.04,
73
+ "learning_rate": 3.961218836565097e-05,
74
+ "loss": 0.2806,
75
  "step": 4500
76
  },
77
  {
78
  "epoch": 1.15,
79
+ "learning_rate": 3.845798707294552e-05,
80
+ "loss": 0.263,
81
  "step": 5000
82
  },
83
  {
84
  "epoch": 1.27,
85
+ "learning_rate": 3.7303785780240075e-05,
86
+ "loss": 0.2187,
87
  "step": 5500
88
  },
89
  {
90
  "epoch": 1.39,
91
+ "learning_rate": 3.614958448753463e-05,
92
+ "loss": 0.1761,
93
  "step": 6000
94
  },
95
  {
96
  "epoch": 1.5,
97
+ "learning_rate": 3.499538319482918e-05,
98
+ "loss": 0.1664,
99
  "step": 6500
100
  },
101
  {
102
  "epoch": 1.62,
103
+ "learning_rate": 3.384118190212373e-05,
104
+ "loss": 0.1896,
105
  "step": 7000
106
  },
107
  {
108
  "epoch": 1.73,
109
+ "learning_rate": 3.2686980609418284e-05,
110
+ "loss": 0.1627,
111
  "step": 7500
112
  },
113
  {
114
  "epoch": 1.85,
115
+ "learning_rate": 3.1532779316712836e-05,
116
+ "loss": 0.1146,
117
  "step": 8000
118
  },
119
  {
120
  "epoch": 1.96,
121
+ "learning_rate": 3.0378578024007388e-05,
122
+ "loss": 0.1513,
123
  "step": 8500
124
  },
125
  {
126
  "epoch": 2.0,
127
+ "eval_gen_len": 57.35284810126582,
128
+ "eval_loss": 3.128607988357544,
129
+ "eval_rouge1": 13.7182,
130
+ "eval_rouge2": 2.311,
131
+ "eval_rougeL": 9.1726,
132
+ "eval_rougeLsum": 11.5058,
133
+ "eval_runtime": 358.8793,
134
+ "eval_samples_per_second": 1.761,
135
+ "eval_steps_per_second": 1.761,
136
  "step": 8664
137
  },
138
  {
139
  "epoch": 2.08,
140
+ "learning_rate": 2.922437673130194e-05,
141
+ "loss": 0.125,
142
  "step": 9000
143
  },
144
  {
145
  "epoch": 2.19,
146
+ "learning_rate": 2.8070175438596492e-05,
147
+ "loss": 0.1028,
148
  "step": 9500
149
  },
150
  {
151
  "epoch": 2.31,
152
+ "learning_rate": 2.6915974145891044e-05,
153
+ "loss": 0.0934,
154
  "step": 10000
155
  },
156
  {
157
  "epoch": 2.42,
158
+ "learning_rate": 2.5761772853185596e-05,
159
+ "loss": 0.0993,
160
  "step": 10500
161
  },
162
  {
163
  "epoch": 2.54,
164
+ "learning_rate": 2.460757156048015e-05,
165
+ "loss": 0.074,
166
  "step": 11000
167
  },
168
  {
169
  "epoch": 2.65,
170
+ "learning_rate": 2.3453370267774704e-05,
171
+ "loss": 0.0857,
172
  "step": 11500
173
  },
174
  {
175
  "epoch": 2.77,
176
+ "learning_rate": 2.2299168975069256e-05,
177
+ "loss": 0.0823,
178
  "step": 12000
179
  },
180
  {
181
  "epoch": 2.89,
182
+ "learning_rate": 2.1144967682363804e-05,
183
+ "loss": 0.0778,
184
  "step": 12500
185
  },
186
  {
187
  "epoch": 3.0,
188
+ "eval_gen_len": 48.70886075949367,
189
+ "eval_loss": 3.3238439559936523,
190
+ "eval_rouge1": 12.1173,
191
+ "eval_rouge2": 1.88,
192
+ "eval_rougeL": 8.1156,
193
+ "eval_rougeLsum": 10.1187,
194
+ "eval_runtime": 315.5777,
195
+ "eval_samples_per_second": 2.003,
196
+ "eval_steps_per_second": 2.003,
197
  "step": 12996
198
  },
199
  {
200
  "epoch": 3.0,
201
+ "learning_rate": 1.9990766389658356e-05,
202
+ "loss": 0.078,
203
  "step": 13000
204
  },
205
  {
206
  "epoch": 3.12,
207
+ "learning_rate": 1.883656509695291e-05,
208
+ "loss": 0.0546,
209
  "step": 13500
210
  },
211
  {
212
  "epoch": 3.23,
213
+ "learning_rate": 1.768236380424746e-05,
214
+ "loss": 0.062,
215
  "step": 14000
216
  },
217
  {
218
  "epoch": 3.35,
219
+ "learning_rate": 1.6528162511542013e-05,
220
+ "loss": 0.0656,
221
  "step": 14500
222
  },
223
  {
224
  "epoch": 3.46,
225
+ "learning_rate": 1.5373961218836565e-05,
226
+ "loss": 0.0555,
227
  "step": 15000
228
  },
229
  {
230
  "epoch": 3.58,
231
+ "learning_rate": 1.4219759926131118e-05,
232
+ "loss": 0.0437,
233
  "step": 15500
234
  },
235
  {
236
  "epoch": 3.69,
237
+ "learning_rate": 1.306555863342567e-05,
238
+ "loss": 0.05,
239
  "step": 16000
240
  },
241
  {
242
  "epoch": 3.81,
243
+ "learning_rate": 1.1911357340720223e-05,
244
+ "loss": 0.0577,
245
  "step": 16500
246
  },
247
  {
248
  "epoch": 3.92,
249
+ "learning_rate": 1.0757156048014775e-05,
250
+ "loss": 0.056,
251
  "step": 17000
252
  },
253
  {
254
  "epoch": 4.0,
255
+ "eval_gen_len": 50.7373417721519,
256
+ "eval_loss": 3.4031858444213867,
257
+ "eval_rouge1": 11.9555,
258
+ "eval_rouge2": 2.0536,
259
+ "eval_rougeL": 8.2185,
260
+ "eval_rougeLsum": 10.0656,
261
+ "eval_runtime": 343.5063,
262
+ "eval_samples_per_second": 1.84,
263
+ "eval_steps_per_second": 1.84,
264
  "step": 17328
265
  },
266
  {
267
  "epoch": 4.04,
268
+ "learning_rate": 9.602954755309327e-06,
269
+ "loss": 0.0416,
270
  "step": 17500
271
  },
272
  {
273
  "epoch": 4.16,
274
+ "learning_rate": 8.448753462603879e-06,
275
+ "loss": 0.0393,
276
  "step": 18000
277
  },
278
  {
279
  "epoch": 4.27,
280
+ "learning_rate": 7.29455216989843e-06,
281
+ "loss": 0.0368,
282
  "step": 18500
283
  },
284
  {
285
  "epoch": 4.39,
286
+ "learning_rate": 6.140350877192982e-06,
287
+ "loss": 0.0433,
288
  "step": 19000
289
  },
290
  {
291
  "epoch": 4.5,
292
+ "learning_rate": 4.986149584487535e-06,
293
+ "loss": 0.0421,
294
  "step": 19500
295
  },
296
  {
297
  "epoch": 4.62,
298
+ "learning_rate": 3.831948291782087e-06,
299
+ "loss": 0.038,
300
  "step": 20000
301
  },
302
  {
303
  "epoch": 4.73,
304
+ "learning_rate": 2.6777469990766392e-06,
305
+ "loss": 0.0285,
306
  "step": 20500
307
  },
308
  {
309
  "epoch": 4.85,
310
+ "learning_rate": 1.5235457063711913e-06,
311
+ "loss": 0.0343,
312
  "step": 21000
313
  },
314
  {
315
  "epoch": 4.96,
316
+ "learning_rate": 3.693444136657433e-07,
317
+ "loss": 0.0364,
318
  "step": 21500
319
  },
320
  {
321
  "epoch": 5.0,
322
+ "eval_gen_len": 50.44462025316456,
323
+ "eval_loss": 3.525162696838379,
324
+ "eval_rouge1": 11.814,
325
+ "eval_rouge2": 1.7965,
326
+ "eval_rougeL": 8.0177,
327
+ "eval_rougeLsum": 9.7342,
328
+ "eval_runtime": 338.0819,
329
+ "eval_samples_per_second": 1.869,
330
+ "eval_steps_per_second": 1.869,
331
  "step": 21660
332
  },
333
  {
334
  "epoch": 5.0,
335
  "step": 21660,
336
  "total_flos": 3568971203850240.0,
337
+ "train_loss": 0.23670565184904047,
338
+ "train_runtime": 3830.6771,
339
+ "train_samples_per_second": 5.654,
340
+ "train_steps_per_second": 5.654
341
  }
342
  ],
343
  "max_steps": 21660,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f6d875f4df131ec83cd852c077704d3d090275db9bc87774bfd7df2a35aaf8e
3
  size 4475
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7081b5cc1106070ff6198b02a5a8687054ba5449cf8518c35628d1536648b8d
3
  size 4475