File size: 7,901 Bytes
e0e38cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
{
  "best_metric": 35.2849,
  "best_model_checkpoint": "large_ox-wn_cod_15ep_eap/checkpoint-38360",
  "epoch": 15.0,
  "global_step": 41100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 4.666666666666667e-05,
      "loss": 2.1769,
      "step": 2740
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 11.342940924045202,
      "eval_loss": 1.905047059059143,
      "eval_rouge1": 28.7222,
      "eval_rouge2": 9.1873,
      "eval_rougeL": 26.6888,
      "eval_rougeLsum": 26.6937,
      "eval_runtime": 86.0596,
      "eval_samples_per_second": 162.469,
      "eval_steps_per_second": 1.278,
      "step": 2740
    },
    {
      "epoch": 2.0,
      "learning_rate": 4.3333333333333334e-05,
      "loss": 1.9408,
      "step": 5480
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 11.416464025175225,
      "eval_loss": 1.8151417970657349,
      "eval_rouge1": 29.8799,
      "eval_rouge2": 10.2327,
      "eval_rougeL": 27.7947,
      "eval_rougeLsum": 27.8044,
      "eval_runtime": 98.7019,
      "eval_samples_per_second": 141.659,
      "eval_steps_per_second": 1.114,
      "step": 5480
    },
    {
      "epoch": 3.0,
      "learning_rate": 4e-05,
      "loss": 1.8124,
      "step": 8220
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 11.531039908453726,
      "eval_loss": 1.7607892751693726,
      "eval_rouge1": 30.9845,
      "eval_rouge2": 10.9982,
      "eval_rougeL": 28.8059,
      "eval_rougeLsum": 28.8131,
      "eval_runtime": 96.5011,
      "eval_samples_per_second": 144.889,
      "eval_steps_per_second": 1.14,
      "step": 8220
    },
    {
      "epoch": 4.0,
      "learning_rate": 3.6666666666666666e-05,
      "loss": 1.7118,
      "step": 10960
    },
    {
      "epoch": 4.0,
      "eval_gen_len": 11.703690459161779,
      "eval_loss": 1.7228699922561646,
      "eval_rouge1": 31.6943,
      "eval_rouge2": 11.7412,
      "eval_rougeL": 29.4967,
      "eval_rougeLsum": 29.5319,
      "eval_runtime": 87.7321,
      "eval_samples_per_second": 159.372,
      "eval_steps_per_second": 1.254,
      "step": 10960
    },
    {
      "epoch": 5.0,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 1.6286,
      "step": 13700
    },
    {
      "epoch": 5.0,
      "eval_gen_len": 11.77835788871406,
      "eval_loss": 1.6936795711517334,
      "eval_rouge1": 32.5839,
      "eval_rouge2": 12.2431,
      "eval_rougeL": 30.1799,
      "eval_rougeLsum": 30.206,
      "eval_runtime": 84.5028,
      "eval_samples_per_second": 165.462,
      "eval_steps_per_second": 1.302,
      "step": 13700
    },
    {
      "epoch": 6.0,
      "learning_rate": 3e-05,
      "loss": 1.5597,
      "step": 16440
    },
    {
      "epoch": 6.0,
      "eval_gen_len": 11.597410956944643,
      "eval_loss": 1.674757480621338,
      "eval_rouge1": 32.9915,
      "eval_rouge2": 12.8514,
      "eval_rougeL": 30.7016,
      "eval_rougeLsum": 30.7145,
      "eval_runtime": 87.802,
      "eval_samples_per_second": 159.245,
      "eval_steps_per_second": 1.253,
      "step": 16440
    },
    {
      "epoch": 7.0,
      "learning_rate": 2.6666666666666667e-05,
      "loss": 1.4982,
      "step": 19180
    },
    {
      "epoch": 7.0,
      "eval_gen_len": 11.358031755113718,
      "eval_loss": 1.6578471660614014,
      "eval_rouge1": 33.2157,
      "eval_rouge2": 13.1389,
      "eval_rougeL": 30.9428,
      "eval_rougeLsum": 30.9519,
      "eval_runtime": 89.406,
      "eval_samples_per_second": 156.388,
      "eval_steps_per_second": 1.23,
      "step": 19180
    },
    {
      "epoch": 8.0,
      "learning_rate": 2.3333333333333336e-05,
      "loss": 1.4468,
      "step": 21920
    },
    {
      "epoch": 8.0,
      "eval_gen_len": 11.572378772707767,
      "eval_loss": 1.6473166942596436,
      "eval_rouge1": 33.6146,
      "eval_rouge2": 13.5922,
      "eval_rougeL": 31.3001,
      "eval_rougeLsum": 31.3235,
      "eval_runtime": 98.6248,
      "eval_samples_per_second": 141.77,
      "eval_steps_per_second": 1.115,
      "step": 21920
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.4022,
      "step": 24660
    },
    {
      "epoch": 9.0,
      "eval_gen_len": 11.738878558146189,
      "eval_loss": 1.6383947134017944,
      "eval_rouge1": 34.1711,
      "eval_rouge2": 14.1117,
      "eval_rougeL": 31.7951,
      "eval_rougeLsum": 31.8066,
      "eval_runtime": 89.5372,
      "eval_samples_per_second": 156.159,
      "eval_steps_per_second": 1.229,
      "step": 24660
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 1.364,
      "step": 27400
    },
    {
      "epoch": 10.0,
      "eval_gen_len": 11.665856100700902,
      "eval_loss": 1.6336920261383057,
      "eval_rouge1": 34.5489,
      "eval_rouge2": 14.5012,
      "eval_rougeL": 32.1329,
      "eval_rougeLsum": 32.1446,
      "eval_runtime": 103.7766,
      "eval_samples_per_second": 134.732,
      "eval_steps_per_second": 1.06,
      "step": 27400
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.3333333333333333e-05,
      "loss": 1.3321,
      "step": 30140
    },
    {
      "epoch": 11.0,
      "eval_gen_len": 11.800314690316121,
      "eval_loss": 1.6291483640670776,
      "eval_rouge1": 34.7133,
      "eval_rouge2": 14.7297,
      "eval_rougeL": 32.3042,
      "eval_rougeLsum": 32.314,
      "eval_runtime": 91.3961,
      "eval_samples_per_second": 152.982,
      "eval_steps_per_second": 1.204,
      "step": 30140
    },
    {
      "epoch": 12.0,
      "learning_rate": 1e-05,
      "loss": 1.3054,
      "step": 32880
    },
    {
      "epoch": 12.0,
      "eval_gen_len": 11.761908167644114,
      "eval_loss": 1.6267131567001343,
      "eval_rouge1": 34.9411,
      "eval_rouge2": 15.0282,
      "eval_rougeL": 32.5335,
      "eval_rougeLsum": 32.5451,
      "eval_runtime": 98.5092,
      "eval_samples_per_second": 141.936,
      "eval_steps_per_second": 1.117,
      "step": 32880
    },
    {
      "epoch": 13.0,
      "learning_rate": 6.666666666666667e-06,
      "loss": 1.2845,
      "step": 35620
    },
    {
      "epoch": 13.0,
      "eval_gen_len": 11.831712201401801,
      "eval_loss": 1.626239538192749,
      "eval_rouge1": 35.1648,
      "eval_rouge2": 15.2154,
      "eval_rougeL": 32.7387,
      "eval_rougeLsum": 32.742,
      "eval_runtime": 85.528,
      "eval_samples_per_second": 163.479,
      "eval_steps_per_second": 1.286,
      "step": 35620
    },
    {
      "epoch": 14.0,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 1.2699,
      "step": 38360
    },
    {
      "epoch": 14.0,
      "eval_gen_len": 11.816764411386067,
      "eval_loss": 1.6257190704345703,
      "eval_rouge1": 35.2849,
      "eval_rouge2": 15.3109,
      "eval_rougeL": 32.8508,
      "eval_rougeLsum": 32.853,
      "eval_runtime": 84.6116,
      "eval_samples_per_second": 165.249,
      "eval_steps_per_second": 1.3,
      "step": 38360
    },
    {
      "epoch": 15.0,
      "learning_rate": 0.0,
      "loss": 1.2595,
      "step": 41100
    },
    {
      "epoch": 15.0,
      "eval_gen_len": 11.797096266628522,
      "eval_loss": 1.6273423433303833,
      "eval_rouge1": 35.2224,
      "eval_rouge2": 15.2781,
      "eval_rougeL": 32.7718,
      "eval_rougeLsum": 32.7826,
      "eval_runtime": 95.1523,
      "eval_samples_per_second": 146.943,
      "eval_steps_per_second": 1.156,
      "step": 41100
    },
    {
      "epoch": 15.0,
      "step": 41100,
      "total_flos": 9.049973435337277e+17,
      "train_loss": 1.5328590292826185,
      "train_runtime": 18417.6233,
      "train_samples_per_second": 142.797,
      "train_steps_per_second": 2.232
    }
  ],
  "max_steps": 41100,
  "num_train_epochs": 15,
  "total_flos": 9.049973435337277e+17,
  "trial_name": null,
  "trial_params": null
}