MUNasir commited on
Commit
bb6f0e5
1 Parent(s): 546be15

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +344 -0
trainer_state.json ADDED
@@ -0,0 +1,344 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 19.3929173693086,
5
+ "global_step": 11500,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.84,
12
+ "learning_rate": 9.578414839797639e-06,
13
+ "loss": 0.3136,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_bleu": 17.1673,
19
+ "eval_gen_len": 43.5832,
20
+ "eval_loss": 2.7004430294036865,
21
+ "eval_runtime": 263.1603,
22
+ "eval_samples_per_second": 3.792,
23
+ "eval_steps_per_second": 0.475,
24
+ "step": 593
25
+ },
26
+ {
27
+ "epoch": 1.69,
28
+ "learning_rate": 9.156829679595279e-06,
29
+ "loss": 0.2962,
30
+ "step": 1000
31
+ },
32
+ {
33
+ "epoch": 2.0,
34
+ "eval_bleu": 17.2012,
35
+ "eval_gen_len": 43.6693,
36
+ "eval_loss": 2.719139814376831,
37
+ "eval_runtime": 263.247,
38
+ "eval_samples_per_second": 3.791,
39
+ "eval_steps_per_second": 0.475,
40
+ "step": 1186
41
+ },
42
+ {
43
+ "epoch": 2.53,
44
+ "learning_rate": 8.735244519392918e-06,
45
+ "loss": 0.2927,
46
+ "step": 1500
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "eval_bleu": 17.2291,
51
+ "eval_gen_len": 43.482,
52
+ "eval_loss": 2.7412936687469482,
53
+ "eval_runtime": 260.7955,
54
+ "eval_samples_per_second": 3.827,
55
+ "eval_steps_per_second": 0.479,
56
+ "step": 1779
57
+ },
58
+ {
59
+ "epoch": 3.37,
60
+ "learning_rate": 8.313659359190556e-06,
61
+ "loss": 0.2677,
62
+ "step": 2000
63
+ },
64
+ {
65
+ "epoch": 4.0,
66
+ "eval_bleu": 17.135,
67
+ "eval_gen_len": 43.5862,
68
+ "eval_loss": 2.7617862224578857,
69
+ "eval_runtime": 261.1749,
70
+ "eval_samples_per_second": 3.821,
71
+ "eval_steps_per_second": 0.479,
72
+ "step": 2372
73
+ },
74
+ {
75
+ "epoch": 4.22,
76
+ "learning_rate": 7.892074198988196e-06,
77
+ "loss": 0.2591,
78
+ "step": 2500
79
+ },
80
+ {
81
+ "epoch": 5.0,
82
+ "eval_bleu": 17.5543,
83
+ "eval_gen_len": 43.5922,
84
+ "eval_loss": 2.7780115604400635,
85
+ "eval_runtime": 262.647,
86
+ "eval_samples_per_second": 3.8,
87
+ "eval_steps_per_second": 0.476,
88
+ "step": 2965
89
+ },
90
+ {
91
+ "epoch": 5.06,
92
+ "learning_rate": 7.470489038785835e-06,
93
+ "loss": 0.2473,
94
+ "step": 3000
95
+ },
96
+ {
97
+ "epoch": 5.9,
98
+ "learning_rate": 7.048903878583474e-06,
99
+ "loss": 0.2282,
100
+ "step": 3500
101
+ },
102
+ {
103
+ "epoch": 6.0,
104
+ "eval_bleu": 17.226,
105
+ "eval_gen_len": 43.6703,
106
+ "eval_loss": 2.794311761856079,
107
+ "eval_runtime": 263.8826,
108
+ "eval_samples_per_second": 3.782,
109
+ "eval_steps_per_second": 0.474,
110
+ "step": 3558
111
+ },
112
+ {
113
+ "epoch": 6.75,
114
+ "learning_rate": 6.6273187183811136e-06,
115
+ "loss": 0.2244,
116
+ "step": 4000
117
+ },
118
+ {
119
+ "epoch": 7.0,
120
+ "eval_bleu": 17.615,
121
+ "eval_gen_len": 43.6934,
122
+ "eval_loss": 2.808680295944214,
123
+ "eval_runtime": 264.0527,
124
+ "eval_samples_per_second": 3.78,
125
+ "eval_steps_per_second": 0.473,
126
+ "step": 4151
127
+ },
128
+ {
129
+ "epoch": 7.59,
130
+ "learning_rate": 6.2057335581787524e-06,
131
+ "loss": 0.2196,
132
+ "step": 4500
133
+ },
134
+ {
135
+ "epoch": 8.0,
136
+ "eval_bleu": 17.3227,
137
+ "eval_gen_len": 43.7715,
138
+ "eval_loss": 2.825133800506592,
139
+ "eval_runtime": 281.974,
140
+ "eval_samples_per_second": 3.539,
141
+ "eval_steps_per_second": 0.443,
142
+ "step": 4744
143
+ },
144
+ {
145
+ "epoch": 8.43,
146
+ "learning_rate": 5.784148397976391e-06,
147
+ "loss": 0.2101,
148
+ "step": 5000
149
+ },
150
+ {
151
+ "epoch": 9.0,
152
+ "eval_bleu": 17.5072,
153
+ "eval_gen_len": 43.7084,
154
+ "eval_loss": 2.834676742553711,
155
+ "eval_runtime": 263.2231,
156
+ "eval_samples_per_second": 3.791,
157
+ "eval_steps_per_second": 0.475,
158
+ "step": 5337
159
+ },
160
+ {
161
+ "epoch": 9.27,
162
+ "learning_rate": 5.362563237774031e-06,
163
+ "loss": 0.2077,
164
+ "step": 5500
165
+ },
166
+ {
167
+ "epoch": 10.0,
168
+ "eval_bleu": 17.5712,
169
+ "eval_gen_len": 43.8597,
170
+ "eval_loss": 2.842376708984375,
171
+ "eval_runtime": 270.4686,
172
+ "eval_samples_per_second": 3.69,
173
+ "eval_steps_per_second": 0.462,
174
+ "step": 5930
175
+ },
176
+ {
177
+ "epoch": 10.12,
178
+ "learning_rate": 4.94097807757167e-06,
179
+ "loss": 0.2034,
180
+ "step": 6000
181
+ },
182
+ {
183
+ "epoch": 10.96,
184
+ "learning_rate": 4.519392917369309e-06,
185
+ "loss": 0.1968,
186
+ "step": 6500
187
+ },
188
+ {
189
+ "epoch": 11.0,
190
+ "eval_bleu": 17.6007,
191
+ "eval_gen_len": 43.6994,
192
+ "eval_loss": 2.851884365081787,
193
+ "eval_runtime": 261.9767,
194
+ "eval_samples_per_second": 3.809,
195
+ "eval_steps_per_second": 0.477,
196
+ "step": 6523
197
+ },
198
+ {
199
+ "epoch": 11.8,
200
+ "learning_rate": 4.097807757166948e-06,
201
+ "loss": 0.1902,
202
+ "step": 7000
203
+ },
204
+ {
205
+ "epoch": 12.0,
206
+ "eval_bleu": 17.6333,
207
+ "eval_gen_len": 43.6924,
208
+ "eval_loss": 2.8614132404327393,
209
+ "eval_runtime": 263.2972,
210
+ "eval_samples_per_second": 3.79,
211
+ "eval_steps_per_second": 0.475,
212
+ "step": 7116
213
+ },
214
+ {
215
+ "epoch": 12.65,
216
+ "learning_rate": 3.676222596964587e-06,
217
+ "loss": 0.198,
218
+ "step": 7500
219
+ },
220
+ {
221
+ "epoch": 13.0,
222
+ "eval_bleu": 17.6153,
223
+ "eval_gen_len": 43.7034,
224
+ "eval_loss": 2.865877866744995,
225
+ "eval_runtime": 261.219,
226
+ "eval_samples_per_second": 3.821,
227
+ "eval_steps_per_second": 0.479,
228
+ "step": 7709
229
+ },
230
+ {
231
+ "epoch": 13.49,
232
+ "learning_rate": 3.2546374367622263e-06,
233
+ "loss": 0.1861,
234
+ "step": 8000
235
+ },
236
+ {
237
+ "epoch": 14.0,
238
+ "eval_bleu": 17.5959,
239
+ "eval_gen_len": 43.7154,
240
+ "eval_loss": 2.873347043991089,
241
+ "eval_runtime": 260.1505,
242
+ "eval_samples_per_second": 3.836,
243
+ "eval_steps_per_second": 0.48,
244
+ "step": 8302
245
+ },
246
+ {
247
+ "epoch": 14.33,
248
+ "learning_rate": 2.8330522765598656e-06,
249
+ "loss": 0.1956,
250
+ "step": 8500
251
+ },
252
+ {
253
+ "epoch": 15.0,
254
+ "eval_bleu": 17.6169,
255
+ "eval_gen_len": 43.7164,
256
+ "eval_loss": 2.876323938369751,
257
+ "eval_runtime": 261.1714,
258
+ "eval_samples_per_second": 3.821,
259
+ "eval_steps_per_second": 0.479,
260
+ "step": 8895
261
+ },
262
+ {
263
+ "epoch": 15.18,
264
+ "learning_rate": 2.4114671163575045e-06,
265
+ "loss": 0.1924,
266
+ "step": 9000
267
+ },
268
+ {
269
+ "epoch": 16.0,
270
+ "eval_bleu": 17.5443,
271
+ "eval_gen_len": 43.7194,
272
+ "eval_loss": 2.880269765853882,
273
+ "eval_runtime": 261.8101,
274
+ "eval_samples_per_second": 3.812,
275
+ "eval_steps_per_second": 0.477,
276
+ "step": 9488
277
+ },
278
+ {
279
+ "epoch": 16.02,
280
+ "learning_rate": 1.9898819561551434e-06,
281
+ "loss": 0.1946,
282
+ "step": 9500
283
+ },
284
+ {
285
+ "epoch": 16.86,
286
+ "learning_rate": 1.5682967959527825e-06,
287
+ "loss": 0.1946,
288
+ "step": 10000
289
+ },
290
+ {
291
+ "epoch": 17.0,
292
+ "eval_bleu": 17.577,
293
+ "eval_gen_len": 43.6142,
294
+ "eval_loss": 2.8834807872772217,
295
+ "eval_runtime": 259.2401,
296
+ "eval_samples_per_second": 3.85,
297
+ "eval_steps_per_second": 0.482,
298
+ "step": 10081
299
+ },
300
+ {
301
+ "epoch": 17.71,
302
+ "learning_rate": 1.1467116357504218e-06,
303
+ "loss": 0.1987,
304
+ "step": 10500
305
+ },
306
+ {
307
+ "epoch": 18.0,
308
+ "eval_bleu": 17.5677,
309
+ "eval_gen_len": 43.6623,
310
+ "eval_loss": 2.8818464279174805,
311
+ "eval_runtime": 269.8962,
312
+ "eval_samples_per_second": 3.698,
313
+ "eval_steps_per_second": 0.463,
314
+ "step": 10674
315
+ },
316
+ {
317
+ "epoch": 18.55,
318
+ "learning_rate": 7.251264755480608e-07,
319
+ "loss": 0.2011,
320
+ "step": 11000
321
+ },
322
+ {
323
+ "epoch": 19.0,
324
+ "eval_bleu": 17.6118,
325
+ "eval_gen_len": 43.7395,
326
+ "eval_loss": 2.882765531539917,
327
+ "eval_runtime": 265.3175,
328
+ "eval_samples_per_second": 3.762,
329
+ "eval_steps_per_second": 0.471,
330
+ "step": 11267
331
+ },
332
+ {
333
+ "epoch": 19.39,
334
+ "learning_rate": 3.0354131534569986e-07,
335
+ "loss": 0.2049,
336
+ "step": 11500
337
+ }
338
+ ],
339
+ "max_steps": 11860,
340
+ "num_train_epochs": 20,
341
+ "total_flos": 1623592378957824.0,
342
+ "trial_name": null,
343
+ "trial_params": null
344
+ }