JulesGo committed on
Commit
c0287c2
·
verified ·
1 Parent(s): d0c60db

Fin de l'entraînement

Browse files
Files changed (5) hide show
  1. README.md +3 -3
  2. all_results.json +11 -11
  3. eval_results.json +7 -7
  4. train_results.json +5 -5
  5. trainer_state.json +333 -229
README.md CHANGED
@@ -14,9 +14,9 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
- - Loss: 0.0580
18
- - Mse: 0.1308
19
- - Mae: 0.3150
20
 
21
  ## Model description
22
 
 
14
 
15
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
16
  It achieves the following results on the evaluation set:
17
+ - Loss: 0.0604
18
+ - Mse: 0.1248
19
+ - Mae: 0.3083
20
 
21
  ## Model description
22
 
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 27.0,
3
- "eval_loss": 0.06580791622400284,
4
- "eval_mae": 0.3059428930282593,
5
- "eval_mse": 0.12533096969127655,
6
- "eval_runtime": 10.4766,
7
- "eval_samples_per_second": 7.254,
8
- "eval_steps_per_second": 0.955,
9
  "total_flos": 0.0,
10
- "train_loss": 0.13240765200720894,
11
- "train_runtime": 1534.1197,
12
- "train_samples_per_second": 5.925,
13
- "train_steps_per_second": 0.176
14
  }
 
1
  {
2
+ "epoch": 28.824742268041238,
3
+ "eval_loss": 0.06035061553120613,
4
+ "eval_mae": 0.3083080053329468,
5
+ "eval_mse": 0.12483953684568405,
6
+ "eval_runtime": 26.0473,
7
+ "eval_samples_per_second": 7.448,
8
+ "eval_steps_per_second": 0.96,
9
  "total_flos": 0.0,
10
+ "train_loss": 0.11318666471375359,
11
+ "train_runtime": 4441.3944,
12
+ "train_samples_per_second": 5.221,
13
+ "train_steps_per_second": 0.162
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 27.0,
3
- "eval_loss": 0.06580791622400284,
4
- "eval_mae": 0.3059428930282593,
5
- "eval_mse": 0.12533096969127655,
6
- "eval_runtime": 10.4766,
7
- "eval_samples_per_second": 7.254,
8
- "eval_steps_per_second": 0.955
9
  }
 
1
  {
2
+ "epoch": 28.824742268041238,
3
+ "eval_loss": 0.06035061553120613,
4
+ "eval_mae": 0.3083080053329468,
5
+ "eval_mse": 0.12483953684568405,
6
+ "eval_runtime": 26.0473,
7
+ "eval_samples_per_second": 7.448,
8
+ "eval_steps_per_second": 0.96
9
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 27.0,
3
  "total_flos": 0.0,
4
- "train_loss": 0.13240765200720894,
5
- "train_runtime": 1534.1197,
6
- "train_samples_per_second": 5.925,
7
- "train_steps_per_second": 0.176
8
  }
 
1
  {
2
+ "epoch": 28.824742268041238,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.11318666471375359,
5
+ "train_runtime": 4441.3944,
6
+ "train_samples_per_second": 5.221,
7
+ "train_steps_per_second": 0.162
8
  }
trainer_state.json CHANGED
@@ -1,338 +1,442 @@
1
  {
2
- "best_global_step": 120,
3
- "best_metric": 0.12533096969127655,
4
- "best_model_checkpoint": "./vit_focus/checkpoint-120",
5
- "epoch": 27.0,
6
  "eval_steps": 500,
7
- "global_step": 270,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
- "eval_loss": 0.0993029847741127,
15
- "eval_mae": 0.3380415141582489,
16
- "eval_mse": 0.15286438167095184,
17
- "eval_runtime": 9.9578,
18
- "eval_samples_per_second": 7.632,
19
- "eval_steps_per_second": 1.004,
20
- "step": 10
 
 
 
 
 
 
 
21
  },
22
  {
23
  "epoch": 2.0,
24
- "eval_loss": 0.1050349548459053,
25
- "eval_mae": 0.34093156456947327,
26
- "eval_mse": 0.15535660088062286,
27
- "eval_runtime": 9.7265,
28
- "eval_samples_per_second": 7.814,
29
- "eval_steps_per_second": 1.028,
30
- "step": 20
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 0.09966066479682922,
35
- "eval_mae": 0.3352396786212921,
36
- "eval_mse": 0.14932329952716827,
37
- "eval_runtime": 9.4644,
38
- "eval_samples_per_second": 8.03,
39
- "eval_steps_per_second": 1.057,
40
- "step": 30
 
 
 
 
 
 
 
41
  },
42
  {
43
  "epoch": 4.0,
44
- "grad_norm": 6.926674842834473,
45
- "learning_rate": 4.277777777777778e-05,
46
- "loss": 0.313,
47
- "step": 40
 
 
 
48
  },
49
  {
50
- "epoch": 4.0,
51
- "eval_loss": 0.06556536257266998,
52
- "eval_mae": 0.3157392740249634,
53
- "eval_mse": 0.13447947800159454,
54
- "eval_runtime": 10.1308,
55
- "eval_samples_per_second": 7.502,
56
- "eval_steps_per_second": 0.987,
57
- "step": 40
58
  },
59
  {
60
  "epoch": 5.0,
61
- "eval_loss": 0.06592569500207901,
62
- "eval_mae": 0.3202681839466095,
63
- "eval_mse": 0.13659903407096863,
64
- "eval_runtime": 9.7043,
65
- "eval_samples_per_second": 7.832,
66
- "eval_steps_per_second": 1.03,
67
- "step": 50
68
  },
69
  {
70
  "epoch": 6.0,
71
- "eval_loss": 0.0638844296336174,
72
- "eval_mae": 0.31192123889923096,
73
- "eval_mse": 0.12961846590042114,
74
- "eval_runtime": 9.5179,
75
- "eval_samples_per_second": 7.985,
76
- "eval_steps_per_second": 1.051,
77
- "step": 60
 
 
 
 
 
 
 
78
  },
79
  {
80
  "epoch": 7.0,
81
- "eval_loss": 0.06389027088880539,
82
- "eval_mae": 0.3178236484527588,
83
- "eval_mse": 0.13510307669639587,
84
- "eval_runtime": 9.5025,
85
- "eval_samples_per_second": 7.998,
86
- "eval_steps_per_second": 1.052,
87
- "step": 70
88
  },
89
  {
90
  "epoch": 8.0,
91
- "grad_norm": 4.451300144195557,
92
- "learning_rate": 3.537037037037037e-05,
93
- "loss": 0.1742,
94
- "step": 80
95
  },
96
  {
97
  "epoch": 8.0,
98
- "eval_loss": 0.06391099840402603,
99
- "eval_mae": 0.3085971772670746,
100
- "eval_mse": 0.12736700475215912,
101
- "eval_runtime": 10.1592,
102
- "eval_samples_per_second": 7.481,
103
- "eval_steps_per_second": 0.984,
104
- "step": 80
105
  },
106
  {
107
  "epoch": 9.0,
108
- "eval_loss": 0.07279632240533829,
109
- "eval_mae": 0.3096161186695099,
110
- "eval_mse": 0.12943950295448303,
111
- "eval_runtime": 9.4821,
112
- "eval_samples_per_second": 8.015,
113
- "eval_steps_per_second": 1.055,
114
- "step": 90
 
 
 
 
 
 
 
115
  },
116
  {
117
  "epoch": 10.0,
118
- "eval_loss": 0.06712160259485245,
119
- "eval_mae": 0.3150458335876465,
120
- "eval_mse": 0.13300836086273193,
121
- "eval_runtime": 9.7046,
122
- "eval_samples_per_second": 7.831,
123
- "eval_steps_per_second": 1.03,
124
- "step": 100
125
  },
126
  {
127
  "epoch": 11.0,
128
- "eval_loss": 0.06695493310689926,
129
- "eval_mae": 0.30665045976638794,
130
- "eval_mse": 0.12600918114185333,
131
- "eval_runtime": 9.7852,
132
- "eval_samples_per_second": 7.767,
133
- "eval_steps_per_second": 1.022,
134
- "step": 110
135
  },
136
  {
137
- "epoch": 12.0,
138
- "grad_norm": 2.9957473278045654,
139
- "learning_rate": 2.7962962962962965e-05,
140
- "loss": 0.1284,
141
- "step": 120
142
  },
143
  {
144
  "epoch": 12.0,
145
- "eval_loss": 0.06580791622400284,
146
- "eval_mae": 0.3059428930282593,
147
- "eval_mse": 0.12533096969127655,
148
- "eval_runtime": 9.7135,
149
- "eval_samples_per_second": 7.824,
150
- "eval_steps_per_second": 1.029,
151
- "step": 120
 
 
 
 
 
 
 
152
  },
153
  {
154
  "epoch": 13.0,
155
- "eval_loss": 0.06405826658010483,
156
- "eval_mae": 0.3104270100593567,
157
- "eval_mse": 0.1280805468559265,
158
- "eval_runtime": 12.0741,
159
- "eval_samples_per_second": 6.294,
160
- "eval_steps_per_second": 0.828,
161
- "step": 130
162
  },
163
  {
164
  "epoch": 14.0,
165
- "eval_loss": 0.06428611278533936,
166
- "eval_mae": 0.3104848563671112,
167
- "eval_mse": 0.12893278896808624,
168
- "eval_runtime": 9.5891,
169
- "eval_samples_per_second": 7.926,
170
- "eval_steps_per_second": 1.043,
171
- "step": 140
 
 
 
 
 
 
 
172
  },
173
  {
174
  "epoch": 15.0,
175
- "eval_loss": 0.06487523764371872,
176
- "eval_mae": 0.3171584904193878,
177
- "eval_mse": 0.13420797884464264,
178
- "eval_runtime": 9.5632,
179
- "eval_samples_per_second": 7.947,
180
- "eval_steps_per_second": 1.046,
181
- "step": 150
182
  },
183
  {
184
  "epoch": 16.0,
185
- "grad_norm": 1.922245740890503,
186
- "learning_rate": 2.0555555555555555e-05,
187
- "loss": 0.0981,
188
- "step": 160
189
  },
190
  {
191
  "epoch": 16.0,
192
- "eval_loss": 0.06558659672737122,
193
- "eval_mae": 0.30849871039390564,
194
- "eval_mse": 0.12756428122520447,
195
- "eval_runtime": 9.5905,
196
- "eval_samples_per_second": 7.924,
197
- "eval_steps_per_second": 1.043,
198
- "step": 160
199
  },
200
  {
201
  "epoch": 17.0,
202
- "eval_loss": 0.06274469941854477,
203
- "eval_mae": 0.3136182427406311,
204
- "eval_mse": 0.13160544633865356,
205
- "eval_runtime": 10.0109,
206
- "eval_samples_per_second": 7.592,
207
- "eval_steps_per_second": 0.999,
208
- "step": 170
 
 
 
 
 
 
 
209
  },
210
  {
211
  "epoch": 18.0,
212
- "eval_loss": 0.06201491877436638,
213
- "eval_mae": 0.3168633282184601,
214
- "eval_mse": 0.1343080997467041,
215
- "eval_runtime": 9.9918,
216
- "eval_samples_per_second": 7.606,
217
- "eval_steps_per_second": 1.001,
218
- "step": 180
219
  },
220
  {
221
  "epoch": 19.0,
222
- "eval_loss": 0.0631915032863617,
223
- "eval_mae": 0.31292420625686646,
224
- "eval_mse": 0.13110676407814026,
225
- "eval_runtime": 9.5351,
226
- "eval_samples_per_second": 7.971,
227
- "eval_steps_per_second": 1.049,
228
- "step": 190
229
  },
230
  {
231
- "epoch": 20.0,
232
- "grad_norm": 1.9687647819519043,
233
- "learning_rate": 1.3148148148148148e-05,
234
- "loss": 0.0767,
235
- "step": 200
236
  },
237
  {
238
  "epoch": 20.0,
239
- "eval_loss": 0.06296339631080627,
240
- "eval_mae": 0.3142727017402649,
241
- "eval_mse": 0.1326274573802948,
242
- "eval_runtime": 9.7999,
243
- "eval_samples_per_second": 7.755,
244
- "eval_steps_per_second": 1.02,
245
- "step": 200
 
 
 
 
 
 
 
246
  },
247
  {
248
  "epoch": 21.0,
249
- "eval_loss": 0.06408733129501343,
250
- "eval_mae": 0.311717689037323,
251
- "eval_mse": 0.12986762821674347,
252
- "eval_runtime": 9.6462,
253
- "eval_samples_per_second": 7.879,
254
- "eval_steps_per_second": 1.037,
255
- "step": 210
256
  },
257
  {
258
  "epoch": 22.0,
259
- "eval_loss": 0.06340750306844711,
260
- "eval_mae": 0.3114081621170044,
261
- "eval_mse": 0.12940751016139984,
262
- "eval_runtime": 9.5394,
263
- "eval_samples_per_second": 7.967,
264
- "eval_steps_per_second": 1.048,
265
- "step": 220
 
 
 
 
 
 
 
266
  },
267
  {
268
  "epoch": 23.0,
269
- "eval_loss": 0.06285858899354935,
270
- "eval_mae": 0.31304195523262024,
271
- "eval_mse": 0.13149800896644592,
272
- "eval_runtime": 9.8923,
273
- "eval_samples_per_second": 7.683,
274
- "eval_steps_per_second": 1.011,
275
- "step": 230
276
  },
277
  {
278
  "epoch": 24.0,
279
- "grad_norm": 1.0159116983413696,
280
- "learning_rate": 5.740740740740741e-06,
281
- "loss": 0.0615,
282
- "step": 240
283
  },
284
  {
285
  "epoch": 24.0,
286
- "eval_loss": 0.06115531921386719,
287
- "eval_mae": 0.3123721480369568,
288
- "eval_mse": 0.13078482449054718,
289
- "eval_runtime": 9.6638,
290
- "eval_samples_per_second": 7.864,
291
- "eval_steps_per_second": 1.035,
292
- "step": 240
293
  },
294
  {
295
  "epoch": 25.0,
296
- "eval_loss": 0.059913910925388336,
297
- "eval_mae": 0.31175902485847473,
298
- "eval_mse": 0.13015513122081757,
299
- "eval_runtime": 9.6921,
300
- "eval_samples_per_second": 7.841,
301
- "eval_steps_per_second": 1.032,
302
- "step": 250
303
  },
304
  {
305
- "epoch": 26.0,
306
- "eval_loss": 0.06085545942187309,
307
- "eval_mae": 0.313151478767395,
308
- "eval_mse": 0.13129989802837372,
309
- "eval_runtime": 9.5449,
310
- "eval_samples_per_second": 7.962,
311
- "eval_steps_per_second": 1.048,
312
- "step": 260
313
  },
314
  {
315
- "epoch": 27.0,
316
- "eval_loss": 0.060885023325681686,
317
- "eval_mae": 0.3116842210292816,
318
- "eval_mse": 0.13011318445205688,
319
- "eval_runtime": 9.6423,
320
- "eval_samples_per_second": 7.882,
321
- "eval_steps_per_second": 1.037,
322
- "step": 270
323
  },
324
  {
325
  "epoch": 27.0,
326
- "step": 270,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  "total_flos": 0.0,
328
- "train_loss": 0.13240765200720894,
329
- "train_runtime": 1534.1197,
330
- "train_samples_per_second": 5.925,
331
- "train_steps_per_second": 0.176
332
  }
333
  ],
334
  "logging_steps": 40,
335
- "max_steps": 270,
336
  "num_input_tokens_seen": 0,
337
  "num_train_epochs": 30,
338
  "save_steps": 500,
 
1
  {
2
+ "best_global_step": 375,
3
+ "best_metric": 0.12483953684568405,
4
+ "best_model_checkpoint": "./vit_focus/checkpoint-375",
5
+ "epoch": 28.824742268041238,
6
  "eval_steps": 500,
7
+ "global_step": 720,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 1.0,
14
+ "eval_loss": 0.06848403811454773,
15
+ "eval_mae": 0.3276480436325073,
16
+ "eval_mse": 0.13970328867435455,
17
+ "eval_runtime": 25.3431,
18
+ "eval_samples_per_second": 7.655,
19
+ "eval_steps_per_second": 0.986,
20
+ "step": 25
21
+ },
22
+ {
23
+ "epoch": 1.6185567010309279,
24
+ "grad_norm": 6.8434062004089355,
25
+ "learning_rate": 4.7291666666666666e-05,
26
+ "loss": 0.2799,
27
+ "step": 40
28
  },
29
  {
30
  "epoch": 2.0,
31
+ "eval_loss": 0.06143104285001755,
32
+ "eval_mae": 0.31839117407798767,
33
+ "eval_mse": 0.13270144164562225,
34
+ "eval_runtime": 25.5699,
35
+ "eval_samples_per_second": 7.587,
36
+ "eval_steps_per_second": 0.978,
37
+ "step": 50
38
  },
39
  {
40
  "epoch": 3.0,
41
+ "eval_loss": 0.05753186717629433,
42
+ "eval_mae": 0.31711024045944214,
43
+ "eval_mse": 0.13165348768234253,
44
+ "eval_runtime": 26.2931,
45
+ "eval_samples_per_second": 7.378,
46
+ "eval_steps_per_second": 0.951,
47
+ "step": 75
48
+ },
49
+ {
50
+ "epoch": 3.2061855670103094,
51
+ "grad_norm": 6.380847454071045,
52
+ "learning_rate": 4.4513888888888885e-05,
53
+ "loss": 0.2134,
54
+ "step": 80
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "eval_loss": 0.06834284961223602,
59
+ "eval_mae": 0.3235570788383484,
60
+ "eval_mse": 0.13699457049369812,
61
+ "eval_runtime": 26.4774,
62
+ "eval_samples_per_second": 7.327,
63
+ "eval_steps_per_second": 0.944,
64
+ "step": 100
65
  },
66
  {
67
+ "epoch": 4.824742268041237,
68
+ "grad_norm": 11.873320579528809,
69
+ "learning_rate": 4.173611111111112e-05,
70
+ "loss": 0.2018,
71
+ "step": 120
 
 
 
72
  },
73
  {
74
  "epoch": 5.0,
75
+ "eval_loss": 0.0609874464571476,
76
+ "eval_mae": 0.3213047683238983,
77
+ "eval_mse": 0.13525618612766266,
78
+ "eval_runtime": 27.9495,
79
+ "eval_samples_per_second": 6.941,
80
+ "eval_steps_per_second": 0.894,
81
+ "step": 125
82
  },
83
  {
84
  "epoch": 6.0,
85
+ "eval_loss": 0.05957989767193794,
86
+ "eval_mae": 0.31332722306251526,
87
+ "eval_mse": 0.12951943278312683,
88
+ "eval_runtime": 33.4247,
89
+ "eval_samples_per_second": 5.804,
90
+ "eval_steps_per_second": 0.748,
91
+ "step": 150
92
+ },
93
+ {
94
+ "epoch": 6.412371134020619,
95
+ "grad_norm": 8.795011520385742,
96
+ "learning_rate": 3.8958333333333336e-05,
97
+ "loss": 0.1714,
98
+ "step": 160
99
  },
100
  {
101
  "epoch": 7.0,
102
+ "eval_loss": 0.058783117681741714,
103
+ "eval_mae": 0.3186197876930237,
104
+ "eval_mse": 0.13265401124954224,
105
+ "eval_runtime": 25.816,
106
+ "eval_samples_per_second": 7.515,
107
+ "eval_steps_per_second": 0.968,
108
+ "step": 175
109
  },
110
  {
111
  "epoch": 8.0,
112
+ "grad_norm": 1.3795604705810547,
113
+ "learning_rate": 3.6180555555555555e-05,
114
+ "loss": 0.1589,
115
+ "step": 200
116
  },
117
  {
118
  "epoch": 8.0,
119
+ "eval_loss": 0.062091995030641556,
120
+ "eval_mae": 0.32038411498069763,
121
+ "eval_mse": 0.13478334248065948,
122
+ "eval_runtime": 27.3432,
123
+ "eval_samples_per_second": 7.095,
124
+ "eval_steps_per_second": 0.914,
125
+ "step": 200
126
  },
127
  {
128
  "epoch": 9.0,
129
+ "eval_loss": 0.06154802814126015,
130
+ "eval_mae": 0.315742552280426,
131
+ "eval_mse": 0.13055546581745148,
132
+ "eval_runtime": 30.2503,
133
+ "eval_samples_per_second": 6.413,
134
+ "eval_steps_per_second": 0.826,
135
+ "step": 225
136
+ },
137
+ {
138
+ "epoch": 9.618556701030927,
139
+ "grad_norm": 13.09292984008789,
140
+ "learning_rate": 3.340277777777778e-05,
141
+ "loss": 0.1381,
142
+ "step": 240
143
  },
144
  {
145
  "epoch": 10.0,
146
+ "eval_loss": 0.055746473371982574,
147
+ "eval_mae": 0.31175413727760315,
148
+ "eval_mse": 0.12797364592552185,
149
+ "eval_runtime": 28.7143,
150
+ "eval_samples_per_second": 6.756,
151
+ "eval_steps_per_second": 0.871,
152
+ "step": 250
153
  },
154
  {
155
  "epoch": 11.0,
156
+ "eval_loss": 0.058016713708639145,
157
+ "eval_mae": 0.31583845615386963,
158
+ "eval_mse": 0.1310899704694748,
159
+ "eval_runtime": 25.6623,
160
+ "eval_samples_per_second": 7.56,
161
+ "eval_steps_per_second": 0.974,
162
+ "step": 275
163
  },
164
  {
165
+ "epoch": 11.206185567010309,
166
+ "grad_norm": 6.338009834289551,
167
+ "learning_rate": 3.0625000000000006e-05,
168
+ "loss": 0.1229,
169
+ "step": 280
170
  },
171
  {
172
  "epoch": 12.0,
173
+ "eval_loss": 0.05634809657931328,
174
+ "eval_mae": 0.31390878558158875,
175
+ "eval_mse": 0.12944912910461426,
176
+ "eval_runtime": 27.5624,
177
+ "eval_samples_per_second": 7.039,
178
+ "eval_steps_per_second": 0.907,
179
+ "step": 300
180
+ },
181
+ {
182
+ "epoch": 12.824742268041238,
183
+ "grad_norm": 5.399359703063965,
184
+ "learning_rate": 2.7847222222222224e-05,
185
+ "loss": 0.1112,
186
+ "step": 320
187
  },
188
  {
189
  "epoch": 13.0,
190
+ "eval_loss": 0.06292004883289337,
191
+ "eval_mae": 0.3253486454486847,
192
+ "eval_mse": 0.1393056958913803,
193
+ "eval_runtime": 26.5714,
194
+ "eval_samples_per_second": 7.301,
195
+ "eval_steps_per_second": 0.941,
196
+ "step": 325
197
  },
198
  {
199
  "epoch": 14.0,
200
+ "eval_loss": 0.060515470802783966,
201
+ "eval_mae": 0.31283605098724365,
202
+ "eval_mse": 0.12898671627044678,
203
+ "eval_runtime": 29.176,
204
+ "eval_samples_per_second": 6.649,
205
+ "eval_steps_per_second": 0.857,
206
+ "step": 350
207
+ },
208
+ {
209
+ "epoch": 14.412371134020619,
210
+ "grad_norm": 8.647767066955566,
211
+ "learning_rate": 2.5069444444444447e-05,
212
+ "loss": 0.0999,
213
+ "step": 360
214
  },
215
  {
216
  "epoch": 15.0,
217
+ "eval_loss": 0.06035061553120613,
218
+ "eval_mae": 0.3083080053329468,
219
+ "eval_mse": 0.12483953684568405,
220
+ "eval_runtime": 30.5066,
221
+ "eval_samples_per_second": 6.359,
222
+ "eval_steps_per_second": 0.819,
223
+ "step": 375
224
  },
225
  {
226
  "epoch": 16.0,
227
+ "grad_norm": 2.2351105213165283,
228
+ "learning_rate": 2.229166666666667e-05,
229
+ "loss": 0.0896,
230
+ "step": 400
231
  },
232
  {
233
  "epoch": 16.0,
234
+ "eval_loss": 0.055619120597839355,
235
+ "eval_mae": 0.3152642548084259,
236
+ "eval_mse": 0.1307651251554489,
237
+ "eval_runtime": 26.7772,
238
+ "eval_samples_per_second": 7.245,
239
+ "eval_steps_per_second": 0.934,
240
+ "step": 400
241
  },
242
  {
243
  "epoch": 17.0,
244
+ "eval_loss": 0.060975782573223114,
245
+ "eval_mae": 0.3201379179954529,
246
+ "eval_mse": 0.13466721773147583,
247
+ "eval_runtime": 26.1328,
248
+ "eval_samples_per_second": 7.424,
249
+ "eval_steps_per_second": 0.957,
250
+ "step": 425
251
+ },
252
+ {
253
+ "epoch": 17.61855670103093,
254
+ "grad_norm": 4.430182456970215,
255
+ "learning_rate": 1.951388888888889e-05,
256
+ "loss": 0.0776,
257
+ "step": 440
258
  },
259
  {
260
  "epoch": 18.0,
261
+ "eval_loss": 0.05742386728525162,
262
+ "eval_mae": 0.30930283665657043,
263
+ "eval_mse": 0.125941202044487,
264
+ "eval_runtime": 26.984,
265
+ "eval_samples_per_second": 7.189,
266
+ "eval_steps_per_second": 0.926,
267
+ "step": 450
268
  },
269
  {
270
  "epoch": 19.0,
271
+ "eval_loss": 0.05837095528841019,
272
+ "eval_mae": 0.308516263961792,
273
+ "eval_mse": 0.12533944845199585,
274
+ "eval_runtime": 26.6008,
275
+ "eval_samples_per_second": 7.293,
276
+ "eval_steps_per_second": 0.94,
277
+ "step": 475
278
  },
279
  {
280
+ "epoch": 19.20618556701031,
281
+ "grad_norm": 2.85257887840271,
282
+ "learning_rate": 1.673611111111111e-05,
283
+ "loss": 0.069,
284
+ "step": 480
285
  },
286
  {
287
  "epoch": 20.0,
288
+ "eval_loss": 0.05945156514644623,
289
+ "eval_mae": 0.30969831347465515,
290
+ "eval_mse": 0.12652333080768585,
291
+ "eval_runtime": 26.4776,
292
+ "eval_samples_per_second": 7.327,
293
+ "eval_steps_per_second": 0.944,
294
+ "step": 500
295
+ },
296
+ {
297
+ "epoch": 20.824742268041238,
298
+ "grad_norm": 5.132810115814209,
299
+ "learning_rate": 1.3958333333333335e-05,
300
+ "loss": 0.0649,
301
+ "step": 520
302
  },
303
  {
304
  "epoch": 21.0,
305
+ "eval_loss": 0.05759776383638382,
306
+ "eval_mae": 0.31495973467826843,
307
+ "eval_mse": 0.13078562915325165,
308
+ "eval_runtime": 26.9404,
309
+ "eval_samples_per_second": 7.201,
310
+ "eval_steps_per_second": 0.928,
311
+ "step": 525
312
  },
313
  {
314
  "epoch": 22.0,
315
+ "eval_loss": 0.05743802338838577,
316
+ "eval_mae": 0.3109038770198822,
317
+ "eval_mse": 0.12736806273460388,
318
+ "eval_runtime": 26.4163,
319
+ "eval_samples_per_second": 7.344,
320
+ "eval_steps_per_second": 0.946,
321
+ "step": 550
322
+ },
323
+ {
324
+ "epoch": 22.412371134020617,
325
+ "grad_norm": 2.2481906414031982,
326
+ "learning_rate": 1.1180555555555557e-05,
327
+ "loss": 0.056,
328
+ "step": 560
329
  },
330
  {
331
  "epoch": 23.0,
332
+ "eval_loss": 0.05784228816628456,
333
+ "eval_mae": 0.3148549795150757,
334
+ "eval_mse": 0.1306976079940796,
335
+ "eval_runtime": 26.7439,
336
+ "eval_samples_per_second": 7.254,
337
+ "eval_steps_per_second": 0.935,
338
+ "step": 575
339
  },
340
  {
341
  "epoch": 24.0,
342
+ "grad_norm": 0.4750465154647827,
343
+ "learning_rate": 8.402777777777779e-06,
344
+ "loss": 0.0508,
345
+ "step": 600
346
  },
347
  {
348
  "epoch": 24.0,
349
+ "eval_loss": 0.056339628994464874,
350
+ "eval_mae": 0.31389498710632324,
351
+ "eval_mse": 0.129553884267807,
352
+ "eval_runtime": 26.4072,
353
+ "eval_samples_per_second": 7.346,
354
+ "eval_steps_per_second": 0.947,
355
+ "step": 600
356
  },
357
  {
358
  "epoch": 25.0,
359
+ "eval_loss": 0.05677202716469765,
360
+ "eval_mae": 0.3157429099082947,
361
+ "eval_mse": 0.13120532035827637,
362
+ "eval_runtime": 26.4705,
363
+ "eval_samples_per_second": 7.329,
364
+ "eval_steps_per_second": 0.944,
365
+ "step": 625
366
  },
367
  {
368
+ "epoch": 25.61855670103093,
369
+ "grad_norm": 1.15388023853302,
370
+ "learning_rate": 5.625e-06,
371
+ "loss": 0.0468,
372
+ "step": 640
 
 
 
373
  },
374
  {
375
+ "epoch": 26.0,
376
+ "eval_loss": 0.05777855962514877,
377
+ "eval_mae": 0.3123283386230469,
378
+ "eval_mse": 0.12870892882347107,
379
+ "eval_runtime": 27.5864,
380
+ "eval_samples_per_second": 7.032,
381
+ "eval_steps_per_second": 0.906,
382
+ "step": 650
383
  },
384
  {
385
  "epoch": 27.0,
386
+ "eval_loss": 0.057855378836393356,
387
+ "eval_mae": 0.3146502673625946,
388
+ "eval_mse": 0.13045576214790344,
389
+ "eval_runtime": 25.4351,
390
+ "eval_samples_per_second": 7.627,
391
+ "eval_steps_per_second": 0.983,
392
+ "step": 675
393
+ },
394
+ {
395
+ "epoch": 27.20618556701031,
396
+ "grad_norm": 1.1987111568450928,
397
+ "learning_rate": 2.8472222222222224e-06,
398
+ "loss": 0.0432,
399
+ "step": 680
400
+ },
401
+ {
402
+ "epoch": 28.0,
403
+ "eval_loss": 0.057245105504989624,
404
+ "eval_mae": 0.3143112361431122,
405
+ "eval_mse": 0.13008151948451996,
406
+ "eval_runtime": 25.4633,
407
+ "eval_samples_per_second": 7.619,
408
+ "eval_steps_per_second": 0.982,
409
+ "step": 700
410
+ },
411
+ {
412
+ "epoch": 28.824742268041238,
413
+ "grad_norm": 1.100252628326416,
414
+ "learning_rate": 6.944444444444445e-08,
415
+ "loss": 0.0419,
416
+ "step": 720
417
+ },
418
+ {
419
+ "epoch": 28.824742268041238,
420
+ "eval_loss": 0.05797496438026428,
421
+ "eval_mae": 0.3150193989276886,
422
+ "eval_mse": 0.13078629970550537,
423
+ "eval_runtime": 27.1785,
424
+ "eval_samples_per_second": 7.138,
425
+ "eval_steps_per_second": 0.92,
426
+ "step": 720
427
+ },
428
+ {
429
+ "epoch": 28.824742268041238,
430
+ "step": 720,
431
  "total_flos": 0.0,
432
+ "train_loss": 0.11318666471375359,
433
+ "train_runtime": 4441.3944,
434
+ "train_samples_per_second": 5.221,
435
+ "train_steps_per_second": 0.162
436
  }
437
  ],
438
  "logging_steps": 40,
439
+ "max_steps": 720,
440
  "num_input_tokens_seen": 0,
441
  "num_train_epochs": 30,
442
  "save_steps": 500,