amyeroberts HF staff committed on
Commit
9af1fd9
1 Parent(s): 0afc0f8

End of training

Browse files
all_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_loss": 6395.94580078125,
4
- "eval_runtime": 0.6002,
5
- "eval_samples_per_second": 13.329,
6
- "eval_steps_per_second": 1.666,
7
- "train_loss": 6254.356591796875,
8
- "train_runtime": 330.5487,
9
- "train_samples_per_second": 1.936,
10
- "train_steps_per_second": 0.242
11
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "eval_loss": 6388.02587890625,
4
+ "eval_runtime": 0.6211,
5
+ "eval_samples_per_second": 12.88,
6
+ "eval_steps_per_second": 1.61,
7
+ "train_loss": 6294.72984375,
8
+ "train_runtime": 782.6675,
9
+ "train_samples_per_second": 2.044,
10
+ "train_steps_per_second": 0.256
11
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 20.0,
3
- "eval_loss": 6395.94580078125,
4
- "eval_runtime": 0.6002,
5
- "eval_samples_per_second": 13.329,
6
- "eval_steps_per_second": 1.666
7
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "eval_loss": 6388.02587890625,
4
+ "eval_runtime": 0.6211,
5
+ "eval_samples_per_second": 12.88,
6
+ "eval_steps_per_second": 1.61
7
  }
runs/Jul19_14-13-11_amy-2-gpu/events.out.tfevents.1689776807.amy-2-gpu.101345.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a0692b94998ca953e1bcb62053c26f59c67dfa4b008cd0d9b12099ffaa4eb09
3
+ size 359
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 20.0,
3
- "train_loss": 6254.356591796875,
4
- "train_runtime": 330.5487,
5
- "train_samples_per_second": 1.936,
6
- "train_steps_per_second": 0.242
7
  }
 
1
  {
2
+ "epoch": 50.0,
3
+ "train_loss": 6294.72984375,
4
+ "train_runtime": 782.6675,
5
+ "train_samples_per_second": 2.044,
6
+ "train_steps_per_second": 0.256
7
  }
trainer_state.json CHANGED
@@ -1,233 +1,545 @@
1
  {
2
- "best_metric": 6395.94580078125,
3
- "best_model_checkpoint": "./coco_outputs/checkpoint-72",
4
- "epoch": 20.0,
5
- "global_step": 80,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_loss": 6398.77734375,
13
- "eval_runtime": 0.5241,
14
- "eval_samples_per_second": 15.264,
15
- "eval_steps_per_second": 1.908,
16
  "step": 4
17
  },
18
  {
19
  "epoch": 2.0,
20
- "eval_loss": 6398.435546875,
21
- "eval_runtime": 0.5241,
22
- "eval_samples_per_second": 15.265,
23
- "eval_steps_per_second": 1.908,
24
  "step": 8
25
  },
26
  {
27
  "epoch": 2.5,
28
- "learning_rate": 8.750000000000001e-06,
29
- "loss": 6176.0926,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 3.0,
34
- "eval_loss": 6398.2265625,
35
- "eval_runtime": 0.5251,
36
- "eval_samples_per_second": 15.235,
37
- "eval_steps_per_second": 1.904,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 4.0,
42
- "eval_loss": 6397.8564453125,
43
- "eval_runtime": 0.5329,
44
- "eval_samples_per_second": 15.013,
45
- "eval_steps_per_second": 1.877,
46
  "step": 16
47
  },
48
  {
49
  "epoch": 5.0,
50
- "learning_rate": 7.500000000000001e-06,
51
- "loss": 6292.8656,
52
  "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
- "eval_loss": 6397.6298828125,
57
- "eval_runtime": 0.5291,
58
- "eval_samples_per_second": 15.121,
59
- "eval_steps_per_second": 1.89,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 6.0,
64
- "eval_loss": 6397.31787109375,
65
- "eval_runtime": 0.5319,
66
- "eval_samples_per_second": 15.042,
67
- "eval_steps_per_second": 1.88,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 7.0,
72
- "eval_loss": 6396.95263671875,
73
- "eval_runtime": 0.5129,
74
- "eval_samples_per_second": 15.596,
75
- "eval_steps_per_second": 1.95,
76
  "step": 28
77
  },
78
  {
79
  "epoch": 7.5,
80
- "learning_rate": 6.25e-06,
81
- "loss": 6231.0688,
82
  "step": 30
83
  },
84
  {
85
  "epoch": 8.0,
86
- "eval_loss": 6396.79052734375,
87
- "eval_runtime": 0.5163,
88
- "eval_samples_per_second": 15.494,
89
- "eval_steps_per_second": 1.937,
90
  "step": 32
91
  },
92
  {
93
  "epoch": 9.0,
94
- "eval_loss": 6396.6748046875,
95
- "eval_runtime": 0.5303,
96
- "eval_samples_per_second": 15.085,
97
- "eval_steps_per_second": 1.886,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 10.0,
102
- "learning_rate": 5e-06,
103
- "loss": 6090.4715,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 10.0,
108
- "eval_loss": 6396.595703125,
109
- "eval_runtime": 0.5278,
110
- "eval_samples_per_second": 15.158,
111
- "eval_steps_per_second": 1.895,
112
  "step": 40
113
  },
114
  {
115
  "epoch": 11.0,
116
- "eval_loss": 6396.53466796875,
117
- "eval_runtime": 0.5332,
118
- "eval_samples_per_second": 15.004,
119
- "eval_steps_per_second": 1.876,
120
  "step": 44
121
  },
122
  {
123
  "epoch": 12.0,
124
- "eval_loss": 6396.41064453125,
125
- "eval_runtime": 0.5371,
126
- "eval_samples_per_second": 14.895,
127
- "eval_steps_per_second": 1.862,
128
  "step": 48
129
  },
130
  {
131
  "epoch": 12.5,
132
- "learning_rate": 3.7500000000000005e-06,
133
- "loss": 6268.359,
134
  "step": 50
135
  },
136
  {
137
  "epoch": 13.0,
138
- "eval_loss": 6396.3232421875,
139
- "eval_runtime": 0.521,
140
- "eval_samples_per_second": 15.354,
141
- "eval_steps_per_second": 1.919,
142
  "step": 52
143
  },
144
  {
145
  "epoch": 14.0,
146
- "eval_loss": 6396.234375,
147
- "eval_runtime": 0.5384,
148
- "eval_samples_per_second": 14.858,
149
- "eval_steps_per_second": 1.857,
150
  "step": 56
151
  },
152
  {
153
  "epoch": 15.0,
154
- "learning_rate": 2.5e-06,
155
- "loss": 6384.7945,
156
  "step": 60
157
  },
158
  {
159
  "epoch": 15.0,
160
- "eval_loss": 6396.095703125,
161
- "eval_runtime": 0.5186,
162
- "eval_samples_per_second": 15.426,
163
- "eval_steps_per_second": 1.928,
164
  "step": 60
165
  },
166
  {
167
  "epoch": 16.0,
168
- "eval_loss": 6396.09033203125,
169
- "eval_runtime": 0.5163,
170
- "eval_samples_per_second": 15.496,
171
- "eval_steps_per_second": 1.937,
172
  "step": 64
173
  },
174
  {
175
  "epoch": 17.0,
176
- "eval_loss": 6396.0029296875,
177
- "eval_runtime": 0.5253,
178
- "eval_samples_per_second": 15.228,
179
- "eval_steps_per_second": 1.904,
180
  "step": 68
181
  },
182
  {
183
  "epoch": 17.5,
184
- "learning_rate": 1.25e-06,
185
- "loss": 6325.7992,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 18.0,
190
- "eval_loss": 6395.94580078125,
191
- "eval_runtime": 0.5319,
192
- "eval_samples_per_second": 15.042,
193
- "eval_steps_per_second": 1.88,
194
  "step": 72
195
  },
196
  {
197
  "epoch": 19.0,
198
- "eval_loss": 6395.97802734375,
199
- "eval_runtime": 0.5298,
200
- "eval_samples_per_second": 15.099,
201
- "eval_steps_per_second": 1.887,
202
  "step": 76
203
  },
204
  {
205
  "epoch": 20.0,
206
- "learning_rate": 0.0,
207
- "loss": 6265.4016,
208
  "step": 80
209
  },
210
  {
211
  "epoch": 20.0,
212
- "eval_loss": 6395.9765625,
213
- "eval_runtime": 0.5361,
214
- "eval_samples_per_second": 14.924,
215
- "eval_steps_per_second": 1.865,
216
  "step": 80
217
  },
218
  {
219
- "epoch": 20.0,
220
- "step": 80,
221
- "total_flos": 3.060008460288e+17,
222
- "train_loss": 6254.356591796875,
223
- "train_runtime": 330.5487,
224
- "train_samples_per_second": 1.936,
225
- "train_steps_per_second": 0.242
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  }
227
  ],
228
- "max_steps": 80,
229
- "num_train_epochs": 20,
230
- "total_flos": 3.060008460288e+17,
231
  "trial_name": null,
232
  "trial_params": null
233
  }
 
1
  {
2
+ "best_metric": 6388.02587890625,
3
+ "best_model_checkpoint": "./coco_outputs/checkpoint-148",
4
+ "epoch": 50.0,
5
+ "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_loss": 6397.626953125,
13
+ "eval_runtime": 0.5268,
14
+ "eval_samples_per_second": 15.187,
15
+ "eval_steps_per_second": 1.898,
16
  "step": 4
17
  },
18
  {
19
  "epoch": 2.0,
20
+ "eval_loss": 6396.95263671875,
21
+ "eval_runtime": 0.5209,
22
+ "eval_samples_per_second": 15.358,
23
+ "eval_steps_per_second": 1.92,
24
  "step": 8
25
  },
26
  {
27
  "epoch": 2.5,
28
+ "learning_rate": 4.75e-05,
29
+ "loss": 6175.4402,
30
  "step": 10
31
  },
32
  {
33
  "epoch": 3.0,
34
+ "eval_loss": 6396.91357421875,
35
+ "eval_runtime": 0.5052,
36
+ "eval_samples_per_second": 15.836,
37
+ "eval_steps_per_second": 1.98,
38
  "step": 12
39
  },
40
  {
41
  "epoch": 4.0,
42
+ "eval_loss": 6396.29931640625,
43
+ "eval_runtime": 0.527,
44
+ "eval_samples_per_second": 15.181,
45
+ "eval_steps_per_second": 1.898,
46
  "step": 16
47
  },
48
  {
49
  "epoch": 5.0,
50
+ "learning_rate": 4.5e-05,
51
+ "loss": 6291.4262,
52
  "step": 20
53
  },
54
  {
55
  "epoch": 5.0,
56
+ "eval_loss": 6395.5556640625,
57
+ "eval_runtime": 0.5357,
58
+ "eval_samples_per_second": 14.934,
59
+ "eval_steps_per_second": 1.867,
60
  "step": 20
61
  },
62
  {
63
  "epoch": 6.0,
64
+ "eval_loss": 6394.595703125,
65
+ "eval_runtime": 0.5351,
66
+ "eval_samples_per_second": 14.951,
67
+ "eval_steps_per_second": 1.869,
68
  "step": 24
69
  },
70
  {
71
  "epoch": 7.0,
72
+ "eval_loss": 6393.72900390625,
73
+ "eval_runtime": 0.5361,
74
+ "eval_samples_per_second": 14.922,
75
+ "eval_steps_per_second": 1.865,
76
  "step": 28
77
  },
78
  {
79
  "epoch": 7.5,
80
+ "learning_rate": 4.25e-05,
81
+ "loss": 6228.9867,
82
  "step": 30
83
  },
84
  {
85
  "epoch": 8.0,
86
+ "eval_loss": 6393.19140625,
87
+ "eval_runtime": 0.5377,
88
+ "eval_samples_per_second": 14.879,
89
+ "eval_steps_per_second": 1.86,
90
  "step": 32
91
  },
92
  {
93
  "epoch": 9.0,
94
+ "eval_loss": 6392.28369140625,
95
+ "eval_runtime": 0.6156,
96
+ "eval_samples_per_second": 12.995,
97
+ "eval_steps_per_second": 1.624,
98
  "step": 36
99
  },
100
  {
101
  "epoch": 10.0,
102
+ "learning_rate": 4e-05,
103
+ "loss": 6087.3184,
104
  "step": 40
105
  },
106
  {
107
  "epoch": 10.0,
108
+ "eval_loss": 6391.6708984375,
109
+ "eval_runtime": 0.5319,
110
+ "eval_samples_per_second": 15.04,
111
+ "eval_steps_per_second": 1.88,
112
  "step": 40
113
  },
114
  {
115
  "epoch": 11.0,
116
+ "eval_loss": 6390.72705078125,
117
+ "eval_runtime": 0.5435,
118
+ "eval_samples_per_second": 14.719,
119
+ "eval_steps_per_second": 1.84,
120
  "step": 44
121
  },
122
  {
123
  "epoch": 12.0,
124
+ "eval_loss": 6389.6669921875,
125
+ "eval_runtime": 0.5411,
126
+ "eval_samples_per_second": 14.785,
127
+ "eval_steps_per_second": 1.848,
128
  "step": 48
129
  },
130
  {
131
  "epoch": 12.5,
132
+ "learning_rate": 3.7500000000000003e-05,
133
+ "loss": 6263.3945,
134
  "step": 50
135
  },
136
  {
137
  "epoch": 13.0,
138
+ "eval_loss": 6389.49072265625,
139
+ "eval_runtime": 0.5174,
140
+ "eval_samples_per_second": 15.462,
141
+ "eval_steps_per_second": 1.933,
142
  "step": 52
143
  },
144
  {
145
  "epoch": 14.0,
146
+ "eval_loss": 6388.85400390625,
147
+ "eval_runtime": 0.5481,
148
+ "eval_samples_per_second": 14.597,
149
+ "eval_steps_per_second": 1.825,
150
  "step": 56
151
  },
152
  {
153
  "epoch": 15.0,
154
+ "learning_rate": 3.5e-05,
155
+ "loss": 6378.8203,
156
  "step": 60
157
  },
158
  {
159
  "epoch": 15.0,
160
+ "eval_loss": 6388.69775390625,
161
+ "eval_runtime": 0.5244,
162
+ "eval_samples_per_second": 15.255,
163
+ "eval_steps_per_second": 1.907,
164
  "step": 60
165
  },
166
  {
167
  "epoch": 16.0,
168
+ "eval_loss": 6388.6611328125,
169
+ "eval_runtime": 0.5266,
170
+ "eval_samples_per_second": 15.193,
171
+ "eval_steps_per_second": 1.899,
172
  "step": 64
173
  },
174
  {
175
  "epoch": 17.0,
176
+ "eval_loss": 6388.54296875,
177
+ "eval_runtime": 0.5529,
178
+ "eval_samples_per_second": 14.47,
179
+ "eval_steps_per_second": 1.809,
180
  "step": 68
181
  },
182
  {
183
  "epoch": 17.5,
184
+ "learning_rate": 3.2500000000000004e-05,
185
+ "loss": 6319.1008,
186
  "step": 70
187
  },
188
  {
189
  "epoch": 18.0,
190
+ "eval_loss": 6388.421875,
191
+ "eval_runtime": 0.5476,
192
+ "eval_samples_per_second": 14.609,
193
+ "eval_steps_per_second": 1.826,
194
  "step": 72
195
  },
196
  {
197
  "epoch": 19.0,
198
+ "eval_loss": 6388.505859375,
199
+ "eval_runtime": 0.5589,
200
+ "eval_samples_per_second": 14.314,
201
+ "eval_steps_per_second": 1.789,
202
  "step": 76
203
  },
204
  {
205
  "epoch": 20.0,
206
+ "learning_rate": 3e-05,
207
+ "loss": 6258.6707,
208
  "step": 80
209
  },
210
  {
211
  "epoch": 20.0,
212
+ "eval_loss": 6388.39501953125,
213
+ "eval_runtime": 0.5558,
214
+ "eval_samples_per_second": 14.393,
215
+ "eval_steps_per_second": 1.799,
216
  "step": 80
217
  },
218
  {
219
+ "epoch": 21.0,
220
+ "eval_loss": 6388.30419921875,
221
+ "eval_runtime": 0.6087,
222
+ "eval_samples_per_second": 13.142,
223
+ "eval_steps_per_second": 1.643,
224
+ "step": 84
225
+ },
226
+ {
227
+ "epoch": 22.0,
228
+ "eval_loss": 6388.27880859375,
229
+ "eval_runtime": 0.5565,
230
+ "eval_samples_per_second": 14.376,
231
+ "eval_steps_per_second": 1.797,
232
+ "step": 88
233
+ },
234
+ {
235
+ "epoch": 22.5,
236
+ "learning_rate": 2.7500000000000004e-05,
237
+ "loss": 6290.5227,
238
+ "step": 90
239
+ },
240
+ {
241
+ "epoch": 23.0,
242
+ "eval_loss": 6388.27001953125,
243
+ "eval_runtime": 0.5542,
244
+ "eval_samples_per_second": 14.435,
245
+ "eval_steps_per_second": 1.804,
246
+ "step": 92
247
+ },
248
+ {
249
+ "epoch": 24.0,
250
+ "eval_loss": 6388.3994140625,
251
+ "eval_runtime": 0.5489,
252
+ "eval_samples_per_second": 14.573,
253
+ "eval_steps_per_second": 1.822,
254
+ "step": 96
255
+ },
256
+ {
257
+ "epoch": 25.0,
258
+ "learning_rate": 2.5e-05,
259
+ "loss": 6450.2746,
260
+ "step": 100
261
+ },
262
+ {
263
+ "epoch": 25.0,
264
+ "eval_loss": 6388.41796875,
265
+ "eval_runtime": 0.5566,
266
+ "eval_samples_per_second": 14.372,
267
+ "eval_steps_per_second": 1.797,
268
+ "step": 100
269
+ },
270
+ {
271
+ "epoch": 26.0,
272
+ "eval_loss": 6388.23291015625,
273
+ "eval_runtime": 0.5471,
274
+ "eval_samples_per_second": 14.622,
275
+ "eval_steps_per_second": 1.828,
276
+ "step": 104
277
+ },
278
+ {
279
+ "epoch": 27.0,
280
+ "eval_loss": 6388.30615234375,
281
+ "eval_runtime": 0.5404,
282
+ "eval_samples_per_second": 14.803,
283
+ "eval_steps_per_second": 1.85,
284
+ "step": 108
285
+ },
286
+ {
287
+ "epoch": 27.5,
288
+ "learning_rate": 2.25e-05,
289
+ "loss": 6321.6988,
290
+ "step": 110
291
+ },
292
+ {
293
+ "epoch": 28.0,
294
+ "eval_loss": 6388.2568359375,
295
+ "eval_runtime": 0.5645,
296
+ "eval_samples_per_second": 14.172,
297
+ "eval_steps_per_second": 1.772,
298
+ "step": 112
299
+ },
300
+ {
301
+ "epoch": 29.0,
302
+ "eval_loss": 6388.13330078125,
303
+ "eval_runtime": 0.5582,
304
+ "eval_samples_per_second": 14.333,
305
+ "eval_steps_per_second": 1.792,
306
+ "step": 116
307
+ },
308
+ {
309
+ "epoch": 30.0,
310
+ "learning_rate": 2e-05,
311
+ "loss": 6468.3922,
312
+ "step": 120
313
+ },
314
+ {
315
+ "epoch": 30.0,
316
+ "eval_loss": 6388.166015625,
317
+ "eval_runtime": 0.5512,
318
+ "eval_samples_per_second": 14.515,
319
+ "eval_steps_per_second": 1.814,
320
+ "step": 120
321
+ },
322
+ {
323
+ "epoch": 31.0,
324
+ "eval_loss": 6388.15283203125,
325
+ "eval_runtime": 0.5777,
326
+ "eval_samples_per_second": 13.848,
327
+ "eval_steps_per_second": 1.731,
328
+ "step": 124
329
+ },
330
+ {
331
+ "epoch": 32.0,
332
+ "eval_loss": 6388.12109375,
333
+ "eval_runtime": 0.5552,
334
+ "eval_samples_per_second": 14.408,
335
+ "eval_steps_per_second": 1.801,
336
+ "step": 128
337
+ },
338
+ {
339
+ "epoch": 32.5,
340
+ "learning_rate": 1.75e-05,
341
+ "loss": 6305.5602,
342
+ "step": 130
343
+ },
344
+ {
345
+ "epoch": 33.0,
346
+ "eval_loss": 6388.09716796875,
347
+ "eval_runtime": 0.5442,
348
+ "eval_samples_per_second": 14.7,
349
+ "eval_steps_per_second": 1.838,
350
+ "step": 132
351
+ },
352
+ {
353
+ "epoch": 34.0,
354
+ "eval_loss": 6388.07763671875,
355
+ "eval_runtime": 0.5513,
356
+ "eval_samples_per_second": 14.511,
357
+ "eval_steps_per_second": 1.814,
358
+ "step": 136
359
+ },
360
+ {
361
+ "epoch": 35.0,
362
+ "learning_rate": 1.5e-05,
363
+ "loss": 6349.5672,
364
+ "step": 140
365
+ },
366
+ {
367
+ "epoch": 35.0,
368
+ "eval_loss": 6388.04931640625,
369
+ "eval_runtime": 0.5426,
370
+ "eval_samples_per_second": 14.744,
371
+ "eval_steps_per_second": 1.843,
372
+ "step": 140
373
+ },
374
+ {
375
+ "epoch": 36.0,
376
+ "eval_loss": 6388.03173828125,
377
+ "eval_runtime": 0.5452,
378
+ "eval_samples_per_second": 14.674,
379
+ "eval_steps_per_second": 1.834,
380
+ "step": 144
381
+ },
382
+ {
383
+ "epoch": 37.0,
384
+ "eval_loss": 6388.02587890625,
385
+ "eval_runtime": 0.5716,
386
+ "eval_samples_per_second": 13.996,
387
+ "eval_steps_per_second": 1.75,
388
+ "step": 148
389
+ },
390
+ {
391
+ "epoch": 37.5,
392
+ "learning_rate": 1.25e-05,
393
+ "loss": 6244.9891,
394
+ "step": 150
395
+ },
396
+ {
397
+ "epoch": 38.0,
398
+ "eval_loss": 6388.037109375,
399
+ "eval_runtime": 0.5397,
400
+ "eval_samples_per_second": 14.822,
401
+ "eval_steps_per_second": 1.853,
402
+ "step": 152
403
+ },
404
+ {
405
+ "epoch": 39.0,
406
+ "eval_loss": 6388.02734375,
407
+ "eval_runtime": 0.5505,
408
+ "eval_samples_per_second": 14.532,
409
+ "eval_steps_per_second": 1.817,
410
+ "step": 156
411
+ },
412
+ {
413
+ "epoch": 40.0,
414
+ "learning_rate": 1e-05,
415
+ "loss": 6383.7191,
416
+ "step": 160
417
+ },
418
+ {
419
+ "epoch": 40.0,
420
+ "eval_loss": 6388.146484375,
421
+ "eval_runtime": 0.537,
422
+ "eval_samples_per_second": 14.898,
423
+ "eval_steps_per_second": 1.862,
424
+ "step": 160
425
+ },
426
+ {
427
+ "epoch": 41.0,
428
+ "eval_loss": 6388.25048828125,
429
+ "eval_runtime": 0.5406,
430
+ "eval_samples_per_second": 14.797,
431
+ "eval_steps_per_second": 1.85,
432
+ "step": 164
433
+ },
434
+ {
435
+ "epoch": 42.0,
436
+ "eval_loss": 6388.1259765625,
437
+ "eval_runtime": 0.5956,
438
+ "eval_samples_per_second": 13.433,
439
+ "eval_steps_per_second": 1.679,
440
+ "step": 168
441
+ },
442
+ {
443
+ "epoch": 42.5,
444
+ "learning_rate": 7.5e-06,
445
+ "loss": 6260.3203,
446
+ "step": 170
447
+ },
448
+ {
449
+ "epoch": 43.0,
450
+ "eval_loss": 6388.0517578125,
451
+ "eval_runtime": 0.557,
452
+ "eval_samples_per_second": 14.363,
453
+ "eval_steps_per_second": 1.795,
454
+ "step": 172
455
+ },
456
+ {
457
+ "epoch": 44.0,
458
+ "eval_loss": 6388.0322265625,
459
+ "eval_runtime": 0.5537,
460
+ "eval_samples_per_second": 14.447,
461
+ "eval_steps_per_second": 1.806,
462
+ "step": 176
463
+ },
464
+ {
465
+ "epoch": 45.0,
466
+ "learning_rate": 5e-06,
467
+ "loss": 6254.6055,
468
+ "step": 180
469
+ },
470
+ {
471
+ "epoch": 45.0,
472
+ "eval_loss": 6388.0625,
473
+ "eval_runtime": 0.5885,
474
+ "eval_samples_per_second": 13.595,
475
+ "eval_steps_per_second": 1.699,
476
+ "step": 180
477
+ },
478
+ {
479
+ "epoch": 46.0,
480
+ "eval_loss": 6388.06884765625,
481
+ "eval_runtime": 0.5716,
482
+ "eval_samples_per_second": 13.995,
483
+ "eval_steps_per_second": 1.749,
484
+ "step": 184
485
+ },
486
+ {
487
+ "epoch": 47.0,
488
+ "eval_loss": 6388.06005859375,
489
+ "eval_runtime": 0.5704,
490
+ "eval_samples_per_second": 14.026,
491
+ "eval_steps_per_second": 1.753,
492
+ "step": 188
493
+ },
494
+ {
495
+ "epoch": 47.5,
496
+ "learning_rate": 2.5e-06,
497
+ "loss": 6351.991,
498
+ "step": 190
499
+ },
500
+ {
501
+ "epoch": 48.0,
502
+ "eval_loss": 6388.08251953125,
503
+ "eval_runtime": 0.5514,
504
+ "eval_samples_per_second": 14.507,
505
+ "eval_steps_per_second": 1.813,
506
+ "step": 192
507
+ },
508
+ {
509
+ "epoch": 49.0,
510
+ "eval_loss": 6388.09375,
511
+ "eval_runtime": 0.5515,
512
+ "eval_samples_per_second": 14.506,
513
+ "eval_steps_per_second": 1.813,
514
+ "step": 196
515
+ },
516
+ {
517
+ "epoch": 50.0,
518
+ "learning_rate": 0.0,
519
+ "loss": 6209.7984,
520
+ "step": 200
521
+ },
522
+ {
523
+ "epoch": 50.0,
524
+ "eval_loss": 6388.09228515625,
525
+ "eval_runtime": 0.566,
526
+ "eval_samples_per_second": 14.135,
527
+ "eval_steps_per_second": 1.767,
528
+ "step": 200
529
+ },
530
+ {
531
+ "epoch": 50.0,
532
+ "step": 200,
533
+ "total_flos": 7.65002115072e+17,
534
+ "train_loss": 6294.72984375,
535
+ "train_runtime": 782.6675,
536
+ "train_samples_per_second": 2.044,
537
+ "train_steps_per_second": 0.256
538
  }
539
  ],
540
+ "max_steps": 200,
541
+ "num_train_epochs": 50,
542
+ "total_flos": 7.65002115072e+17,
543
  "trial_name": null,
544
  "trial_params": null
545
  }