dq158 committed on
Commit 50a83b6
1 Parent(s): b93fe63

Training in progress, epoch 2, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bd1821877d7d29077d85fe04dfea93e7292825d05231ebab51a1e126697c743
+oid sha256:70b4707eb6c214911672a8f3db9b0750144a4f27155736566a9396deed071d8a
 size 18915040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f41766bc78b6ec0c8e40003000365c7287487226ebb9545f086a8ab4a75b48ae
+oid sha256:c15b00248d28cdc84ab89f0c61c238b907e5551894b6c2da4c580a4d0f3bf06a
 size 2603258
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f8f8922c352ab63d1593bcb944e77d461472df192dcdc1202984e9e08f16a111
+oid sha256:2ba8427ac6eea57ec5734699585fe0dd282eb1503ee998fdc4232e54bb7d6354
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c80ef793f3bbade49954a24290acb44d3ed9d4cc93da4ad48634f3740d8739a
+oid sha256:6bf6b8361429219bc23dcf3f544eb0a05c3c81c223b22ff1dbb678c4e9f08edf
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 2.393324613571167,
-  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-12646",
-  "epoch": 1.0,
+  "best_metric": 2.3343088626861572,
+  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-25292",
+  "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 12646,
+  "global_step": 25292,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -176,13 +176,182 @@
       "eval_steps_per_second": 0.889,
       "eval_translation_length": 1439232,
       "step": 12646
+    },
+    {
+      "epoch": 1.03,
+      "learning_rate": 7.978533681098235e-05,
+      "loss": 2.4976,
+      "step": 13000
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 7.976783725822707e-05,
+      "loss": 2.4559,
+      "step": 13500
+    },
+    {
+      "epoch": 1.11,
+      "learning_rate": 7.974965415947614e-05,
+      "loss": 2.5159,
+      "step": 14000
+    },
+    {
+      "epoch": 1.15,
+      "learning_rate": 7.97307878272682e-05,
+      "loss": 2.4853,
+      "step": 14500
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 7.971123858588551e-05,
+      "loss": 2.5642,
+      "step": 15000
+    },
+    {
+      "epoch": 1.23,
+      "learning_rate": 7.969100677134854e-05,
+      "loss": 2.4762,
+      "step": 15500
+    },
+    {
+      "epoch": 1.27,
+      "learning_rate": 7.967009273141005e-05,
+      "loss": 2.4515,
+      "step": 16000
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 7.964849682554919e-05,
+      "loss": 2.5239,
+      "step": 16500
+    },
+    {
+      "epoch": 1.34,
+      "learning_rate": 7.962621942496528e-05,
+      "loss": 2.4723,
+      "step": 17000
+    },
+    {
+      "epoch": 1.38,
+      "learning_rate": 7.96032609125715e-05,
+      "loss": 2.5052,
+      "step": 17500
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 7.957962168298823e-05,
+      "loss": 2.4817,
+      "step": 18000
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 7.95553021425363e-05,
+      "loss": 2.505,
+      "step": 18500
+    },
+    {
+      "epoch": 1.5,
+      "learning_rate": 7.953030270922999e-05,
+      "loss": 2.5106,
+      "step": 19000
+    },
+    {
+      "epoch": 1.54,
+      "learning_rate": 7.950462381276989e-05,
+      "loss": 2.4658,
+      "step": 19500
+    },
+    {
+      "epoch": 1.58,
+      "learning_rate": 7.947826589453548e-05,
+      "loss": 2.4544,
+      "step": 20000
+    },
+    {
+      "epoch": 1.62,
+      "learning_rate": 7.94512294075775e-05,
+      "loss": 2.462,
+      "step": 20500
+    },
+    {
+      "epoch": 1.66,
+      "learning_rate": 7.942351481661027e-05,
+      "loss": 2.4447,
+      "step": 21000
+    },
+    {
+      "epoch": 1.7,
+      "learning_rate": 7.93951225980036e-05,
+      "loss": 2.4918,
+      "step": 21500
+    },
+    {
+      "epoch": 1.74,
+      "learning_rate": 7.936605323977468e-05,
+      "loss": 2.4594,
+      "step": 22000
+    },
+    {
+      "epoch": 1.78,
+      "learning_rate": 7.933630724157962e-05,
+      "loss": 2.5028,
+      "step": 22500
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 7.93058851147049e-05,
+      "loss": 2.4924,
+      "step": 23000
+    },
+    {
+      "epoch": 1.86,
+      "learning_rate": 7.92747873820586e-05,
+      "loss": 2.4138,
+      "step": 23500
+    },
+    {
+      "epoch": 1.9,
+      "learning_rate": 7.924301457816139e-05,
+      "loss": 2.4198,
+      "step": 24000
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 7.921056724913732e-05,
+      "loss": 2.4524,
+      "step": 24500
+    },
+    {
+      "epoch": 1.98,
+      "learning_rate": 7.917744595270448e-05,
+      "loss": 2.4497,
+      "step": 25000
+    },
+    {
+      "epoch": 2.0,
+      "eval_bleu": 1.0,
+      "eval_brevity_penalty": 1.0,
+      "eval_length_ratio": 1.0,
+      "eval_loss": 2.3343088626861572,
+      "eval_precisions": [
+        1.0,
+        1.0,
+        1.0,
+        1.0
+      ],
+      "eval_reference_length": 1439232,
+      "eval_runtime": 1578.692,
+      "eval_samples_per_second": 1.781,
+      "eval_steps_per_second": 0.891,
+      "eval_translation_length": 1439232,
+      "step": 25292
     }
   ],
   "logging_steps": 500,
   "max_steps": 379380,
   "num_train_epochs": 30,
   "save_steps": 500,
-  "total_flos": 2.1666322696686797e+17,
+  "total_flos": 4.3332645393373594e+17,
   "trial_name": null,
   "trial_params": null
 }
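trainer_state.json is the Hugging Face Trainer's bookkeeping file: best_metric and best_model_checkpoint track the lowest eval_loss seen so far (2.3343 at checkpoint-25292 after epoch 2, down from 2.3933 at checkpoint-12646), and this commit appends the epoch-2 entries to the log_history list, one every 500 steps plus the end-of-epoch evaluation. A minimal sketch of reading that state back for a quick loss summary, assuming the checkpoint directory has been downloaded locally:

import json
from pathlib import Path

# Hypothetical local path to the checkpoint saved in this commit.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("best metric (eval_loss):", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# Training entries carry "loss"; evaluation entries carry "eval_loss".
train_log = [e for e in state["log_history"] if "loss" in e]
eval_log = [e for e in state["log_history"] if "eval_loss" in e]

print("last train loss:", train_log[-1]["loss"], "at step", train_log[-1]["step"])
for e in eval_log:
    print("epoch", e["epoch"], "eval_loss", e["eval_loss"])

Note that total_flos exactly doubles between the two snapshots (2.1666322696686797e+17 to 4.3332645393373594e+17), as expected after a second pass over the same data.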