JulienRPA commited on
Commit
7726262
1 Parent(s): c1945c4

Training in progress, step 6000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f8804038bfc8c4fff09781205b46f28d914f21115e30a8813fb451cdb64ad17
3
  size 2000137067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:546cd4e7ec947305a665edaa2e582e225d7b8301eaa78f72956ea9b0b6882be6
3
  size 2000137067
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ccb2c6910f34be7a138f85235698d8f0ec58283fab1416ef6a153c170b333ed
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:938f724c0f91c737457d75986a81bb14510b0f67d394e6715914ee6427257f11
3
  size 1002469625
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:047a205ba65d4143fae2458ea498dbf8e5a685cd3e2e670c6951c8a299de8ae5
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db097afe89ed3c3eb23a6fe209105077506d8139157678f90acd89f98273927d
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5789a0b7645be9b850d95a6b32fdc2dd662d33e25aea9a3e9fcb4a7309e592af
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace18575c64fb9061a4bdb4187294f04e31e9a65a2e4da680ca78aeef9f963e2
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.795248078266946,
5
- "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -268,11 +268,142 @@
268
  "eval_samples_per_second": 2.402,
269
  "eval_steps_per_second": 0.301,
270
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
  }
272
  ],
273
  "max_steps": 11448,
274
  "num_train_epochs": 8,
275
- "total_flos": 2632028349156096.0,
276
  "trial_name": null,
277
  "trial_params": null
278
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.1928721174004195,
5
+ "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
268
  "eval_samples_per_second": 2.402,
269
  "eval_steps_per_second": 0.301,
270
  "step": 4000
271
+ },
272
+ {
273
+ "epoch": 2.87,
274
+ "learning_rate": 4.105945462673223e-05,
275
+ "loss": 2.9079,
276
+ "step": 4100
277
+ },
278
+ {
279
+ "epoch": 2.94,
280
+ "learning_rate": 4.0500670540903e-05,
281
+ "loss": 2.8688,
282
+ "step": 4200
283
+ },
284
+ {
285
+ "epoch": 3.0,
286
+ "learning_rate": 3.994188645507376e-05,
287
+ "loss": 2.7398,
288
+ "step": 4300
289
+ },
290
+ {
291
+ "epoch": 3.07,
292
+ "learning_rate": 3.938310236924452e-05,
293
+ "loss": 2.5503,
294
+ "step": 4400
295
+ },
296
+ {
297
+ "epoch": 3.14,
298
+ "learning_rate": 3.882431828341529e-05,
299
+ "loss": 2.5121,
300
+ "step": 4500
301
+ },
302
+ {
303
+ "epoch": 3.21,
304
+ "learning_rate": 3.8265534197586055e-05,
305
+ "loss": 2.4625,
306
+ "step": 4600
307
+ },
308
+ {
309
+ "epoch": 3.28,
310
+ "learning_rate": 3.7706750111756815e-05,
311
+ "loss": 2.3833,
312
+ "step": 4700
313
+ },
314
+ {
315
+ "epoch": 3.35,
316
+ "learning_rate": 3.714796602592758e-05,
317
+ "loss": 2.3133,
318
+ "step": 4800
319
+ },
320
+ {
321
+ "epoch": 3.42,
322
+ "learning_rate": 3.658918194009835e-05,
323
+ "loss": 2.2152,
324
+ "step": 4900
325
+ },
326
+ {
327
+ "epoch": 3.49,
328
+ "learning_rate": 3.603039785426911e-05,
329
+ "loss": 2.2304,
330
+ "step": 5000
331
+ },
332
+ {
333
+ "epoch": 3.56,
334
+ "learning_rate": 3.5471613768439874e-05,
335
+ "loss": 2.1563,
336
+ "step": 5100
337
+ },
338
+ {
339
+ "epoch": 3.63,
340
+ "learning_rate": 3.491282968261064e-05,
341
+ "loss": 2.0991,
342
+ "step": 5200
343
+ },
344
+ {
345
+ "epoch": 3.7,
346
+ "learning_rate": 3.4354045596781406e-05,
347
+ "loss": 2.0702,
348
+ "step": 5300
349
+ },
350
+ {
351
+ "epoch": 3.77,
352
+ "learning_rate": 3.379526151095217e-05,
353
+ "loss": 1.9914,
354
+ "step": 5400
355
+ },
356
+ {
357
+ "epoch": 3.84,
358
+ "learning_rate": 3.323647742512294e-05,
359
+ "loss": 1.9235,
360
+ "step": 5500
361
+ },
362
+ {
363
+ "epoch": 3.91,
364
+ "learning_rate": 3.26776933392937e-05,
365
+ "loss": 1.8922,
366
+ "step": 5600
367
+ },
368
+ {
369
+ "epoch": 3.98,
370
+ "learning_rate": 3.2118909253464465e-05,
371
+ "loss": 1.8512,
372
+ "step": 5700
373
+ },
374
+ {
375
+ "epoch": 4.05,
376
+ "learning_rate": 3.156012516763523e-05,
377
+ "loss": 1.6966,
378
+ "step": 5800
379
+ },
380
+ {
381
+ "epoch": 4.12,
382
+ "learning_rate": 3.100134108180599e-05,
383
+ "loss": 1.6399,
384
+ "step": 5900
385
+ },
386
+ {
387
+ "epoch": 4.19,
388
+ "learning_rate": 3.0442556995976757e-05,
389
+ "loss": 1.5982,
390
+ "step": 6000
391
+ },
392
+ {
393
+ "epoch": 4.19,
394
+ "eval_bleu": 56.0747,
395
+ "eval_em": 0.0017,
396
+ "eval_gen_len": 43.9021,
397
+ "eval_loss": 1.7495189905166626,
398
+ "eval_runtime": 423.2589,
399
+ "eval_samples_per_second": 2.847,
400
+ "eval_steps_per_second": 0.357,
401
+ "step": 6000
402
  }
403
  ],
404
  "max_steps": 11448,
405
  "num_train_epochs": 8,
406
+ "total_flos": 3958404255138432.0,
407
  "trial_name": null,
408
  "trial_params": null
409
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ccb2c6910f34be7a138f85235698d8f0ec58283fab1416ef6a153c170b333ed
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:938f724c0f91c737457d75986a81bb14510b0f67d394e6715914ee6427257f11
3
  size 1002469625
runs/Jun05_10-45-59_0a95bf9de5ac/events.out.tfevents.1685962630.0a95bf9de5ac.3272.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbc2bf10f2e8697285e6c7d205571262251279a84d779b5e1565cd93e4f856da
3
- size 15654
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a959ff46eca4f3545c8c05e80af7a2fe5e478b7b3102f1f41ccc3cfcf54b040
3
+ size 19210