stefania-radu commited on
Commit
1625a53
1 Parent(s): 63bd7a2

Training in progress, step 60000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b6e0063168d01e8d9ad637627df35c664793162761ded8ac5e14cee0a39fa4b
3
  size 893440890
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05b029ed1927bee45376b9ec825ac1e6ad7ff6cc8cad870573ced24bc0e8a7a
3
  size 893440890
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bcf9f6920ad5ded398eb8f93116f71e8adc7c3a91f6cd5c0da8e0f651bc8faf
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c27e63159040ffd991f11430bcaa8ba43680821f717186a7bc3c221a468601c
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:832b02611c12b9ad0dcde16a2bc02988c4f2b0b885960cfa4b9125a284bdeb99
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76f1055b9fbc745e8095815e2f9b0f2460e6416163d65d839c2d6bec7dc4f817
3
+ size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cfba27d7b6929b849933e9ae7d0e075a396ddca1f7f2a518e796268cde14302
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c55f5050853d52e8377712864d5685c98b2bc91dad2debd9776569f4616235b0
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.05,
5
- "global_step": 50000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -376,11 +376,85 @@
376
  "eval_samples_per_second": 101.039,
377
  "eval_steps_per_second": 12.63,
378
  "step": 50000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  }
380
  ],
381
  "max_steps": 1000000,
382
  "num_train_epochs": 9223372036854775807,
383
- "total_flos": 4.427305086615552e+20,
384
  "trial_name": null,
385
  "trial_params": null
386
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06,
5
+ "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
376
  "eval_samples_per_second": 101.039,
377
  "eval_steps_per_second": 12.63,
378
  "step": 50000
379
+ },
380
+ {
381
+ "epoch": 0.05,
382
+ "learning_rate": 1.874997607785047e-05,
383
+ "loss": 0.497,
384
+ "step": 51000
385
+ },
386
+ {
387
+ "epoch": 0.05,
388
+ "learning_rate": 1.874990431166348e-05,
389
+ "loss": 0.498,
390
+ "step": 52000
391
+ },
392
+ {
393
+ "epoch": 0.05,
394
+ "learning_rate": 1.8749784702223863e-05,
395
+ "loss": 0.4921,
396
+ "step": 53000
397
+ },
398
+ {
399
+ "epoch": 0.05,
400
+ "learning_rate": 1.8749617250839647e-05,
401
+ "loss": 0.4876,
402
+ "step": 54000
403
+ },
404
+ {
405
+ "epoch": 0.06,
406
+ "learning_rate": 1.8749401959342052e-05,
407
+ "loss": 0.483,
408
+ "step": 55000
409
+ },
410
+ {
411
+ "epoch": 0.06,
412
+ "eval_runtime": 3145.913,
413
+ "eval_samples_per_second": 107.966,
414
+ "eval_steps_per_second": 13.496,
415
+ "step": 55000
416
+ },
417
+ {
418
+ "epoch": 0.06,
419
+ "learning_rate": 1.874913883008547e-05,
420
+ "loss": 0.4809,
421
+ "step": 56000
422
+ },
423
+ {
424
+ "epoch": 0.06,
425
+ "learning_rate": 1.8748827865947437e-05,
426
+ "loss": 0.4788,
427
+ "step": 57000
428
+ },
429
+ {
430
+ "epoch": 0.06,
431
+ "learning_rate": 1.8748469070328614e-05,
432
+ "loss": 0.4767,
433
+ "step": 58000
434
+ },
435
+ {
436
+ "epoch": 0.06,
437
+ "learning_rate": 1.8748062447152732e-05,
438
+ "loss": 0.475,
439
+ "step": 59000
440
+ },
441
+ {
442
+ "epoch": 0.06,
443
+ "learning_rate": 1.874760800086655e-05,
444
+ "loss": 0.4696,
445
+ "step": 60000
446
+ },
447
+ {
448
+ "epoch": 0.06,
449
+ "eval_runtime": 3128.1932,
450
+ "eval_samples_per_second": 108.578,
451
+ "eval_steps_per_second": 13.572,
452
+ "step": 60000
453
  }
454
  ],
455
  "max_steps": 1000000,
456
  "num_train_epochs": 9223372036854775807,
457
+ "total_flos": 5.3127661039386624e+20,
458
  "trial_name": null,
459
  "trial_params": null
460
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3bcf9f6920ad5ded398eb8f93116f71e8adc7c3a91f6cd5c0da8e0f651bc8faf
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c27e63159040ffd991f11430bcaa8ba43680821f717186a7bc3c221a468601c
3
  size 454197066