stefania-radu commited on
Commit
c134cc3
1 Parent(s): 1625a53

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a05b029ed1927bee45376b9ec825ac1e6ad7ff6cc8cad870573ced24bc0e8a7a
3
- size 893440890
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:febd4ddd93771c4f15dcf974f5893b102541ab530165ff6539605cf0e37dfc55
3
+ size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c27e63159040ffd991f11430bcaa8ba43680821f717186a7bc3c221a468601c
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35925eb08957bf9a46de58b0202e620c1dd065067603389209eb33e48dd1166
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76f1055b9fbc745e8095815e2f9b0f2460e6416163d65d839c2d6bec7dc4f817
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8998e548c91f0f182806a61c3bafac5235a976f6ff7eb69017a5b8e22c368c89
3
+ size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c55f5050853d52e8377712864d5685c98b2bc91dad2debd9776569f4616235b0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613de0658f4e0e57fee5c3fe331af891a766b9d5b43031df1917116c932b0b1d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -450,11 +450,85 @@
450
  "eval_samples_per_second": 108.578,
451
  "eval_steps_per_second": 13.572,
452
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  }
454
  ],
455
  "max_steps": 1000000,
456
  "num_train_epochs": 9223372036854775807,
457
- "total_flos": 5.3127661039386624e+20,
458
  "trial_name": null,
459
  "trial_params": null
460
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.07,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
450
  "eval_samples_per_second": 108.578,
451
  "eval_steps_per_second": 13.572,
452
  "step": 60000
453
+ },
454
+ {
455
+ "epoch": 0.06,
456
+ "learning_rate": 1.8747105736439825e-05,
457
+ "loss": 0.4672,
458
+ "step": 61000
459
+ },
460
+ {
461
+ "epoch": 0.06,
462
+ "learning_rate": 1.8746555659365244e-05,
463
+ "loss": 0.4634,
464
+ "step": 62000
465
+ },
466
+ {
467
+ "epoch": 0.06,
468
+ "learning_rate": 1.8745957775658352e-05,
469
+ "loss": 0.4605,
470
+ "step": 63000
471
+ },
472
+ {
473
+ "epoch": 0.06,
474
+ "learning_rate": 1.8745312091857516e-05,
475
+ "loss": 0.4554,
476
+ "step": 64000
477
+ },
478
+ {
479
+ "epoch": 0.07,
480
+ "learning_rate": 1.8744618615023832e-05,
481
+ "loss": 0.4539,
482
+ "step": 65000
483
+ },
484
+ {
485
+ "epoch": 0.07,
486
+ "eval_runtime": 3195.2405,
487
+ "eval_samples_per_second": 106.299,
488
+ "eval_steps_per_second": 13.288,
489
+ "step": 65000
490
+ },
491
+ {
492
+ "epoch": 0.07,
493
+ "learning_rate": 1.874387735274105e-05,
494
+ "loss": 0.4547,
495
+ "step": 66000
496
+ },
497
+ {
498
+ "epoch": 0.07,
499
+ "learning_rate": 1.8743088313115487e-05,
500
+ "loss": 0.4536,
501
+ "step": 67000
502
+ },
503
+ {
504
+ "epoch": 0.07,
505
+ "learning_rate": 1.8742251504775967e-05,
506
+ "loss": 0.4548,
507
+ "step": 68000
508
+ },
509
+ {
510
+ "epoch": 0.07,
511
+ "learning_rate": 1.8741366936873687e-05,
512
+ "loss": 0.4528,
513
+ "step": 69000
514
+ },
515
+ {
516
+ "epoch": 0.07,
517
+ "learning_rate": 1.8740434619082138e-05,
518
+ "loss": 0.4525,
519
+ "step": 70000
520
+ },
521
+ {
522
+ "epoch": 0.07,
523
+ "eval_runtime": 2998.8361,
524
+ "eval_samples_per_second": 113.261,
525
+ "eval_steps_per_second": 14.158,
526
+ "step": 70000
527
  }
528
  ],
529
  "max_steps": 1000000,
530
  "num_train_epochs": 9223372036854775807,
531
+ "total_flos": 6.198227121261773e+20,
532
  "trial_name": null,
533
  "trial_params": null
534
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1d22e1a2519765d1bca40b43625e34e041226f1d79dfe4796ce059e0420a41c
3
- size 3704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3314aeda295ada0bfe55795dcf769542363573771d9b1416041c426b80bcfe0f
3
+ size 3768
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c27e63159040ffd991f11430bcaa8ba43680821f717186a7bc3c221a468601c
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35925eb08957bf9a46de58b0202e620c1dd065067603389209eb33e48dd1166
3
  size 454197066
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1d22e1a2519765d1bca40b43625e34e041226f1d79dfe4796ce059e0420a41c
3
- size 3704
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3314aeda295ada0bfe55795dcf769542363573771d9b1416041c426b80bcfe0f
3
+ size 3768