stefania-radu commited on
Commit
2386eb8
1 Parent(s): b34cce8

Training in progress, step 370000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:032632aa2e59a7370c8a2f40437ef69da58b65449cfe7fa3a8278baf768cc91b
3
  size 893441530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d5937fbf5a223bdee412711090b2afe19ccd59cb5aa00ff257849fe3abbc0df
3
  size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:927cfdcc9ed50ca027e558ae501d9eb171688bcdcc5554011a95dbad82ff27f7
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb18529144e5631f4110d550e5ae8dec1ab66ae10143ec11a390bb4bb1168003
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8dea57f79fcdb60961c4b12abe4b4ae1797e7d5be498f1ee068e015bcea184a5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57495da3e14701eaa78ba01626380cf06c08d77e93ad6e4f3e1fa2ad42d22c6c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96cf24b2595fb9ea938ee5062cd07f0addfb4ccce513bd6fa04dd8edc00aa72d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83bbce26cd1d82261c68bda952c3a8a3f941ff6e36a11d4fea5bc7113b3bffa2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.36,
5
- "global_step": 360000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2670,11 +2670,85 @@
2670
  "eval_samples_per_second": 102.337,
2671
  "eval_steps_per_second": 12.792,
2672
  "step": 360000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2673
  }
2674
  ],
2675
  "max_steps": 1000000,
2676
  "num_train_epochs": 9223372036854775807,
2677
- "total_flos": 3.1876596623631974e+21,
2678
  "trial_name": null,
2679
  "trial_params": null
2680
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.37,
5
+ "global_step": 370000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2670
  "eval_samples_per_second": 102.337,
2671
  "eval_steps_per_second": 12.792,
2672
  "step": 360000
2673
+ },
2674
+ {
2675
+ "epoch": 0.36,
2676
+ "learning_rate": 1.6633112470897675e-05,
2677
+ "loss": 0.3442,
2678
+ "step": 361000
2679
+ },
2680
+ {
2681
+ "epoch": 0.36,
2682
+ "learning_rate": 1.6620708362146338e-05,
2683
+ "loss": 0.3441,
2684
+ "step": 362000
2685
+ },
2686
+ {
2687
+ "epoch": 0.36,
2688
+ "learning_rate": 1.6608279694688143e-05,
2689
+ "loss": 0.3441,
2690
+ "step": 363000
2691
+ },
2692
+ {
2693
+ "epoch": 0.36,
2694
+ "learning_rate": 1.659582660444101e-05,
2695
+ "loss": 0.3419,
2696
+ "step": 364000
2697
+ },
2698
+ {
2699
+ "epoch": 0.36,
2700
+ "learning_rate": 1.658334922758994e-05,
2701
+ "loss": 0.3405,
2702
+ "step": 365000
2703
+ },
2704
+ {
2705
+ "epoch": 0.36,
2706
+ "eval_runtime": 3198.5996,
2707
+ "eval_samples_per_second": 106.188,
2708
+ "eval_steps_per_second": 13.274,
2709
+ "step": 365000
2710
+ },
2711
+ {
2712
+ "epoch": 0.37,
2713
+ "learning_rate": 1.6570847700585524e-05,
2714
+ "loss": 0.3406,
2715
+ "step": 366000
2716
+ },
2717
+ {
2718
+ "epoch": 0.37,
2719
+ "learning_rate": 1.6558322160142462e-05,
2720
+ "loss": 0.341,
2721
+ "step": 367000
2722
+ },
2723
+ {
2724
+ "epoch": 0.37,
2725
+ "learning_rate": 1.654577274323806e-05,
2726
+ "loss": 0.342,
2727
+ "step": 368000
2728
+ },
2729
+ {
2730
+ "epoch": 0.37,
2731
+ "learning_rate": 1.653319958711072e-05,
2732
+ "loss": 0.3479,
2733
+ "step": 369000
2734
+ },
2735
+ {
2736
+ "epoch": 0.37,
2737
+ "learning_rate": 1.6520602829258474e-05,
2738
+ "loss": 0.3487,
2739
+ "step": 370000
2740
+ },
2741
+ {
2742
+ "epoch": 0.37,
2743
+ "eval_runtime": 3164.3305,
2744
+ "eval_samples_per_second": 107.338,
2745
+ "eval_steps_per_second": 13.417,
2746
+ "step": 370000
2747
  }
2748
  ],
2749
  "max_steps": 1000000,
2750
  "num_train_epochs": 9223372036854775807,
2751
+ "total_flos": 3.2762057640955085e+21,
2752
  "trial_name": null,
2753
  "trial_params": null
2754
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:927cfdcc9ed50ca027e558ae501d9eb171688bcdcc5554011a95dbad82ff27f7
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb18529144e5631f4110d550e5ae8dec1ab66ae10143ec11a390bb4bb1168003
3
  size 454197066