stefania-radu commited on
Commit
aa9de5a
1 Parent(s): 695bbd3

Training in progress, step 390000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7d80b5fe4b73581493fbf7e8d5375ba0cc27fca0abae2f649940dc93608ed91
3
  size 893441530
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea287e9b4ef69b399ab26f5a72f16419f78692f80f4478f9dc9fde54c01b3139
3
  size 893441530
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaf5cca6f5ad9c15d0fba6e119e98e99e1b26c717ce470e245593e0001049d48
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0754eb41ff5a1456b603e8b2753adb511ac0a9ee839b9d73f658f3c72064f5ce
3
  size 454197066
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fda2635cf5eb97c959b3473ee8ed882086ce2dab72fcbeb241e25a40c966b7bf
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea76d909d3c482cb352315f678531422d38691040b817f22cbe42d6a4283267d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b42df27cb36c10cd745fac26ea88f136bc616f898f1fd1486819c4852d2c54f3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb7667759b15c886ebe5f226629aa0745d8db84769a12c7b2d7c8d4d08bd35a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.38,
5
- "global_step": 380000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2818,11 +2818,85 @@
2818
  "eval_samples_per_second": 107.622,
2819
  "eval_steps_per_second": 13.453,
2820
  "step": 380000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2821
  }
2822
  ],
2823
  "max_steps": 1000000,
2824
  "num_train_epochs": 9223372036854775807,
2825
- "total_flos": 3.3647518658278195e+21,
2826
  "trial_name": null,
2827
  "trial_params": null
2828
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.39,
5
+ "global_step": 390000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2818
  "eval_samples_per_second": 107.622,
2819
  "eval_steps_per_second": 13.453,
2820
  "step": 380000
2821
+ },
2822
+ {
2823
+ "epoch": 0.38,
2824
+ "learning_rate": 1.6380520358812106e-05,
2825
+ "loss": 0.3466,
2826
+ "step": 381000
2827
+ },
2828
+ {
2829
+ "epoch": 0.38,
2830
+ "learning_rate": 1.636765119754312e-05,
2831
+ "loss": 0.345,
2832
+ "step": 382000
2833
+ },
2834
+ {
2835
+ "epoch": 0.38,
2836
+ "learning_rate": 1.635476024495989e-05,
2837
+ "loss": 0.3439,
2838
+ "step": 383000
2839
+ },
2840
+ {
2841
+ "epoch": 0.38,
2842
+ "learning_rate": 1.6341847642035807e-05,
2843
+ "loss": 0.3447,
2844
+ "step": 384000
2845
+ },
2846
+ {
2847
+ "epoch": 0.39,
2848
+ "learning_rate": 1.632891352998103e-05,
2849
+ "loss": 0.3453,
2850
+ "step": 385000
2851
+ },
2852
+ {
2853
+ "epoch": 0.39,
2854
+ "eval_runtime": 3143.6271,
2855
+ "eval_samples_per_second": 108.045,
2856
+ "eval_steps_per_second": 13.506,
2857
+ "step": 385000
2858
+ },
2859
+ {
2860
+ "epoch": 0.39,
2861
+ "learning_rate": 1.631595805024093e-05,
2862
+ "loss": 0.3451,
2863
+ "step": 386000
2864
+ },
2865
+ {
2866
+ "epoch": 0.39,
2867
+ "learning_rate": 1.6302981344494562e-05,
2868
+ "loss": 0.3411,
2869
+ "step": 387000
2870
+ },
2871
+ {
2872
+ "epoch": 0.39,
2873
+ "learning_rate": 1.62899835546531e-05,
2874
+ "loss": 0.3435,
2875
+ "step": 388000
2876
+ },
2877
+ {
2878
+ "epoch": 0.39,
2879
+ "learning_rate": 1.6276964822858297e-05,
2880
+ "loss": 0.3437,
2881
+ "step": 389000
2882
+ },
2883
+ {
2884
+ "epoch": 0.39,
2885
+ "learning_rate": 1.6263925291480904e-05,
2886
+ "loss": 0.3446,
2887
+ "step": 390000
2888
+ },
2889
+ {
2890
+ "epoch": 0.39,
2891
+ "eval_runtime": 3211.4597,
2892
+ "eval_samples_per_second": 105.762,
2893
+ "eval_steps_per_second": 13.22,
2894
+ "step": 390000
2895
  }
2896
  ],
2897
  "max_steps": 1000000,
2898
  "num_train_epochs": 9223372036854775807,
2899
+ "total_flos": 3.4532979675601306e+21,
2900
  "trial_name": null,
2901
  "trial_params": null
2902
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eaf5cca6f5ad9c15d0fba6e119e98e99e1b26c717ce470e245593e0001049d48
3
  size 454197066
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0754eb41ff5a1456b603e8b2753adb511ac0a9ee839b9d73f658f3c72064f5ce
3
  size 454197066