g8a9 commited on
Commit
dedc74d
1 Parent(s): 46c8677

Upload folder using huggingface_hub

Browse files
pytorch_model-00001-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2a8a9cde4c46113cbbf48ae220d46fa6219fad20c081a6bf0aedee0e9719f7e
3
  size 9949048046
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667777c452190f8efb5e339659cf4fa836a754a548e0733c79257adbd40e4a11
3
  size 9949048046
pytorch_model-00002-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c4e0301bb2e41ff4ff0efb7bdc0cfe5cab5e5713aa08138b7624806cfa7450f
3
  size 9904474400
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc05ba86ee883984ddcd71eb936a37e3d0b8c83ba9eff9d9f8a9a921dc0dacf9
3
  size 9904474400
pytorch_model-00003-of-00003.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c085de478c04c9209f03c904d8d33b01895c7622458b9895f775f397e6a75cf4
3
  size 6179210249
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75fcd7c91eeaf5a795d2fcc071e019dfcfdce82b861e42071e758e82882d68b6
3
  size 6179210249
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51edccd6f9855740cdfe1d96d81afba5cfd7b32b28cf15897f8fbadf588f1179
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae77d1d86eb7275f2a9fab8fdd0f3d2e9b085c5393ceeb24294803290f3941e
3
  size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec0a535d2c9c4c62a74336a7f93b6d947a1152f53a6066eccd4123d6b477c15c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3415aff70181afd51efc048fb7c4348442ee37b0317e7e93f002f0a59e0e3ea3
3
  size 627
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005037789125310321,
5
- "global_step": 8000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2782,11 +2782,79 @@
2782
  "eval_samples_per_second": 2.969,
2783
  "eval_steps_per_second": 0.742,
2784
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2785
  }
2786
  ],
2787
  "max_steps": 10000,
2788
  "num_train_epochs": 1,
2789
- "total_flos": 2.484742130335789e+19,
2790
  "trial_name": null,
2791
  "trial_params": null
2792
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005352650945642216,
5
+ "global_step": 8500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2782
  "eval_samples_per_second": 2.969,
2783
  "eval_steps_per_second": 0.742,
2784
  "step": 8000
2785
+ },
2786
+ {
2787
+ "epoch": 0.01,
2788
+ "learning_rate": 0.0002,
2789
+ "loss": 1.9936,
2790
+ "step": 8050
2791
+ },
2792
+ {
2793
+ "epoch": 0.01,
2794
+ "learning_rate": 0.0002,
2795
+ "loss": 1.9682,
2796
+ "step": 8100
2797
+ },
2798
+ {
2799
+ "epoch": 0.01,
2800
+ "learning_rate": 0.0002,
2801
+ "loss": 2.0048,
2802
+ "step": 8150
2803
+ },
2804
+ {
2805
+ "epoch": 0.01,
2806
+ "learning_rate": 0.0002,
2807
+ "loss": 1.9783,
2808
+ "step": 8200
2809
+ },
2810
+ {
2811
+ "epoch": 0.01,
2812
+ "learning_rate": 0.0002,
2813
+ "loss": 1.9703,
2814
+ "step": 8250
2815
+ },
2816
+ {
2817
+ "epoch": 0.01,
2818
+ "learning_rate": 0.0002,
2819
+ "loss": 2.0042,
2820
+ "step": 8300
2821
+ },
2822
+ {
2823
+ "epoch": 0.01,
2824
+ "learning_rate": 0.0002,
2825
+ "loss": 1.9848,
2826
+ "step": 8350
2827
+ },
2828
+ {
2829
+ "epoch": 0.01,
2830
+ "learning_rate": 0.0002,
2831
+ "loss": 1.9808,
2832
+ "step": 8400
2833
+ },
2834
+ {
2835
+ "epoch": 0.01,
2836
+ "learning_rate": 0.0002,
2837
+ "loss": 1.9871,
2838
+ "step": 8450
2839
+ },
2840
+ {
2841
+ "epoch": 0.01,
2842
+ "learning_rate": 0.0002,
2843
+ "loss": 1.9733,
2844
+ "step": 8500
2845
+ },
2846
+ {
2847
+ "epoch": 0.01,
2848
+ "eval_loss": 2.0437986850738525,
2849
+ "eval_runtime": 34093.5093,
2850
+ "eval_samples_per_second": 2.97,
2851
+ "eval_steps_per_second": 0.743,
2852
+ "step": 8500
2853
  }
2854
  ],
2855
  "max_steps": 10000,
2856
  "num_train_epochs": 1,
2857
+ "total_flos": 2.679889113792e+19,
2858
  "trial_name": null,
2859
  "trial_params": null
2860
  }