smirki committed on
Commit
54ed0bf
·
verified ·
1 Parent(s): 7898b21

Training in progress, step 700, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dde5e9886df1a06cfdfe007189a6433cb7a5f73cb42b5a5b386732a460844a5b
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5a04e84830bbd6a9d15a7f1b7837c35b2c5aa3c3d810fb936a39fabd501f732
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6af8a0ed34f4ca382db8716933993dfd5adf698a5f8abc4921176880d589a90
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9d953155e50b0ab53fcf21d7698c3548b16bde8faffe38265b83b4c176691fe
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c09760c610e47668e35c66624317f7474be74f8d6ddb09b4e5b00e8bf9d9f0ca
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d69b3ec032c894f09c29bf70e7979ef1f260d871795cce9f40bc39aed3eb516f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59f0296be23c202d35d7785a2cf57d4f962424acd2684f32b917f44308d185d8
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29fe942acb075168b131d317a2f6faee5c51c7e00aad3609dac6f7c5e3261669
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.005375873081609735,
5
  "eval_steps": 500,
6
- "global_step": 675,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -811,6 +811,42 @@
811
  "reward_std": 0.26030006259679794,
812
  "rewards/custom_reward_simplified_v7_dblog": 0.81875,
813
  "step": 670
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
814
  }
815
  ],
816
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.005574979492039726,
5
  "eval_steps": 500,
6
+ "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
811
  "reward_std": 0.26030006259679794,
812
  "rewards/custom_reward_simplified_v7_dblog": 0.81875,
813
  "step": 670
814
+ },
815
+ {
816
+ "completion_length": 696.30625,
817
+ "epoch": 0.005415694363695734,
818
+ "grad_norm": 0.2144252061843872,
819
+ "kl": 0.005292760988231749,
820
+ "learning_rate": 4.50530798188761e-06,
821
+ "loss": 0.0002,
822
+ "reward": 0.609375,
823
+ "reward_std": 0.2595392823219299,
824
+ "rewards/custom_reward_simplified_v7_dblog": 0.609375,
825
+ "step": 680
826
+ },
827
+ {
828
+ "completion_length": 696.99375,
829
+ "epoch": 0.00549533692786773,
830
+ "grad_norm": 0.006262101698666811,
831
+ "kl": 0.005413674132432789,
832
+ "learning_rate": 4.4833833507280884e-06,
833
+ "loss": 0.0002,
834
+ "reward": 0.684375,
835
+ "reward_std": 0.24843912497162818,
836
+ "rewards/custom_reward_simplified_v7_dblog": 0.684375,
837
+ "step": 690
838
+ },
839
+ {
840
+ "completion_length": 675.50625,
841
+ "epoch": 0.005574979492039726,
842
+ "grad_norm": 0.16301825642585754,
843
+ "kl": 0.005892223375849426,
844
+ "learning_rate": 4.46103916229894e-06,
845
+ "loss": 0.0002,
846
+ "reward": 0.80625,
847
+ "reward_std": 0.34091843143105505,
848
+ "rewards/custom_reward_simplified_v7_dblog": 0.80625,
849
+ "step": 700
850
  }
851
  ],
852
  "logging_steps": 10,