smirki commited on
Commit
857974b
·
verified ·
1 Parent(s): 8bbb5ff

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f0b9fe960d8b700432d64261f2e9fb8b5e2feb648ccba2b26954af39f14f187
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d0b10032621567c53e0e12f4051e3fa7f9880f7b87ae153f1600c2eed98d364
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6980510b381af43d0d08ceb7a59264815242f1f6223f0a5de785069b7c5b74d7
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4c7399a7124ebfc8f2602ee4160ffecfe9c2c5345d69e9697c401f6fd3bd73d
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63e7217b76d9787359cf45d83ba0b63cb6335c60810ebfe7324880fdde71d442
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d71d011e87b512f28e794476e44bdcb409ab9a4721e9b4147120eeb12f1053d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9d42f94a00d3d38a441f86dbb87d1da2ac6b6e6fceeb3fff0437ffb348f193a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47394e7e6639b2da14254e3e88e50a05cb1b1d15a05d1aa46398ae3b93c7909f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0011946384625799412,
5
  "eval_steps": 500,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -187,6 +187,66 @@
187
  "reward_std": 0.430637900531292,
188
  "rewards/custom_reward_logic_v2": -0.04024999849498272,
189
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  }
191
  ],
192
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0015928512834399217,
5
  "eval_steps": 500,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
187
  "reward_std": 0.430637900531292,
188
  "rewards/custom_reward_logic_v2": -0.04024999849498272,
189
  "step": 150
190
+ },
191
+ {
192
+ "completion_length": 18.93125,
193
+ "epoch": 0.0012742810267519374,
194
+ "grad_norm": 0.032512303441762924,
195
+ "kl": 0.33459745422005654,
196
+ "learning_rate": 4.849231551964771e-06,
197
+ "loss": 0.0134,
198
+ "reward": 0.1650000035762787,
199
+ "reward_std": 0.07605109438300132,
200
+ "rewards/custom_reward_logic_v2": 0.1650000035762787,
201
+ "step": 160
202
+ },
203
+ {
204
+ "completion_length": 17.4375,
205
+ "epoch": 0.0013539235909239334,
206
+ "grad_norm": 0.02004638873040676,
207
+ "kl": 0.35064528286457064,
208
+ "learning_rate": 4.809698831278217e-06,
209
+ "loss": 0.014,
210
+ "reward": 0.08999999985098839,
211
+ "reward_std": 0.125558003783226,
212
+ "rewards/custom_reward_logic_v2": 0.08999999985098839,
213
+ "step": 170
214
+ },
215
+ {
216
+ "completion_length": 23.975,
217
+ "epoch": 0.0014335661550959294,
218
+ "grad_norm": 0.2281995564699173,
219
+ "kl": 0.3118164837360382,
220
+ "learning_rate": 4.765769467591626e-06,
221
+ "loss": 0.0125,
222
+ "reward": 0.08099999986588954,
223
+ "reward_std": 0.18301311507821083,
224
+ "rewards/custom_reward_logic_v2": 0.08099999986588954,
225
+ "step": 180
226
+ },
227
+ {
228
+ "completion_length": 17.7,
229
+ "epoch": 0.0015132087192679256,
230
+ "grad_norm": 0.20832708477973938,
231
+ "kl": 0.34881954491138456,
232
+ "learning_rate": 4.717527082945555e-06,
233
+ "loss": 0.014,
234
+ "reward": 0.14687500111758708,
235
+ "reward_std": 0.13193419948220253,
236
+ "rewards/custom_reward_logic_v2": 0.14687500111758708,
237
+ "step": 190
238
+ },
239
+ {
240
+ "completion_length": 28.76875,
241
+ "epoch": 0.0015928512834399217,
242
+ "grad_norm": 0.2148224264383316,
243
+ "kl": 0.4086977861821651,
244
+ "learning_rate": 4.665063509461098e-06,
245
+ "loss": 0.0163,
246
+ "reward": 0.06411250084638595,
247
+ "reward_std": 0.09681975245475768,
248
+ "rewards/custom_reward_logic_v2": 0.06411250084638595,
249
+ "step": 200
250
  }
251
  ],
252
  "logging_steps": 10,