Starred committed on
Commit
cc89558
·
verified ·
1 Parent(s): 0de7f0e

Training in progress, step 750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b7426f77545243fe44b61bdf327bbe8386aaecd03d9b377cde6bb8b482e8a3a
3
  size 84972248
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:056b42131c364ac6d3ffdcf649a5c97906ef4f0d8b378519839e223cdd8ac91d
3
  size 84972248
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76fc2fa6250a96154629a5641458d43a79a6deaf4569bdd3d1051fc60b41bdab
3
  size 43434405
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21b04b38bd9193dccba2b26c7971c57e8612c2d88b3fe83ba627fe8b6bf98a41
3
  size 43434405
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:27978b0c0a2d70a6d9e24fba0f2ed928bba9b47aecb6cc92523c33dd4a9bc29f
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa4695b71941286f2b5c53635fbeb413ba790d86d240df638f320993845abe31
3
  size 14917
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d4883ea9e9e4170ef036b08f27772e01a500be9d13e3c9492d90b6357d7e8c6
3
  size 14917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69fba46c77118e21066090a5469435f96321e6406988c306b041caec1e98a4c8
3
  size 14917
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46a5dde96232ac87d1fa1ed9715479941761512093c62e0c951b01e5c58be3a3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d789993928e049b3b0b113d88443633a86c205feae04d9e432a85efd6874b32
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 500,
3
- "best_metric": 0.40328726172447205,
4
- "best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-500",
5
- "epoch": 0.22171719971176765,
6
  "eval_steps": 125,
7
- "global_step": 500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -192,6 +192,92 @@
192
  "tokens_per_second": 368.8907701487212,
193
  "tokens_per_step": 1793.302,
194
  "total_tokens_seen": 896651
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
196
  ],
197
  "logging_steps": 50,
@@ -220,7 +306,7 @@
220
  "attributes": {}
221
  }
222
  },
223
- "total_flos": 3.66545497793495e+16,
224
  "train_batch_size": 1,
225
  "trial_name": null,
226
  "trial_params": null
 
1
  {
2
+ "best_global_step": 750,
3
+ "best_metric": 0.3200623393058777,
4
+ "best_model_checkpoint": "/kaggle/working/obsidian_critic_qwen35_t4x2_unsloth/runs/obsidian_critic_full_epoch/checkpoint-750",
5
+ "epoch": 0.3325757995676515,
6
  "eval_steps": 125,
7
+ "global_step": 750,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
192
  "tokens_per_second": 368.8907701487212,
193
  "tokens_per_step": 1793.302,
194
  "total_tokens_seen": 896651
195
+ },
196
+ {
197
+ "epoch": 0.2438889196829444,
198
+ "grad_norm": 0.7751753330230713,
199
+ "last_batch_tokens": 273,
200
+ "learning_rate": 8.626831825760946e-05,
201
+ "loss": 0.3414393615722656,
202
+ "lr": 8.622024749619364e-05,
203
+ "step": 550,
204
+ "tokens_per_second": 82.92877874873523,
205
+ "tokens_per_step": 1766.3690909090908,
206
+ "total_tokens_seen": 971503
207
+ },
208
+ {
209
+ "epoch": 0.2660606396541212,
210
+ "grad_norm": 0.7136653065681458,
211
+ "last_batch_tokens": 305,
212
+ "learning_rate": 8.378022494113098e-05,
213
+ "loss": 0.3377827072143555,
214
+ "lr": 8.372874417081631e-05,
215
+ "step": 600,
216
+ "tokens_per_second": 90.40251231127895,
217
+ "tokens_per_step": 1748.685,
218
+ "total_tokens_seen": 1049211
219
+ },
220
+ {
221
+ "epoch": 0.27714649963970955,
222
+ "eval_loss": 0.35334891080856323,
223
+ "eval_runtime": 87.0325,
224
+ "eval_samples_per_second": 4.171,
225
+ "eval_steps_per_second": 2.091,
226
+ "last_batch_tokens": 172,
227
+ "lr": 8.24206361704162e-05,
228
+ "step": 625,
229
+ "tokens_per_second": 135.75737480096265,
230
+ "tokens_per_step": 1791.824,
231
+ "total_tokens_seen": 1119890
232
+ },
233
+ {
234
+ "epoch": 0.2882323596252979,
235
+ "grad_norm": 0.7202998399734497,
236
+ "last_batch_tokens": 211,
237
+ "learning_rate": 8.112770389539574e-05,
238
+ "loss": 0.3233934020996094,
239
+ "lr": 8.107306370261785e-05,
240
+ "step": 650,
241
+ "tokens_per_second": 84.5144051400581,
242
+ "tokens_per_step": 1779.3815384615384,
243
+ "total_tokens_seen": 1156598
244
+ },
245
+ {
246
+ "epoch": 0.3104040795964747,
247
+ "grad_norm": 0.7681185007095337,
248
+ "last_batch_tokens": 236,
249
+ "learning_rate": 7.832366646167268e-05,
250
+ "loss": 0.3125551414489746,
251
+ "lr": 7.826613281158841e-05,
252
+ "step": 700,
253
+ "tokens_per_second": 84.37944807859942,
254
+ "tokens_per_step": 1759.6771428571428,
255
+ "total_tokens_seen": 1231774
256
+ },
257
+ {
258
+ "epoch": 0.3325757995676515,
259
+ "grad_norm": 0.659271776676178,
260
+ "last_batch_tokens": 939,
261
+ "learning_rate": 7.538176149839243e-05,
262
+ "loss": 0.28798053741455076,
263
+ "lr": 7.532161444027488e-05,
264
+ "step": 750,
265
+ "tokens_per_second": 87.73140620694117,
266
+ "tokens_per_step": 1745.06,
267
+ "total_tokens_seen": 1308795
268
+ },
269
+ {
270
+ "epoch": 0.3325757995676515,
271
+ "eval_loss": 0.3200623393058777,
272
+ "eval_runtime": 87.2377,
273
+ "eval_samples_per_second": 4.161,
274
+ "eval_steps_per_second": 2.086,
275
+ "last_batch_tokens": 172,
276
+ "lr": 7.532161444027488e-05,
277
+ "step": 750,
278
+ "tokens_per_second": 368.35941630029333,
279
+ "tokens_per_step": 1787.9106666666667,
280
+ "total_tokens_seen": 1340933
281
  }
282
  ],
283
  "logging_steps": 50,
 
306
  "attributes": {}
307
  }
308
  },
309
+ "total_flos": 5.483273203482624e+16,
310
  "train_batch_size": 1,
311
  "trial_name": null,
312
  "trial_params": null