Krish356 commited on
Commit
48276ae
·
verified ·
1 Parent(s): d8c8a30

Training in progress, step 180, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a6d8f0f796537366c6e78ca004befff9f9c27672a628bae1e611c8bc0f94c8c
3
  size 3380768360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd22194ae981192953c10bd8c7d7c229d363fe3051c02792c9530080c309db2d
3
  size 3380768360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ffd75d21ac4ac0a8645a72715f91e4d5f09c05dd5a2548ed04a8b49d623fc3a5
3
  size 1757899449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd7667b7685377e93cb674f532d76551a3a23b1d3a5e2dd6690c6cc456e5c3b4
3
  size 1757899449
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5dc9af15ae765cffc21eeb6ddbc68a2629e47a5fc5164b3c35695e55c025ec4
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7278f9a4041731694f91598435752e0692e41d16df60add795880e049f862551
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:213d03f95061a3291403e8d5572036299f2f6f739be51135e2941aff4f3ccff7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce07496a37a4ed6b1a548b1e73eeccecc1cf6f60eafddb58dcb81201d88a83bc
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.5644599303135889,
6
  "eval_steps": 30,
7
- "global_step": 150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -265,6 +265,56 @@
265
  "eval_samples_per_second": 0.291,
266
  "eval_steps_per_second": 0.073,
267
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  }
269
  ],
270
  "logging_steps": 5,
@@ -284,7 +334,7 @@
284
  "attributes": {}
285
  }
286
  },
287
- "total_flos": 9.840854969157304e+17,
288
  "train_batch_size": 8,
289
  "trial_name": null,
290
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.8780487804878048,
6
  "eval_steps": 30,
7
+ "global_step": 180,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
265
  "eval_samples_per_second": 0.291,
266
  "eval_steps_per_second": 0.073,
267
  "step": 150
268
+ },
269
+ {
270
+ "epoch": 1.6167247386759582,
271
+ "grad_norm": 0.07571443915367126,
272
+ "learning_rate": 9.949945276530781e-06,
273
+ "loss": 0.205,
274
+ "step": 155
275
+ },
276
+ {
277
+ "epoch": 1.6689895470383276,
278
+ "grad_norm": 0.08948186039924622,
279
+ "learning_rate": 7.5677871252624485e-06,
280
+ "loss": 0.2501,
281
+ "step": 160
282
+ },
283
+ {
284
+ "epoch": 1.721254355400697,
285
+ "grad_norm": 0.185760036110878,
286
+ "learning_rate": 5.488077459582425e-06,
287
+ "loss": 0.3175,
288
+ "step": 165
289
+ },
290
+ {
291
+ "epoch": 1.773519163763066,
292
+ "grad_norm": 0.055869363248348236,
293
+ "learning_rate": 3.7256400418220262e-06,
294
+ "loss": 0.1723,
295
+ "step": 170
296
+ },
297
+ {
298
+ "epoch": 1.8257839721254356,
299
+ "grad_norm": 0.0660533756017685,
300
+ "learning_rate": 2.2930371799975594e-06,
301
+ "loss": 0.1959,
302
+ "step": 175
303
+ },
304
+ {
305
+ "epoch": 1.8780487804878048,
306
+ "grad_norm": 0.07585973292589188,
307
+ "learning_rate": 1.2004801861442371e-06,
308
+ "loss": 0.2145,
309
+ "step": 180
310
+ },
311
+ {
312
+ "epoch": 1.8780487804878048,
313
+ "eval_loss": 0.23282098770141602,
314
+ "eval_runtime": 1752.5559,
315
+ "eval_samples_per_second": 0.291,
316
+ "eval_steps_per_second": 0.073,
317
+ "step": 180
318
  }
319
  ],
320
  "logging_steps": 5,
 
334
  "attributes": {}
335
  }
336
  },
337
+ "total_flos": 1.1783682305701724e+18,
338
  "train_batch_size": 8,
339
  "trial_name": null,
340
  "trial_params": null