mikhail-panzo commited on
Commit
64c202c
1 Parent(s): 6cd6061

Training in progress, step 2500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:568beaea7b10151ac36b16502a905b74eb42e9692e9cf626d4e54483d9f62675
3
  size 577789320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3125c5cdd907859825b85e2b44f1a2c45eced9608219b8491212276b64dd40b6
3
  size 577789320
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:07e91e95945e3993b28704b8a8ff0fe58a187246a0cce963427a80f323bcbc41
3
  size 1155772233
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36e743907e3ef9df701cfdaa04f41653cedb1fb402064f70e353c7669c6b3222
3
  size 1155772233
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ef10d6e78066e1e0b64d0a4f7c5c873eaaaea2dfe3daf019e2a56206b658ad5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6562043b413fa7e5997eaf05f34bba7850c0b7649af0796d00c8d62fb292ee1d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8200c7911b70674e95f6ea3c4fd4f7c5a623c58e7efa3752996c3106e6a97146
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d180e6bb7b7c41ee65a41a5e646849b13af9646efdef1095f15a2a7e5f3cda1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.4016592502593994,
3
- "best_model_checkpoint": "mikhail-panzo/ceb_b128_le5_s4000/checkpoint-2000",
4
- "epoch": 156.86274509803923,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -319,6 +319,84 @@
319
  "eval_samples_per_second": 27.954,
320
  "eval_steps_per_second": 3.572,
321
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  }
323
  ],
324
  "logging_steps": 50,
@@ -338,7 +416,7 @@
338
  "attributes": {}
339
  }
340
  },
341
- "total_flos": 4.352128118839037e+16,
342
  "train_batch_size": 32,
343
  "trial_name": null,
344
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.39692553877830505,
3
+ "best_model_checkpoint": "mikhail-panzo/ceb_b128_le5_s4000/checkpoint-2500",
4
+ "epoch": 196.07843137254903,
5
  "eval_steps": 500,
6
+ "global_step": 2500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
319
  "eval_samples_per_second": 27.954,
320
  "eval_steps_per_second": 3.572,
321
  "step": 2000
322
+ },
323
+ {
324
+ "epoch": 160.7843137254902,
325
+ "grad_norm": 0.7746613025665283,
326
+ "learning_rate": 9.755e-06,
327
+ "loss": 0.4306,
328
+ "step": 2050
329
+ },
330
+ {
331
+ "epoch": 164.7058823529412,
332
+ "grad_norm": 0.6868831515312195,
333
+ "learning_rate": 9.505000000000001e-06,
334
+ "loss": 0.4302,
335
+ "step": 2100
336
+ },
337
+ {
338
+ "epoch": 168.62745098039215,
339
+ "grad_norm": 1.010834813117981,
340
+ "learning_rate": 9.255e-06,
341
+ "loss": 0.4254,
342
+ "step": 2150
343
+ },
344
+ {
345
+ "epoch": 172.54901960784315,
346
+ "grad_norm": 1.054592490196228,
347
+ "learning_rate": 9.005000000000001e-06,
348
+ "loss": 0.4248,
349
+ "step": 2200
350
+ },
351
+ {
352
+ "epoch": 176.47058823529412,
353
+ "grad_norm": 0.8121660351753235,
354
+ "learning_rate": 8.755e-06,
355
+ "loss": 0.4227,
356
+ "step": 2250
357
+ },
358
+ {
359
+ "epoch": 180.3921568627451,
360
+ "grad_norm": 0.6637047529220581,
361
+ "learning_rate": 8.505e-06,
362
+ "loss": 0.4232,
363
+ "step": 2300
364
+ },
365
+ {
366
+ "epoch": 184.31372549019608,
367
+ "grad_norm": 1.0822277069091797,
368
+ "learning_rate": 8.255000000000001e-06,
369
+ "loss": 0.4226,
370
+ "step": 2350
371
+ },
372
+ {
373
+ "epoch": 188.23529411764707,
374
+ "grad_norm": 0.759693443775177,
375
+ "learning_rate": 8.005e-06,
376
+ "loss": 0.4236,
377
+ "step": 2400
378
+ },
379
+ {
380
+ "epoch": 192.15686274509804,
381
+ "grad_norm": 0.576042652130127,
382
+ "learning_rate": 7.755000000000001e-06,
383
+ "loss": 0.4162,
384
+ "step": 2450
385
+ },
386
+ {
387
+ "epoch": 196.07843137254903,
388
+ "grad_norm": 0.8360034227371216,
389
+ "learning_rate": 7.505e-06,
390
+ "loss": 0.4223,
391
+ "step": 2500
392
+ },
393
+ {
394
+ "epoch": 196.07843137254903,
395
+ "eval_loss": 0.39692553877830505,
396
+ "eval_runtime": 6.4387,
397
+ "eval_samples_per_second": 27.956,
398
+ "eval_steps_per_second": 3.572,
399
+ "step": 2500
400
  }
401
  ],
402
  "logging_steps": 50,
 
416
  "attributes": {}
417
  }
418
  },
419
+ "total_flos": 5.440105584628308e+16,
420
  "train_batch_size": 32,
421
  "trial_name": null,
422
  "trial_params": null