irishprancer commited on
Commit
7415fcb
·
verified ·
1 Parent(s): f523408

Training in progress, step 5550, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4eaaf03b78bd99375228f4e3780fd0588fc02582773008769bf7177550d4b48
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d491ec5c79e068346c07ac72051b147d92ec50b0b38fc5b4f05250ed8013d65
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87baf48f45b21dd7d9a1576417255bf546e8558ecd18cd75468fb9ffa32e54f8
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4499a0d589d85027a908e783ca232b578c49e99556d4d108f652eaa7d4cd5da
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cde3dd12f91204388f748ef22c42d0af6362a11af96ae2767080c430a3556fd7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1be3d3b48fb8768a9f6d52575aecdf595860f5cf577f03b3acc8148f472cbae2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d8a06f6e764a4c806b3b6aa6930ec3c05d14769ecbf5db87f5122a0c04e591e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a533c2b46d6abcf961f28fb57a403e6e075b91a6ddf8fd09d7df3b1d5f213cea
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 234.7826086956522,
5
  "eval_steps": 150,
6
- "global_step": 5400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5227,6 +5227,151 @@
5227
  "EMA_steps_per_second": 25.563,
5228
  "epoch": 234.7826086956522,
5229
  "step": 5400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5230
  }
5231
  ],
5232
  "logging_steps": 10,
@@ -5246,7 +5391,7 @@
5246
  "attributes": {}
5247
  }
5248
  },
5249
- "total_flos": 1.3884011525792563e+17,
5250
  "train_batch_size": 4,
5251
  "trial_name": null,
5252
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166205048561096,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 241.30434782608697,
5
  "eval_steps": 150,
6
+ "global_step": 5550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5227
  "EMA_steps_per_second": 25.563,
5228
  "epoch": 234.7826086956522,
5229
  "step": 5400
5230
+ },
5231
+ {
5232
+ "epoch": 235.2173913043478,
5233
+ "grad_norm": 2.2689285278320312,
5234
+ "learning_rate": 1.0855486739416115e-06,
5235
+ "loss": 0.2335,
5236
+ "step": 5410
5237
+ },
5238
+ {
5239
+ "epoch": 235.65217391304347,
5240
+ "grad_norm": 2.205592393875122,
5241
+ "learning_rate": 1.085547275540868e-06,
5242
+ "loss": 0.2066,
5243
+ "step": 5420
5244
+ },
5245
+ {
5246
+ "epoch": 236.08695652173913,
5247
+ "grad_norm": 2.256180763244629,
5248
+ "learning_rate": 1.0855456620030405e-06,
5249
+ "loss": 0.2442,
5250
+ "step": 5430
5251
+ },
5252
+ {
5253
+ "epoch": 236.52173913043478,
5254
+ "grad_norm": 2.169275999069214,
5255
+ "learning_rate": 1.0855438333287692e-06,
5256
+ "loss": 0.1983,
5257
+ "step": 5440
5258
+ },
5259
+ {
5260
+ "epoch": 236.95652173913044,
5261
+ "grad_norm": 2.1479029655456543,
5262
+ "learning_rate": 1.0855417895187786e-06,
5263
+ "loss": 0.2359,
5264
+ "step": 5450
5265
+ },
5266
+ {
5267
+ "epoch": 237.3913043478261,
5268
+ "grad_norm": 1.7530748844146729,
5269
+ "learning_rate": 1.0855395305738789e-06,
5270
+ "loss": 0.2375,
5271
+ "step": 5460
5272
+ },
5273
+ {
5274
+ "epoch": 237.82608695652175,
5275
+ "grad_norm": 1.93467116355896,
5276
+ "learning_rate": 1.0855370564949654e-06,
5277
+ "loss": 0.2229,
5278
+ "step": 5470
5279
+ },
5280
+ {
5281
+ "epoch": 238.2608695652174,
5282
+ "grad_norm": 3.3168399333953857,
5283
+ "learning_rate": 1.0855343672830188e-06,
5284
+ "loss": 0.2231,
5285
+ "step": 5480
5286
+ },
5287
+ {
5288
+ "epoch": 238.69565217391303,
5289
+ "grad_norm": 2.073918342590332,
5290
+ "learning_rate": 1.085531462939105e-06,
5291
+ "loss": 0.223,
5292
+ "step": 5490
5293
+ },
5294
+ {
5295
+ "epoch": 239.1304347826087,
5296
+ "grad_norm": 2.3649418354034424,
5297
+ "learning_rate": 1.085528343464375e-06,
5298
+ "loss": 0.2133,
5299
+ "step": 5500
5300
+ },
5301
+ {
5302
+ "epoch": 239.56521739130434,
5303
+ "grad_norm": 2.719287395477295,
5304
+ "learning_rate": 1.0855250088600655e-06,
5305
+ "loss": 0.2752,
5306
+ "step": 5510
5307
+ },
5308
+ {
5309
+ "epoch": 240.0,
5310
+ "grad_norm": 5.105301380157471,
5311
+ "learning_rate": 1.0855214591274984e-06,
5312
+ "loss": 0.1964,
5313
+ "step": 5520
5314
+ },
5315
+ {
5316
+ "epoch": 240.43478260869566,
5317
+ "grad_norm": 1.5361961126327515,
5318
+ "learning_rate": 1.0855176942680803e-06,
5319
+ "loss": 0.2309,
5320
+ "step": 5530
5321
+ },
5322
+ {
5323
+ "epoch": 240.8695652173913,
5324
+ "grad_norm": 2.835388660430908,
5325
+ "learning_rate": 1.0855137142833035e-06,
5326
+ "loss": 0.2229,
5327
+ "step": 5540
5328
+ },
5329
+ {
5330
+ "epoch": 241.30434782608697,
5331
+ "grad_norm": 2.0795018672943115,
5332
+ "learning_rate": 1.0855095191747456e-06,
5333
+ "loss": 0.2335,
5334
+ "step": 5550
5335
+ },
5336
+ {
5337
+ "epoch": 241.30434782608697,
5338
+ "eval_loss": 0.9892138242721558,
5339
+ "eval_runtime": 0.5423,
5340
+ "eval_samples_per_second": 18.441,
5341
+ "eval_steps_per_second": 18.441,
5342
+ "step": 5550
5343
+ },
5344
+ {
5345
+ "Start_State_loss": 0.8609819412231445,
5346
+ "Start_State_runtime": 0.3972,
5347
+ "Start_State_samples_per_second": 25.175,
5348
+ "Start_State_steps_per_second": 25.175,
5349
+ "epoch": 241.30434782608697,
5350
+ "step": 5550
5351
+ },
5352
+ {
5353
+ "Raw_Model_loss": 0.9892138242721558,
5354
+ "Raw_Model_runtime": 0.4138,
5355
+ "Raw_Model_samples_per_second": 24.169,
5356
+ "Raw_Model_steps_per_second": 24.169,
5357
+ "epoch": 241.30434782608697,
5358
+ "step": 5550
5359
+ },
5360
+ {
5361
+ "SWA_loss": 0.831312358379364,
5362
+ "SWA_runtime": 0.4189,
5363
+ "SWA_samples_per_second": 23.872,
5364
+ "SWA_steps_per_second": 23.872,
5365
+ "epoch": 241.30434782608697,
5366
+ "step": 5550
5367
+ },
5368
+ {
5369
+ "EMA_loss": 0.8599440455436707,
5370
+ "EMA_runtime": 0.4024,
5371
+ "EMA_samples_per_second": 24.854,
5372
+ "EMA_steps_per_second": 24.854,
5373
+ "epoch": 241.30434782608697,
5374
+ "step": 5550
5375
  }
5376
  ],
5377
  "logging_steps": 10,
 
5391
  "attributes": {}
5392
  }
5393
  },
5394
+ "total_flos": 1.4269374356277658e+17,
5395
  "train_batch_size": 4,
5396
  "trial_name": null,
5397
  "trial_params": null