iamnguyen commited on
Commit
a7d8a98
1 Parent(s): 24a33a0

Training in progress, step 5920, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8b79b765163726683a5b97fb0c1abea490722bbbf1fc391f9b7a985bcdd16c3
3
  size 479769104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8386ad9cbf76ebd86a333cb2de2e393ac58f2eb5d5851c4a37eaf7dfabf8e1ab
3
  size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c4be8aec95d333750d7914e18caa878b08e168adf924ebf31647ab744346164
3
  size 240728404
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae6e60f3154077edf3be4b7a48662dd49359a11649f46c508259fc0512184b10
3
  size 240728404
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d08e0bef61039a4f769cc5e4c3c08f715d445eab2242d23ea2e8a9e30cd2439d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80758c06b27a6f832fca7f167d67f92046d71167a066f87edf1f02350e7dfed9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.38170048730816786,
5
  "eval_steps": 500,
6
- "global_step": 5904,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -41335,6 +41335,118 @@
41335
  "learning_rate": 6.906780795455553e-06,
41336
  "loss": 1.2322,
41337
  "step": 5904
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41338
  }
41339
  ],
41340
  "logging_steps": 1,
@@ -41354,7 +41466,7 @@
41354
  "attributes": {}
41355
  }
41356
  },
41357
- "total_flos": 5.346276383970312e+18,
41358
  "train_batch_size": 4,
41359
  "trial_name": null,
41360
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.38273490597295967,
5
  "eval_steps": 500,
6
+ "global_step": 5920,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
41335
  "learning_rate": 6.906780795455553e-06,
41336
  "loss": 1.2322,
41337
  "step": 5904
41338
+ },
41339
+ {
41340
+ "epoch": 0.3817651384747174,
41341
+ "grad_norm": 2.9935035705566406,
41342
+ "learning_rate": 6.905832421729944e-06,
41343
+ "loss": 1.0633,
41344
+ "step": 5905
41345
+ },
41346
+ {
41347
+ "epoch": 0.38182978964126685,
41348
+ "grad_norm": 2.6144261360168457,
41349
+ "learning_rate": 6.9048839677773235e-06,
41350
+ "loss": 1.1399,
41351
+ "step": 5906
41352
+ },
41353
+ {
41354
+ "epoch": 0.3818944408078163,
41355
+ "grad_norm": 2.6496615409851074,
41356
+ "learning_rate": 6.9039354336376195e-06,
41357
+ "loss": 1.1642,
41358
+ "step": 5907
41359
+ },
41360
+ {
41361
+ "epoch": 0.38195909197436584,
41362
+ "grad_norm": 2.639331340789795,
41363
+ "learning_rate": 6.902986819350757e-06,
41364
+ "loss": 1.1909,
41365
+ "step": 5908
41366
+ },
41367
+ {
41368
+ "epoch": 0.3820237431409153,
41369
+ "grad_norm": 2.60591197013855,
41370
+ "learning_rate": 6.90203812495667e-06,
41371
+ "loss": 1.0447,
41372
+ "step": 5909
41373
+ },
41374
+ {
41375
+ "epoch": 0.3820883943074648,
41376
+ "grad_norm": 2.6222944259643555,
41377
+ "learning_rate": 6.901089350495296e-06,
41378
+ "loss": 1.1576,
41379
+ "step": 5910
41380
+ },
41381
+ {
41382
+ "epoch": 0.3821530454740143,
41383
+ "grad_norm": 2.601048231124878,
41384
+ "learning_rate": 6.900140496006572e-06,
41385
+ "loss": 1.2307,
41386
+ "step": 5911
41387
+ },
41388
+ {
41389
+ "epoch": 0.38221769664056376,
41390
+ "grad_norm": 2.1494765281677246,
41391
+ "learning_rate": 6.899191561530441e-06,
41392
+ "loss": 1.1478,
41393
+ "step": 5912
41394
+ },
41395
+ {
41396
+ "epoch": 0.38228234780711323,
41397
+ "grad_norm": 2.7690351009368896,
41398
+ "learning_rate": 6.8982425471068495e-06,
41399
+ "loss": 1.1497,
41400
+ "step": 5913
41401
+ },
41402
+ {
41403
+ "epoch": 0.38234699897366276,
41404
+ "grad_norm": 2.669267177581787,
41405
+ "learning_rate": 6.897293452775746e-06,
41406
+ "loss": 1.2059,
41407
+ "step": 5914
41408
+ },
41409
+ {
41410
+ "epoch": 0.3824116501402122,
41411
+ "grad_norm": 2.6427619457244873,
41412
+ "learning_rate": 6.896344278577083e-06,
41413
+ "loss": 1.1997,
41414
+ "step": 5915
41415
+ },
41416
+ {
41417
+ "epoch": 0.3824763013067617,
41418
+ "grad_norm": 2.8328170776367188,
41419
+ "learning_rate": 6.8953950245508186e-06,
41420
+ "loss": 1.174,
41421
+ "step": 5916
41422
+ },
41423
+ {
41424
+ "epoch": 0.3825409524733112,
41425
+ "grad_norm": 2.3416833877563477,
41426
+ "learning_rate": 6.894445690736911e-06,
41427
+ "loss": 1.2323,
41428
+ "step": 5917
41429
+ },
41430
+ {
41431
+ "epoch": 0.3826056036398607,
41432
+ "grad_norm": 2.6076183319091797,
41433
+ "learning_rate": 6.89349627717532e-06,
41434
+ "loss": 1.2821,
41435
+ "step": 5918
41436
+ },
41437
+ {
41438
+ "epoch": 0.38267025480641015,
41439
+ "grad_norm": 2.3602590560913086,
41440
+ "learning_rate": 6.892546783906016e-06,
41441
+ "loss": 1.1353,
41442
+ "step": 5919
41443
+ },
41444
+ {
41445
+ "epoch": 0.38273490597295967,
41446
+ "grad_norm": 2.70065975189209,
41447
+ "learning_rate": 6.891597210968965e-06,
41448
+ "loss": 1.1731,
41449
+ "step": 5920
41450
  }
41451
  ],
41452
  "logging_steps": 1,
 
41466
  "attributes": {}
41467
  }
41468
  },
41469
+ "total_flos": 5.360788093260792e+18,
41470
  "train_batch_size": 4,
41471
  "trial_name": null,
41472
  "trial_params": null