Training in progress, step 5920, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 479769104
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8386ad9cbf76ebd86a333cb2de2e393ac58f2eb5d5851c4a37eaf7dfabf8e1ab
|
3 |
size 479769104
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 240728404
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae6e60f3154077edf3be4b7a48662dd49359a11649f46c508259fc0512184b10
|
3 |
size 240728404
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:80758c06b27a6f832fca7f167d67f92046d71167a066f87edf1f02350e7dfed9
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -41335,6 +41335,118 @@
|
|
41335 |
"learning_rate": 6.906780795455553e-06,
|
41336 |
"loss": 1.2322,
|
41337 |
"step": 5904
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41338 |
}
|
41339 |
],
|
41340 |
"logging_steps": 1,
|
@@ -41354,7 +41466,7 @@
|
|
41354 |
"attributes": {}
|
41355 |
}
|
41356 |
},
|
41357 |
-
"total_flos": 5.
|
41358 |
"train_batch_size": 4,
|
41359 |
"trial_name": null,
|
41360 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.38273490597295967,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 5920,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
41335 |
"learning_rate": 6.906780795455553e-06,
|
41336 |
"loss": 1.2322,
|
41337 |
"step": 5904
|
41338 |
+
},
|
41339 |
+
{
|
41340 |
+
"epoch": 0.3817651384747174,
|
41341 |
+
"grad_norm": 2.9935035705566406,
|
41342 |
+
"learning_rate": 6.905832421729944e-06,
|
41343 |
+
"loss": 1.0633,
|
41344 |
+
"step": 5905
|
41345 |
+
},
|
41346 |
+
{
|
41347 |
+
"epoch": 0.38182978964126685,
|
41348 |
+
"grad_norm": 2.6144261360168457,
|
41349 |
+
"learning_rate": 6.9048839677773235e-06,
|
41350 |
+
"loss": 1.1399,
|
41351 |
+
"step": 5906
|
41352 |
+
},
|
41353 |
+
{
|
41354 |
+
"epoch": 0.3818944408078163,
|
41355 |
+
"grad_norm": 2.6496615409851074,
|
41356 |
+
"learning_rate": 6.9039354336376195e-06,
|
41357 |
+
"loss": 1.1642,
|
41358 |
+
"step": 5907
|
41359 |
+
},
|
41360 |
+
{
|
41361 |
+
"epoch": 0.38195909197436584,
|
41362 |
+
"grad_norm": 2.639331340789795,
|
41363 |
+
"learning_rate": 6.902986819350757e-06,
|
41364 |
+
"loss": 1.1909,
|
41365 |
+
"step": 5908
|
41366 |
+
},
|
41367 |
+
{
|
41368 |
+
"epoch": 0.3820237431409153,
|
41369 |
+
"grad_norm": 2.60591197013855,
|
41370 |
+
"learning_rate": 6.90203812495667e-06,
|
41371 |
+
"loss": 1.0447,
|
41372 |
+
"step": 5909
|
41373 |
+
},
|
41374 |
+
{
|
41375 |
+
"epoch": 0.3820883943074648,
|
41376 |
+
"grad_norm": 2.6222944259643555,
|
41377 |
+
"learning_rate": 6.901089350495296e-06,
|
41378 |
+
"loss": 1.1576,
|
41379 |
+
"step": 5910
|
41380 |
+
},
|
41381 |
+
{
|
41382 |
+
"epoch": 0.3821530454740143,
|
41383 |
+
"grad_norm": 2.601048231124878,
|
41384 |
+
"learning_rate": 6.900140496006572e-06,
|
41385 |
+
"loss": 1.2307,
|
41386 |
+
"step": 5911
|
41387 |
+
},
|
41388 |
+
{
|
41389 |
+
"epoch": 0.38221769664056376,
|
41390 |
+
"grad_norm": 2.1494765281677246,
|
41391 |
+
"learning_rate": 6.899191561530441e-06,
|
41392 |
+
"loss": 1.1478,
|
41393 |
+
"step": 5912
|
41394 |
+
},
|
41395 |
+
{
|
41396 |
+
"epoch": 0.38228234780711323,
|
41397 |
+
"grad_norm": 2.7690351009368896,
|
41398 |
+
"learning_rate": 6.8982425471068495e-06,
|
41399 |
+
"loss": 1.1497,
|
41400 |
+
"step": 5913
|
41401 |
+
},
|
41402 |
+
{
|
41403 |
+
"epoch": 0.38234699897366276,
|
41404 |
+
"grad_norm": 2.669267177581787,
|
41405 |
+
"learning_rate": 6.897293452775746e-06,
|
41406 |
+
"loss": 1.2059,
|
41407 |
+
"step": 5914
|
41408 |
+
},
|
41409 |
+
{
|
41410 |
+
"epoch": 0.3824116501402122,
|
41411 |
+
"grad_norm": 2.6427619457244873,
|
41412 |
+
"learning_rate": 6.896344278577083e-06,
|
41413 |
+
"loss": 1.1997,
|
41414 |
+
"step": 5915
|
41415 |
+
},
|
41416 |
+
{
|
41417 |
+
"epoch": 0.3824763013067617,
|
41418 |
+
"grad_norm": 2.8328170776367188,
|
41419 |
+
"learning_rate": 6.8953950245508186e-06,
|
41420 |
+
"loss": 1.174,
|
41421 |
+
"step": 5916
|
41422 |
+
},
|
41423 |
+
{
|
41424 |
+
"epoch": 0.3825409524733112,
|
41425 |
+
"grad_norm": 2.3416833877563477,
|
41426 |
+
"learning_rate": 6.894445690736911e-06,
|
41427 |
+
"loss": 1.2323,
|
41428 |
+
"step": 5917
|
41429 |
+
},
|
41430 |
+
{
|
41431 |
+
"epoch": 0.3826056036398607,
|
41432 |
+
"grad_norm": 2.6076183319091797,
|
41433 |
+
"learning_rate": 6.89349627717532e-06,
|
41434 |
+
"loss": 1.2821,
|
41435 |
+
"step": 5918
|
41436 |
+
},
|
41437 |
+
{
|
41438 |
+
"epoch": 0.38267025480641015,
|
41439 |
+
"grad_norm": 2.3602590560913086,
|
41440 |
+
"learning_rate": 6.892546783906016e-06,
|
41441 |
+
"loss": 1.1353,
|
41442 |
+
"step": 5919
|
41443 |
+
},
|
41444 |
+
{
|
41445 |
+
"epoch": 0.38273490597295967,
|
41446 |
+
"grad_norm": 2.70065975189209,
|
41447 |
+
"learning_rate": 6.891597210968965e-06,
|
41448 |
+
"loss": 1.1731,
|
41449 |
+
"step": 5920
|
41450 |
}
|
41451 |
],
|
41452 |
"logging_steps": 1,
|
|
|
41466 |
"attributes": {}
|
41467 |
}
|
41468 |
},
|
41469 |
+
"total_flos": 5.360788093260792e+18,
|
41470 |
"train_batch_size": 4,
|
41471 |
"trial_name": null,
|
41472 |
"trial_params": null
|