Training in progress, step 247200, checkpoint
Browse files- last-checkpoint/adapter_config.json +4 -4
- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/tokenizer_config.json +1 -1
- last-checkpoint/trainer_state.json +234 -3
- last-checkpoint/training_args.bin +1 -1
last-checkpoint/adapter_config.json
CHANGED
@@ -23,13 +23,13 @@
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
|
|
26 |
"down_proj",
|
27 |
-
"q_proj",
|
28 |
-
"k_proj",
|
29 |
"up_proj",
|
30 |
-
"
|
31 |
"v_proj",
|
32 |
-
"o_proj"
|
|
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
|
|
23 |
"rank_pattern": {},
|
24 |
"revision": null,
|
25 |
"target_modules": [
|
26 |
+
"gate_proj",
|
27 |
"down_proj",
|
|
|
|
|
28 |
"up_proj",
|
29 |
+
"q_proj",
|
30 |
"v_proj",
|
31 |
+
"o_proj",
|
32 |
+
"k_proj"
|
33 |
],
|
34 |
"task_type": "CAUSAL_LM",
|
35 |
"use_dora": false,
|
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1342238560
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:26cdd869c2b4ef51426ac8e4543dd00178c3d7969b70ed9815e4cdd56536de38
|
3 |
size 1342238560
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 683268498
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fed46b67619b196bd11398b24691e1dc344bc786624293d833d81496772323ba
|
3 |
size 683268498
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b46cf230fb1dcfff937d34e6fba4e48cdf99fb6180c75deb7bd7cfc11bd65f9
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6758d545d7403ec126f36e0141cd2cb64797d86a13dce36859bd0cdff2824ef9
|
3 |
size 1064
|
last-checkpoint/tokenizer_config.json
CHANGED
@@ -2064,7 +2064,7 @@
|
|
2064 |
"pad_token": "<|eot_id|>",
|
2065 |
"padding_side": "left",
|
2066 |
"stride": 0,
|
2067 |
-
"tokenizer_class": "
|
2068 |
"truncation_side": "right",
|
2069 |
"truncation_strategy": "longest_first"
|
2070 |
}
|
|
|
2064 |
"pad_token": "<|eot_id|>",
|
2065 |
"padding_side": "left",
|
2066 |
"stride": 0,
|
2067 |
+
"tokenizer_class": "PreTrainedTokenizer",
|
2068 |
"truncation_side": "right",
|
2069 |
"truncation_strategy": "longest_first"
|
2070 |
}
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -8428,6 +8428,237 @@
|
|
8428 |
"learning_rate": 1.9759580630399218e-05,
|
8429 |
"loss": 1.6874,
|
8430 |
"step": 240600
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8431 |
}
|
8432 |
],
|
8433 |
"logging_steps": 200,
|
@@ -8447,7 +8678,7 @@
|
|
8447 |
"attributes": {}
|
8448 |
}
|
8449 |
},
|
8450 |
-
"total_flos": 3.
|
8451 |
"train_batch_size": 1,
|
8452 |
"trial_name": null,
|
8453 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 0.1719232382214847,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 247200,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
8428 |
"learning_rate": 1.9759580630399218e-05,
|
8429 |
"loss": 1.6874,
|
8430 |
"step": 240600
|
8431 |
+
},
|
8432 |
+
{
|
8433 |
+
"epoch": 0.1674721511477893,
|
8434 |
+
"grad_norm": 5.881978511810303,
|
8435 |
+
"learning_rate": 1.9759183073216768e-05,
|
8436 |
+
"loss": 1.6754,
|
8437 |
+
"step": 240800
|
8438 |
+
},
|
8439 |
+
{
|
8440 |
+
"epoch": 0.16761124761884227,
|
8441 |
+
"grad_norm": 4.917115688323975,
|
8442 |
+
"learning_rate": 1.9758785193358672e-05,
|
8443 |
+
"loss": 1.6594,
|
8444 |
+
"step": 241000
|
8445 |
+
},
|
8446 |
+
{
|
8447 |
+
"epoch": 0.16775034408989525,
|
8448 |
+
"grad_norm": 7.00810432434082,
|
8449 |
+
"learning_rate": 1.9758386990843928e-05,
|
8450 |
+
"loss": 1.6253,
|
8451 |
+
"step": 241200
|
8452 |
+
},
|
8453 |
+
{
|
8454 |
+
"epoch": 0.16788944056094826,
|
8455 |
+
"grad_norm": 3.8624985218048096,
|
8456 |
+
"learning_rate": 1.9757988465691542e-05,
|
8457 |
+
"loss": 1.6543,
|
8458 |
+
"step": 241400
|
8459 |
+
},
|
8460 |
+
{
|
8461 |
+
"epoch": 0.16802853703200124,
|
8462 |
+
"grad_norm": 6.821996212005615,
|
8463 |
+
"learning_rate": 1.9757589617920542e-05,
|
8464 |
+
"loss": 1.674,
|
8465 |
+
"step": 241600
|
8466 |
+
},
|
8467 |
+
{
|
8468 |
+
"epoch": 0.16816763350305422,
|
8469 |
+
"grad_norm": 7.101013660430908,
|
8470 |
+
"learning_rate": 1.9757190447549967e-05,
|
8471 |
+
"loss": 1.6478,
|
8472 |
+
"step": 241800
|
8473 |
+
},
|
8474 |
+
{
|
8475 |
+
"epoch": 0.1683067299741072,
|
8476 |
+
"grad_norm": 3.3569910526275635,
|
8477 |
+
"learning_rate": 1.9756790954598874e-05,
|
8478 |
+
"loss": 1.6584,
|
8479 |
+
"step": 242000
|
8480 |
+
},
|
8481 |
+
{
|
8482 |
+
"epoch": 0.16844582644516018,
|
8483 |
+
"grad_norm": 3.9595654010772705,
|
8484 |
+
"learning_rate": 1.9756391139086332e-05,
|
8485 |
+
"loss": 1.6638,
|
8486 |
+
"step": 242200
|
8487 |
+
},
|
8488 |
+
{
|
8489 |
+
"epoch": 0.16858492291621316,
|
8490 |
+
"grad_norm": 7.490050315856934,
|
8491 |
+
"learning_rate": 1.9755991001031433e-05,
|
8492 |
+
"loss": 1.6073,
|
8493 |
+
"step": 242400
|
8494 |
+
},
|
8495 |
+
{
|
8496 |
+
"epoch": 0.16872401938726614,
|
8497 |
+
"grad_norm": 6.08933162689209,
|
8498 |
+
"learning_rate": 1.9755590540453275e-05,
|
8499 |
+
"loss": 1.6431,
|
8500 |
+
"step": 242600
|
8501 |
+
},
|
8502 |
+
{
|
8503 |
+
"epoch": 0.16886311585831912,
|
8504 |
+
"grad_norm": 5.067729473114014,
|
8505 |
+
"learning_rate": 1.9755189757370973e-05,
|
8506 |
+
"loss": 1.6102,
|
8507 |
+
"step": 242800
|
8508 |
+
},
|
8509 |
+
{
|
8510 |
+
"epoch": 0.1690022123293721,
|
8511 |
+
"grad_norm": 5.07871150970459,
|
8512 |
+
"learning_rate": 1.9754788651803664e-05,
|
8513 |
+
"loss": 1.6425,
|
8514 |
+
"step": 243000
|
8515 |
+
},
|
8516 |
+
{
|
8517 |
+
"epoch": 0.16914130880042508,
|
8518 |
+
"grad_norm": 4.212064743041992,
|
8519 |
+
"learning_rate": 1.97543872237705e-05,
|
8520 |
+
"loss": 1.6382,
|
8521 |
+
"step": 243200
|
8522 |
+
},
|
8523 |
+
{
|
8524 |
+
"epoch": 0.16928040527147806,
|
8525 |
+
"grad_norm": 5.310381889343262,
|
8526 |
+
"learning_rate": 1.9753985473290637e-05,
|
8527 |
+
"loss": 1.649,
|
8528 |
+
"step": 243400
|
8529 |
+
},
|
8530 |
+
{
|
8531 |
+
"epoch": 0.16941950174253104,
|
8532 |
+
"grad_norm": 5.563880443572998,
|
8533 |
+
"learning_rate": 1.9753583400383262e-05,
|
8534 |
+
"loss": 1.68,
|
8535 |
+
"step": 243600
|
8536 |
+
},
|
8537 |
+
{
|
8538 |
+
"epoch": 0.16955859821358402,
|
8539 |
+
"grad_norm": 3.346017837524414,
|
8540 |
+
"learning_rate": 1.975318100506756e-05,
|
8541 |
+
"loss": 1.6314,
|
8542 |
+
"step": 243800
|
8543 |
+
},
|
8544 |
+
{
|
8545 |
+
"epoch": 0.169697694684637,
|
8546 |
+
"grad_norm": 3.5837533473968506,
|
8547 |
+
"learning_rate": 1.9752778287362746e-05,
|
8548 |
+
"loss": 1.6668,
|
8549 |
+
"step": 244000
|
8550 |
+
},
|
8551 |
+
{
|
8552 |
+
"epoch": 0.16983679115568998,
|
8553 |
+
"grad_norm": 8.021364212036133,
|
8554 |
+
"learning_rate": 1.9752375247288046e-05,
|
8555 |
+
"loss": 1.6824,
|
8556 |
+
"step": 244200
|
8557 |
+
},
|
8558 |
+
{
|
8559 |
+
"epoch": 0.16997588762674296,
|
8560 |
+
"grad_norm": 6.544102191925049,
|
8561 |
+
"learning_rate": 1.97519718848627e-05,
|
8562 |
+
"loss": 1.6961,
|
8563 |
+
"step": 244400
|
8564 |
+
},
|
8565 |
+
{
|
8566 |
+
"epoch": 0.17011498409779593,
|
8567 |
+
"grad_norm": 6.292764663696289,
|
8568 |
+
"learning_rate": 1.9751568200105962e-05,
|
8569 |
+
"loss": 1.6397,
|
8570 |
+
"step": 244600
|
8571 |
+
},
|
8572 |
+
{
|
8573 |
+
"epoch": 0.17025408056884891,
|
8574 |
+
"grad_norm": 7.250925064086914,
|
8575 |
+
"learning_rate": 1.9751164193037104e-05,
|
8576 |
+
"loss": 1.7036,
|
8577 |
+
"step": 244800
|
8578 |
+
},
|
8579 |
+
{
|
8580 |
+
"epoch": 0.17039317703990192,
|
8581 |
+
"grad_norm": 4.996527194976807,
|
8582 |
+
"learning_rate": 1.975075986367542e-05,
|
8583 |
+
"loss": 1.609,
|
8584 |
+
"step": 245000
|
8585 |
+
},
|
8586 |
+
{
|
8587 |
+
"epoch": 0.1705322735109549,
|
8588 |
+
"grad_norm": 5.131301403045654,
|
8589 |
+
"learning_rate": 1.97503552120402e-05,
|
8590 |
+
"loss": 1.7069,
|
8591 |
+
"step": 245200
|
8592 |
+
},
|
8593 |
+
{
|
8594 |
+
"epoch": 0.17067136998200788,
|
8595 |
+
"grad_norm": 8.829029083251953,
|
8596 |
+
"learning_rate": 1.9749950238150776e-05,
|
8597 |
+
"loss": 1.6709,
|
8598 |
+
"step": 245400
|
8599 |
+
},
|
8600 |
+
{
|
8601 |
+
"epoch": 0.17081046645306086,
|
8602 |
+
"grad_norm": 5.728978633880615,
|
8603 |
+
"learning_rate": 1.9749544942026467e-05,
|
8604 |
+
"loss": 1.672,
|
8605 |
+
"step": 245600
|
8606 |
+
},
|
8607 |
+
{
|
8608 |
+
"epoch": 0.17094956292411384,
|
8609 |
+
"grad_norm": 5.395960330963135,
|
8610 |
+
"learning_rate": 1.9749139323686628e-05,
|
8611 |
+
"loss": 1.6404,
|
8612 |
+
"step": 245800
|
8613 |
+
},
|
8614 |
+
{
|
8615 |
+
"epoch": 0.17108865939516682,
|
8616 |
+
"grad_norm": 3.34220027923584,
|
8617 |
+
"learning_rate": 1.9748733383150624e-05,
|
8618 |
+
"loss": 1.6915,
|
8619 |
+
"step": 246000
|
8620 |
+
},
|
8621 |
+
{
|
8622 |
+
"epoch": 0.1712277558662198,
|
8623 |
+
"grad_norm": 2.9125590324401855,
|
8624 |
+
"learning_rate": 1.974832712043783e-05,
|
8625 |
+
"loss": 1.6057,
|
8626 |
+
"step": 246200
|
8627 |
+
},
|
8628 |
+
{
|
8629 |
+
"epoch": 0.17136685233727278,
|
8630 |
+
"grad_norm": 5.961441516876221,
|
8631 |
+
"learning_rate": 1.974792053556764e-05,
|
8632 |
+
"loss": 1.6639,
|
8633 |
+
"step": 246400
|
8634 |
+
},
|
8635 |
+
{
|
8636 |
+
"epoch": 0.17150594880832576,
|
8637 |
+
"grad_norm": 3.4587485790252686,
|
8638 |
+
"learning_rate": 1.9747513628559473e-05,
|
8639 |
+
"loss": 1.667,
|
8640 |
+
"step": 246600
|
8641 |
+
},
|
8642 |
+
{
|
8643 |
+
"epoch": 0.17164504527937874,
|
8644 |
+
"grad_norm": 3.578892946243286,
|
8645 |
+
"learning_rate": 1.974710639943274e-05,
|
8646 |
+
"loss": 1.6889,
|
8647 |
+
"step": 246800
|
8648 |
+
},
|
8649 |
+
{
|
8650 |
+
"epoch": 0.17178414175043172,
|
8651 |
+
"grad_norm": 4.567336082458496,
|
8652 |
+
"learning_rate": 1.9746698848206897e-05,
|
8653 |
+
"loss": 1.6884,
|
8654 |
+
"step": 247000
|
8655 |
+
},
|
8656 |
+
{
|
8657 |
+
"epoch": 0.1719232382214847,
|
8658 |
+
"grad_norm": 4.0480523109436035,
|
8659 |
+
"learning_rate": 1.974629097490139e-05,
|
8660 |
+
"loss": 1.6463,
|
8661 |
+
"step": 247200
|
8662 |
}
|
8663 |
],
|
8664 |
"logging_steps": 200,
|
|
|
8678 |
"attributes": {}
|
8679 |
}
|
8680 |
},
|
8681 |
+
"total_flos": 3.2903444675753165e+18,
|
8682 |
"train_batch_size": 1,
|
8683 |
"trial_name": null,
|
8684 |
"trial_params": null
|
last-checkpoint/training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d05fe2fc1f3ea3381f9d45772e347485be6cf70ca63fca95b95759ca5f4677ea
|
3 |
size 6840
|