ToastyPigeon
commited on
Training in progress, step 384, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/global_step384/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/global_step384/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
- last-checkpoint/latest +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/rng_state_4.pth +1 -1
- last-checkpoint/rng_state_5.pth +1 -1
- last-checkpoint/rng_state_6.pth +1 -1
- last-checkpoint/rng_state_7.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +235 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 550593856
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:828e147d9ff7975c4ca7c16170146341a6a759ca1f988565f9e589d2342596e7
|
3 |
size 550593856
|
last-checkpoint/global_step384/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f8f60cdf5cebb4daa2f28f924d5c9318b3777550815a0ba772278ef5535d933
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:844f1851799e607c6ad4d775043828dc2f91b441207d791befa00fb241d1d1d1
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a1e584f2e224ec2d8f3d405c1d312fdfc47b25c276fbcf4eaee3afa5d987046
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9fad8287caeaa77d0ff5f44667b715f08908d365e038e733623a2ff6022f47b
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37048fdc490efbb022bf06314e1894edff374a7f2abba775f59ab786ba8f836f
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fe4faad056364943f3e792c90d82e5b07076b53da596716271de53bd331fd0f
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:968d8da0a6beb74a1df7b2691968c7454779920a8c3221bb09c9816d338b4771
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e48b6d107738efb23d8b852cd0af0a2d023774fbddf1701a9de926df2cc894de
|
3 |
+
size 243591168
|
last-checkpoint/global_step384/zero_pp_rank_0_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0914c773585179b34764f2673fc67e49623cdd6822a6d8ec833c94a3423428aa
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_1_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7b778bda5eacb772d5df0240212b9044f6f244f394acfab72de78de9975bb909
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_2_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b206a7c3fa96d0e6709760946ef32bbdff70be3c8a2a0ec9535d79279fb2c55
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_3_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48effcfd8c2b0a240bfd246675fa4a91f0a902ab85b70fe66f696e3cce72375d
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_4_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c5220a261af9a068f3fab41351433fdfc667efd51f1979926167c85889b90a0
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_5_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2181df8b256ef1dfeb6d295701ed0622d0093a8769b287ff9ee7b114523ee9a
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_6_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b9ae0e0c8c98cf4c1ffb9db17ed41419c57d6454cb5c1d5cf66b762cf98a86e
|
3 |
+
size 211435686
|
last-checkpoint/global_step384/zero_pp_rank_7_mp_rank_00_model_states.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6561b825f679c503e6a2afea2e51d929d33613f2e4c7392df0b08575068c90ca
|
3 |
+
size 211435686
|
last-checkpoint/latest
CHANGED
@@ -1 +1 @@
|
|
1 |
-
|
|
|
1 |
+
global_step384
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:58ef1ab595d05fdcb3a0f87e7a491ca7682d5949ef8b0cbdedbf09bf8ed365a4
|
3 |
size 15920
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:caddd1ed4e1057c456fac63072b0c612a6f306480858eb596ab6dd3fe30a1182
|
3 |
size 15920
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b20c72304878735b6ef72d389e6b3baf3f585d11a83f82a05bbe943f2cb45ed
|
3 |
size 15920
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c9da521bf7f407fc83ada516499e58838cbeaa21ed14e1017b59a2027e1b6412
|
3 |
size 15920
|
last-checkpoint/rng_state_4.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d100c791f147455c9629cfb40b4b4f249844a25d6f2eb446a9e4f8b3fcd45fc8
|
3 |
size 15920
|
last-checkpoint/rng_state_5.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98bb3946a7aa42bfd85cf5de7aa4d59dcae0fbaaee932e73a3cd6d0b02a487b4
|
3 |
size 15920
|
last-checkpoint/rng_state_6.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09dbc1442190c2bd85ae2e07a2140c70024d933bd92a92b55672e8cba2fc8ad4
|
3 |
size 15920
|
last-checkpoint/rng_state_7.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15920
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9713ea8276ee3c304acb4a6c1c3f39b86b13faba6b17609fddc996c3bf4350a0
|
3 |
size 15920
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb2118b619b8caea9c3657ac3db8db82c41ee9a909354d950dc1bb914eb2c8f3
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 0
|
5 |
"eval_steps": 39,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2544,6 +2544,237 @@
|
|
2544 |
"eval_samples_per_second": 1.228,
|
2545 |
"eval_steps_per_second": 0.154,
|
2546 |
"step": 351
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2547 |
}
|
2548 |
],
|
2549 |
"logging_steps": 1,
|
@@ -2558,12 +2789,12 @@
|
|
2558 |
"should_evaluate": false,
|
2559 |
"should_log": false,
|
2560 |
"should_save": true,
|
2561 |
-
"should_training_stop":
|
2562 |
},
|
2563 |
"attributes": {}
|
2564 |
}
|
2565 |
},
|
2566 |
-
"total_flos":
|
2567 |
"train_batch_size": 1,
|
2568 |
"trial_name": null,
|
2569 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.0,
|
5 |
"eval_steps": 39,
|
6 |
+
"global_step": 384,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2544 |
"eval_samples_per_second": 1.228,
|
2545 |
"eval_steps_per_second": 0.154,
|
2546 |
"step": 351
|
2547 |
+
},
|
2548 |
+
{
|
2549 |
+
"epoch": 0.9166666666666666,
|
2550 |
+
"grad_norm": 0.17312639183632889,
|
2551 |
+
"learning_rate": 1.1705364107037981e-05,
|
2552 |
+
"loss": 2.4481,
|
2553 |
+
"step": 352
|
2554 |
+
},
|
2555 |
+
{
|
2556 |
+
"epoch": 0.9192708333333334,
|
2557 |
+
"grad_norm": 0.17233940633090827,
|
2558 |
+
"learning_rate": 1.1601071028553371e-05,
|
2559 |
+
"loss": 2.4747,
|
2560 |
+
"step": 353
|
2561 |
+
},
|
2562 |
+
{
|
2563 |
+
"epoch": 0.921875,
|
2564 |
+
"grad_norm": 0.13057391845213812,
|
2565 |
+
"learning_rate": 1.1500010708805123e-05,
|
2566 |
+
"loss": 2.3276,
|
2567 |
+
"step": 354
|
2568 |
+
},
|
2569 |
+
{
|
2570 |
+
"epoch": 0.9244791666666666,
|
2571 |
+
"grad_norm": 0.15218005054913405,
|
2572 |
+
"learning_rate": 1.1402190675712448e-05,
|
2573 |
+
"loss": 2.4584,
|
2574 |
+
"step": 355
|
2575 |
+
},
|
2576 |
+
{
|
2577 |
+
"epoch": 0.9270833333333334,
|
2578 |
+
"grad_norm": 0.1405908251620449,
|
2579 |
+
"learning_rate": 1.130761821582766e-05,
|
2580 |
+
"loss": 2.3576,
|
2581 |
+
"step": 356
|
2582 |
+
},
|
2583 |
+
{
|
2584 |
+
"epoch": 0.9296875,
|
2585 |
+
"grad_norm": 0.15487043477366738,
|
2586 |
+
"learning_rate": 1.1216300373793417e-05,
|
2587 |
+
"loss": 2.3773,
|
2588 |
+
"step": 357
|
2589 |
+
},
|
2590 |
+
{
|
2591 |
+
"epoch": 0.9322916666666666,
|
2592 |
+
"grad_norm": 0.15092606814376955,
|
2593 |
+
"learning_rate": 1.1128243951817937e-05,
|
2594 |
+
"loss": 2.2986,
|
2595 |
+
"step": 358
|
2596 |
+
},
|
2597 |
+
{
|
2598 |
+
"epoch": 0.9348958333333334,
|
2599 |
+
"grad_norm": 0.1484173556255145,
|
2600 |
+
"learning_rate": 1.1043455509168339e-05,
|
2601 |
+
"loss": 2.2237,
|
2602 |
+
"step": 359
|
2603 |
+
},
|
2604 |
+
{
|
2605 |
+
"epoch": 0.9375,
|
2606 |
+
"grad_norm": 0.14235753907847776,
|
2607 |
+
"learning_rate": 1.0961941361682013e-05,
|
2608 |
+
"loss": 2.4375,
|
2609 |
+
"step": 360
|
2610 |
+
},
|
2611 |
+
{
|
2612 |
+
"epoch": 0.9401041666666666,
|
2613 |
+
"grad_norm": 0.14142558045399545,
|
2614 |
+
"learning_rate": 1.0883707581296196e-05,
|
2615 |
+
"loss": 2.5165,
|
2616 |
+
"step": 361
|
2617 |
+
},
|
2618 |
+
{
|
2619 |
+
"epoch": 0.9427083333333334,
|
2620 |
+
"grad_norm": 0.13890140092039985,
|
2621 |
+
"learning_rate": 1.080875999559564e-05,
|
2622 |
+
"loss": 2.477,
|
2623 |
+
"step": 362
|
2624 |
+
},
|
2625 |
+
{
|
2626 |
+
"epoch": 0.9453125,
|
2627 |
+
"grad_norm": 0.149033997441562,
|
2628 |
+
"learning_rate": 1.0737104187378542e-05,
|
2629 |
+
"loss": 2.386,
|
2630 |
+
"step": 363
|
2631 |
+
},
|
2632 |
+
{
|
2633 |
+
"epoch": 0.9479166666666666,
|
2634 |
+
"grad_norm": 0.15829792856388608,
|
2635 |
+
"learning_rate": 1.066874549424068e-05,
|
2636 |
+
"loss": 2.2997,
|
2637 |
+
"step": 364
|
2638 |
+
},
|
2639 |
+
{
|
2640 |
+
"epoch": 0.9505208333333334,
|
2641 |
+
"grad_norm": 0.14793151985278888,
|
2642 |
+
"learning_rate": 1.0603689008177822e-05,
|
2643 |
+
"loss": 2.4599,
|
2644 |
+
"step": 365
|
2645 |
+
},
|
2646 |
+
{
|
2647 |
+
"epoch": 0.953125,
|
2648 |
+
"grad_norm": 0.1556260763161562,
|
2649 |
+
"learning_rate": 1.0541939575206412e-05,
|
2650 |
+
"loss": 2.2611,
|
2651 |
+
"step": 366
|
2652 |
+
},
|
2653 |
+
{
|
2654 |
+
"epoch": 0.9557291666666666,
|
2655 |
+
"grad_norm": 0.16684067808748887,
|
2656 |
+
"learning_rate": 1.0483501795002612e-05,
|
2657 |
+
"loss": 2.4216,
|
2658 |
+
"step": 367
|
2659 |
+
},
|
2660 |
+
{
|
2661 |
+
"epoch": 0.9583333333333334,
|
2662 |
+
"grad_norm": 0.14213262998853762,
|
2663 |
+
"learning_rate": 1.0428380020559658e-05,
|
2664 |
+
"loss": 2.4624,
|
2665 |
+
"step": 368
|
2666 |
+
},
|
2667 |
+
{
|
2668 |
+
"epoch": 0.9609375,
|
2669 |
+
"grad_norm": 0.16542015576320396,
|
2670 |
+
"learning_rate": 1.0376578357863627e-05,
|
2671 |
+
"loss": 2.087,
|
2672 |
+
"step": 369
|
2673 |
+
},
|
2674 |
+
{
|
2675 |
+
"epoch": 0.9635416666666666,
|
2676 |
+
"grad_norm": 0.16312011726686781,
|
2677 |
+
"learning_rate": 1.0328100665587574e-05,
|
2678 |
+
"loss": 2.3865,
|
2679 |
+
"step": 370
|
2680 |
+
},
|
2681 |
+
{
|
2682 |
+
"epoch": 0.9661458333333334,
|
2683 |
+
"grad_norm": 0.14758591702174165,
|
2684 |
+
"learning_rate": 1.0282950554804085e-05,
|
2685 |
+
"loss": 2.3726,
|
2686 |
+
"step": 371
|
2687 |
+
},
|
2688 |
+
{
|
2689 |
+
"epoch": 0.96875,
|
2690 |
+
"grad_norm": 0.1316294327416778,
|
2691 |
+
"learning_rate": 1.0241131388716332e-05,
|
2692 |
+
"loss": 2.4155,
|
2693 |
+
"step": 372
|
2694 |
+
},
|
2695 |
+
{
|
2696 |
+
"epoch": 0.9713541666666666,
|
2697 |
+
"grad_norm": 0.14988844894688882,
|
2698 |
+
"learning_rate": 1.0202646282407505e-05,
|
2699 |
+
"loss": 2.3134,
|
2700 |
+
"step": 373
|
2701 |
+
},
|
2702 |
+
{
|
2703 |
+
"epoch": 0.9739583333333334,
|
2704 |
+
"grad_norm": 0.16468705638317635,
|
2705 |
+
"learning_rate": 1.016749810260881e-05,
|
2706 |
+
"loss": 2.3345,
|
2707 |
+
"step": 374
|
2708 |
+
},
|
2709 |
+
{
|
2710 |
+
"epoch": 0.9765625,
|
2711 |
+
"grad_norm": 0.16639080943133064,
|
2712 |
+
"learning_rate": 1.01356894674859e-05,
|
2713 |
+
"loss": 2.1808,
|
2714 |
+
"step": 375
|
2715 |
+
},
|
2716 |
+
{
|
2717 |
+
"epoch": 0.9791666666666666,
|
2718 |
+
"grad_norm": 0.1537739755991523,
|
2719 |
+
"learning_rate": 1.0107222746443862e-05,
|
2720 |
+
"loss": 2.3806,
|
2721 |
+
"step": 376
|
2722 |
+
},
|
2723 |
+
{
|
2724 |
+
"epoch": 0.9817708333333334,
|
2725 |
+
"grad_norm": 0.1582641580303379,
|
2726 |
+
"learning_rate": 1.0082100059950713e-05,
|
2727 |
+
"loss": 2.4064,
|
2728 |
+
"step": 377
|
2729 |
+
},
|
2730 |
+
{
|
2731 |
+
"epoch": 0.984375,
|
2732 |
+
"grad_norm": 0.16490051463401306,
|
2733 |
+
"learning_rate": 1.0060323279379476e-05,
|
2734 |
+
"loss": 2.3932,
|
2735 |
+
"step": 378
|
2736 |
+
},
|
2737 |
+
{
|
2738 |
+
"epoch": 0.9869791666666666,
|
2739 |
+
"grad_norm": 0.14677990127698765,
|
2740 |
+
"learning_rate": 1.0041894026868732e-05,
|
2741 |
+
"loss": 2.3932,
|
2742 |
+
"step": 379
|
2743 |
+
},
|
2744 |
+
{
|
2745 |
+
"epoch": 0.9895833333333334,
|
2746 |
+
"grad_norm": 0.17686734833911588,
|
2747 |
+
"learning_rate": 1.0026813675201832e-05,
|
2748 |
+
"loss": 2.2527,
|
2749 |
+
"step": 380
|
2750 |
+
},
|
2751 |
+
{
|
2752 |
+
"epoch": 0.9921875,
|
2753 |
+
"grad_norm": 0.15542797613175577,
|
2754 |
+
"learning_rate": 1.0015083347704623e-05,
|
2755 |
+
"loss": 2.4111,
|
2756 |
+
"step": 381
|
2757 |
+
},
|
2758 |
+
{
|
2759 |
+
"epoch": 0.9947916666666666,
|
2760 |
+
"grad_norm": 0.1559166935312072,
|
2761 |
+
"learning_rate": 1.0006703918161775e-05,
|
2762 |
+
"loss": 2.4522,
|
2763 |
+
"step": 382
|
2764 |
+
},
|
2765 |
+
{
|
2766 |
+
"epoch": 0.9973958333333334,
|
2767 |
+
"grad_norm": 0.14284208961058514,
|
2768 |
+
"learning_rate": 1.000167601075169e-05,
|
2769 |
+
"loss": 2.3575,
|
2770 |
+
"step": 383
|
2771 |
+
},
|
2772 |
+
{
|
2773 |
+
"epoch": 1.0,
|
2774 |
+
"grad_norm": 0.14031951339304571,
|
2775 |
+
"learning_rate": 1e-05,
|
2776 |
+
"loss": 2.3152,
|
2777 |
+
"step": 384
|
2778 |
}
|
2779 |
],
|
2780 |
"logging_steps": 1,
|
|
|
2789 |
"should_evaluate": false,
|
2790 |
"should_log": false,
|
2791 |
"should_save": true,
|
2792 |
+
"should_training_stop": true
|
2793 |
},
|
2794 |
"attributes": {}
|
2795 |
}
|
2796 |
},
|
2797 |
+
"total_flos": 126942053400576.0,
|
2798 |
"train_batch_size": 1,
|
2799 |
"trial_name": null,
|
2800 |
"trial_params": null
|