ToastyPigeon commited on
Commit
ffe35f4
·
verified ·
1 Parent(s): 2dd7389

Training in progress, step 384, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step384/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step384/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step384/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step384/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step384/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step384/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step384/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step384/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step384/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step384/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step384/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step384/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step384/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step384/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step384/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +235 -4
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:049ec0ada0813997e586979480e5c26282234a3e55448657344416c90be4b443
3
  size 550593856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:828e147d9ff7975c4ca7c16170146341a6a759ca1f988565f9e589d2342596e7
3
  size 550593856
last-checkpoint/global_step384/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f8f60cdf5cebb4daa2f28f924d5c9318b3777550815a0ba772278ef5535d933
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:844f1851799e607c6ad4d775043828dc2f91b441207d791befa00fb241d1d1d1
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a1e584f2e224ec2d8f3d405c1d312fdfc47b25c276fbcf4eaee3afa5d987046
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9fad8287caeaa77d0ff5f44667b715f08908d365e038e733623a2ff6022f47b
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37048fdc490efbb022bf06314e1894edff374a7f2abba775f59ab786ba8f836f
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fe4faad056364943f3e792c90d82e5b07076b53da596716271de53bd331fd0f
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:968d8da0a6beb74a1df7b2691968c7454779920a8c3221bb09c9816d338b4771
3
+ size 243591168
last-checkpoint/global_step384/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e48b6d107738efb23d8b852cd0af0a2d023774fbddf1701a9de926df2cc894de
3
+ size 243591168
last-checkpoint/global_step384/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0914c773585179b34764f2673fc67e49623cdd6822a6d8ec833c94a3423428aa
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b778bda5eacb772d5df0240212b9044f6f244f394acfab72de78de9975bb909
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b206a7c3fa96d0e6709760946ef32bbdff70be3c8a2a0ec9535d79279fb2c55
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48effcfd8c2b0a240bfd246675fa4a91f0a902ab85b70fe66f696e3cce72375d
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5220a261af9a068f3fab41351433fdfc667efd51f1979926167c85889b90a0
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2181df8b256ef1dfeb6d295701ed0622d0093a8769b287ff9ee7b114523ee9a
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b9ae0e0c8c98cf4c1ffb9db17ed41419c57d6454cb5c1d5cf66b762cf98a86e
3
+ size 211435686
last-checkpoint/global_step384/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6561b825f679c503e6a2afea2e51d929d33613f2e4c7392df0b08575068c90ca
3
+ size 211435686
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step351
 
1
+ global_step384
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0fec0d859a8870bb3863562a0fbaa6ebb33536c9a365b6abc0e8f09aacd3377c
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58ef1ab595d05fdcb3a0f87e7a491ca7682d5949ef8b0cbdedbf09bf8ed365a4
3
  size 15920
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:212dc5192b40486002c7fe7e08f770847069213c90b44b8eeb5c8c552aa09d2d
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caddd1ed4e1057c456fac63072b0c612a6f306480858eb596ab6dd3fe30a1182
3
  size 15920
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef0f0879c31278fe1422ca799f1e946f52627895e6b6b450451e9b838670c583
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b20c72304878735b6ef72d389e6b3baf3f585d11a83f82a05bbe943f2cb45ed
3
  size 15920
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66c283d16fff6ad5c32bb8063ebb0876ab2d3331a701287a5518f6611da12f69
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9da521bf7f407fc83ada516499e58838cbeaa21ed14e1017b59a2027e1b6412
3
  size 15920
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d45b771d912fd42fd57bf4de4aad3e9035242ea784f2ed87aba4c621e5cd51a
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d100c791f147455c9629cfb40b4b4f249844a25d6f2eb446a9e4f8b3fcd45fc8
3
  size 15920
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5016a6096b206da65160f901789323144aa46e697f4c7af7e972b16bb657ce2c
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98bb3946a7aa42bfd85cf5de7aa4d59dcae0fbaaee932e73a3cd6d0b02a487b4
3
  size 15920
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b70b7d3a0a245937c94c1dfab4a674840cffae712e68cd2ec85b8111cd19b6ae
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09dbc1442190c2bd85ae2e07a2140c70024d933bd92a92b55672e8cba2fc8ad4
3
  size 15920
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:894e3dc130b31d1ff09091d3fba8c4756c7829f76a26489876f6acf7ddf58730
3
  size 15920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9713ea8276ee3c304acb4a6c1c3f39b86b13faba6b17609fddc996c3bf4350a0
3
  size 15920
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77a901e68fe508c580838a0a83e25dcc491921acb23119eaf14194c3e5bc3346
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb2118b619b8caea9c3657ac3db8db82c41ee9a909354d950dc1bb914eb2c8f3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9140625,
5
  "eval_steps": 39,
6
- "global_step": 351,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2544,6 +2544,237 @@
2544
  "eval_samples_per_second": 1.228,
2545
  "eval_steps_per_second": 0.154,
2546
  "step": 351
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2547
  }
2548
  ],
2549
  "logging_steps": 1,
@@ -2558,12 +2789,12 @@
2558
  "should_evaluate": false,
2559
  "should_log": false,
2560
  "should_save": true,
2561
- "should_training_stop": false
2562
  },
2563
  "attributes": {}
2564
  }
2565
  },
2566
- "total_flos": 116032970686464.0,
2567
  "train_batch_size": 1,
2568
  "trial_name": null,
2569
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 39,
6
+ "global_step": 384,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2544
  "eval_samples_per_second": 1.228,
2545
  "eval_steps_per_second": 0.154,
2546
  "step": 351
2547
+ },
2548
+ {
2549
+ "epoch": 0.9166666666666666,
2550
+ "grad_norm": 0.17312639183632889,
2551
+ "learning_rate": 1.1705364107037981e-05,
2552
+ "loss": 2.4481,
2553
+ "step": 352
2554
+ },
2555
+ {
2556
+ "epoch": 0.9192708333333334,
2557
+ "grad_norm": 0.17233940633090827,
2558
+ "learning_rate": 1.1601071028553371e-05,
2559
+ "loss": 2.4747,
2560
+ "step": 353
2561
+ },
2562
+ {
2563
+ "epoch": 0.921875,
2564
+ "grad_norm": 0.13057391845213812,
2565
+ "learning_rate": 1.1500010708805123e-05,
2566
+ "loss": 2.3276,
2567
+ "step": 354
2568
+ },
2569
+ {
2570
+ "epoch": 0.9244791666666666,
2571
+ "grad_norm": 0.15218005054913405,
2572
+ "learning_rate": 1.1402190675712448e-05,
2573
+ "loss": 2.4584,
2574
+ "step": 355
2575
+ },
2576
+ {
2577
+ "epoch": 0.9270833333333334,
2578
+ "grad_norm": 0.1405908251620449,
2579
+ "learning_rate": 1.130761821582766e-05,
2580
+ "loss": 2.3576,
2581
+ "step": 356
2582
+ },
2583
+ {
2584
+ "epoch": 0.9296875,
2585
+ "grad_norm": 0.15487043477366738,
2586
+ "learning_rate": 1.1216300373793417e-05,
2587
+ "loss": 2.3773,
2588
+ "step": 357
2589
+ },
2590
+ {
2591
+ "epoch": 0.9322916666666666,
2592
+ "grad_norm": 0.15092606814376955,
2593
+ "learning_rate": 1.1128243951817937e-05,
2594
+ "loss": 2.2986,
2595
+ "step": 358
2596
+ },
2597
+ {
2598
+ "epoch": 0.9348958333333334,
2599
+ "grad_norm": 0.1484173556255145,
2600
+ "learning_rate": 1.1043455509168339e-05,
2601
+ "loss": 2.2237,
2602
+ "step": 359
2603
+ },
2604
+ {
2605
+ "epoch": 0.9375,
2606
+ "grad_norm": 0.14235753907847776,
2607
+ "learning_rate": 1.0961941361682013e-05,
2608
+ "loss": 2.4375,
2609
+ "step": 360
2610
+ },
2611
+ {
2612
+ "epoch": 0.9401041666666666,
2613
+ "grad_norm": 0.14142558045399545,
2614
+ "learning_rate": 1.0883707581296196e-05,
2615
+ "loss": 2.5165,
2616
+ "step": 361
2617
+ },
2618
+ {
2619
+ "epoch": 0.9427083333333334,
2620
+ "grad_norm": 0.13890140092039985,
2621
+ "learning_rate": 1.080875999559564e-05,
2622
+ "loss": 2.477,
2623
+ "step": 362
2624
+ },
2625
+ {
2626
+ "epoch": 0.9453125,
2627
+ "grad_norm": 0.149033997441562,
2628
+ "learning_rate": 1.0737104187378542e-05,
2629
+ "loss": 2.386,
2630
+ "step": 363
2631
+ },
2632
+ {
2633
+ "epoch": 0.9479166666666666,
2634
+ "grad_norm": 0.15829792856388608,
2635
+ "learning_rate": 1.066874549424068e-05,
2636
+ "loss": 2.2997,
2637
+ "step": 364
2638
+ },
2639
+ {
2640
+ "epoch": 0.9505208333333334,
2641
+ "grad_norm": 0.14793151985278888,
2642
+ "learning_rate": 1.0603689008177822e-05,
2643
+ "loss": 2.4599,
2644
+ "step": 365
2645
+ },
2646
+ {
2647
+ "epoch": 0.953125,
2648
+ "grad_norm": 0.1556260763161562,
2649
+ "learning_rate": 1.0541939575206412e-05,
2650
+ "loss": 2.2611,
2651
+ "step": 366
2652
+ },
2653
+ {
2654
+ "epoch": 0.9557291666666666,
2655
+ "grad_norm": 0.16684067808748887,
2656
+ "learning_rate": 1.0483501795002612e-05,
2657
+ "loss": 2.4216,
2658
+ "step": 367
2659
+ },
2660
+ {
2661
+ "epoch": 0.9583333333333334,
2662
+ "grad_norm": 0.14213262998853762,
2663
+ "learning_rate": 1.0428380020559658e-05,
2664
+ "loss": 2.4624,
2665
+ "step": 368
2666
+ },
2667
+ {
2668
+ "epoch": 0.9609375,
2669
+ "grad_norm": 0.16542015576320396,
2670
+ "learning_rate": 1.0376578357863627e-05,
2671
+ "loss": 2.087,
2672
+ "step": 369
2673
+ },
2674
+ {
2675
+ "epoch": 0.9635416666666666,
2676
+ "grad_norm": 0.16312011726686781,
2677
+ "learning_rate": 1.0328100665587574e-05,
2678
+ "loss": 2.3865,
2679
+ "step": 370
2680
+ },
2681
+ {
2682
+ "epoch": 0.9661458333333334,
2683
+ "grad_norm": 0.14758591702174165,
2684
+ "learning_rate": 1.0282950554804085e-05,
2685
+ "loss": 2.3726,
2686
+ "step": 371
2687
+ },
2688
+ {
2689
+ "epoch": 0.96875,
2690
+ "grad_norm": 0.1316294327416778,
2691
+ "learning_rate": 1.0241131388716332e-05,
2692
+ "loss": 2.4155,
2693
+ "step": 372
2694
+ },
2695
+ {
2696
+ "epoch": 0.9713541666666666,
2697
+ "grad_norm": 0.14988844894688882,
2698
+ "learning_rate": 1.0202646282407505e-05,
2699
+ "loss": 2.3134,
2700
+ "step": 373
2701
+ },
2702
+ {
2703
+ "epoch": 0.9739583333333334,
2704
+ "grad_norm": 0.16468705638317635,
2705
+ "learning_rate": 1.016749810260881e-05,
2706
+ "loss": 2.3345,
2707
+ "step": 374
2708
+ },
2709
+ {
2710
+ "epoch": 0.9765625,
2711
+ "grad_norm": 0.16639080943133064,
2712
+ "learning_rate": 1.01356894674859e-05,
2713
+ "loss": 2.1808,
2714
+ "step": 375
2715
+ },
2716
+ {
2717
+ "epoch": 0.9791666666666666,
2718
+ "grad_norm": 0.1537739755991523,
2719
+ "learning_rate": 1.0107222746443862e-05,
2720
+ "loss": 2.3806,
2721
+ "step": 376
2722
+ },
2723
+ {
2724
+ "epoch": 0.9817708333333334,
2725
+ "grad_norm": 0.1582641580303379,
2726
+ "learning_rate": 1.0082100059950713e-05,
2727
+ "loss": 2.4064,
2728
+ "step": 377
2729
+ },
2730
+ {
2731
+ "epoch": 0.984375,
2732
+ "grad_norm": 0.16490051463401306,
2733
+ "learning_rate": 1.0060323279379476e-05,
2734
+ "loss": 2.3932,
2735
+ "step": 378
2736
+ },
2737
+ {
2738
+ "epoch": 0.9869791666666666,
2739
+ "grad_norm": 0.14677990127698765,
2740
+ "learning_rate": 1.0041894026868732e-05,
2741
+ "loss": 2.3932,
2742
+ "step": 379
2743
+ },
2744
+ {
2745
+ "epoch": 0.9895833333333334,
2746
+ "grad_norm": 0.17686734833911588,
2747
+ "learning_rate": 1.0026813675201832e-05,
2748
+ "loss": 2.2527,
2749
+ "step": 380
2750
+ },
2751
+ {
2752
+ "epoch": 0.9921875,
2753
+ "grad_norm": 0.15542797613175577,
2754
+ "learning_rate": 1.0015083347704623e-05,
2755
+ "loss": 2.4111,
2756
+ "step": 381
2757
+ },
2758
+ {
2759
+ "epoch": 0.9947916666666666,
2760
+ "grad_norm": 0.1559166935312072,
2761
+ "learning_rate": 1.0006703918161775e-05,
2762
+ "loss": 2.4522,
2763
+ "step": 382
2764
+ },
2765
+ {
2766
+ "epoch": 0.9973958333333334,
2767
+ "grad_norm": 0.14284208961058514,
2768
+ "learning_rate": 1.000167601075169e-05,
2769
+ "loss": 2.3575,
2770
+ "step": 383
2771
+ },
2772
+ {
2773
+ "epoch": 1.0,
2774
+ "grad_norm": 0.14031951339304571,
2775
+ "learning_rate": 1e-05,
2776
+ "loss": 2.3152,
2777
+ "step": 384
2778
  }
2779
  ],
2780
  "logging_steps": 1,
 
2789
  "should_evaluate": false,
2790
  "should_log": false,
2791
  "should_save": true,
2792
+ "should_training_stop": true
2793
  },
2794
  "attributes": {}
2795
  }
2796
  },
2797
+ "total_flos": 126942053400576.0,
2798
  "train_batch_size": 1,
2799
  "trial_name": null,
2800
  "trial_params": null