sft-mistral-v1-clean-valid / trainer_state.json
hllj's picture
Model save
d3eab40
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 1000,
"global_step": 1900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 3.1578947368421055e-07,
"loss": 0.7568,
"step": 1
},
{
"epoch": 0.01,
"learning_rate": 3.157894736842105e-06,
"loss": 0.7081,
"step": 10
},
{
"epoch": 0.02,
"learning_rate": 6.31578947368421e-06,
"loss": 0.6806,
"step": 20
},
{
"epoch": 0.03,
"learning_rate": 9.473684210526315e-06,
"loss": 0.6123,
"step": 30
},
{
"epoch": 0.04,
"learning_rate": 1.263157894736842e-05,
"loss": 0.4982,
"step": 40
},
{
"epoch": 0.05,
"learning_rate": 1.5789473684210526e-05,
"loss": 0.4448,
"step": 50
},
{
"epoch": 0.06,
"learning_rate": 1.894736842105263e-05,
"loss": 0.4243,
"step": 60
},
{
"epoch": 0.07,
"learning_rate": 2.2105263157894736e-05,
"loss": 0.3961,
"step": 70
},
{
"epoch": 0.08,
"learning_rate": 2.526315789473684e-05,
"loss": 0.3963,
"step": 80
},
{
"epoch": 0.09,
"learning_rate": 2.8421052631578946e-05,
"loss": 0.3795,
"step": 90
},
{
"epoch": 0.11,
"learning_rate": 2.9999432005848255e-05,
"loss": 0.3786,
"step": 100
},
{
"epoch": 0.12,
"learning_rate": 2.99948883107249e-05,
"loss": 0.3962,
"step": 110
},
{
"epoch": 0.13,
"learning_rate": 2.9985802296874666e-05,
"loss": 0.3566,
"step": 120
},
{
"epoch": 0.14,
"learning_rate": 2.9972176716673562e-05,
"loss": 0.3817,
"step": 130
},
{
"epoch": 0.15,
"learning_rate": 2.9954015697643372e-05,
"loss": 0.3644,
"step": 140
},
{
"epoch": 0.16,
"learning_rate": 2.9931324741201325e-05,
"loss": 0.3497,
"step": 150
},
{
"epoch": 0.17,
"learning_rate": 2.9904110720993565e-05,
"loss": 0.3601,
"step": 160
},
{
"epoch": 0.18,
"learning_rate": 2.987238188081299e-05,
"loss": 0.3877,
"step": 170
},
{
"epoch": 0.19,
"learning_rate": 2.983614783210197e-05,
"loss": 0.3585,
"step": 180
},
{
"epoch": 0.2,
"learning_rate": 2.9795419551040836e-05,
"loss": 0.3498,
"step": 190
},
{
"epoch": 0.21,
"learning_rate": 2.9750209375222893e-05,
"loss": 0.3618,
"step": 200
},
{
"epoch": 0.22,
"learning_rate": 2.97005309999171e-05,
"loss": 0.3496,
"step": 210
},
{
"epoch": 0.23,
"learning_rate": 2.964639947391939e-05,
"loss": 0.3483,
"step": 220
},
{
"epoch": 0.24,
"learning_rate": 2.958783119499408e-05,
"loss": 0.3488,
"step": 230
},
{
"epoch": 0.25,
"learning_rate": 2.9524843904906528e-05,
"loss": 0.3405,
"step": 240
},
{
"epoch": 0.26,
"learning_rate": 2.9457456684048772e-05,
"loss": 0.3441,
"step": 250
},
{
"epoch": 0.27,
"learning_rate": 2.938568994565956e-05,
"loss": 0.3638,
"step": 260
},
{
"epoch": 0.28,
"learning_rate": 2.9309565429640724e-05,
"loss": 0.3695,
"step": 270
},
{
"epoch": 0.29,
"learning_rate": 2.9229106195971603e-05,
"loss": 0.3372,
"step": 280
},
{
"epoch": 0.31,
"learning_rate": 2.9144336617723625e-05,
"loss": 0.354,
"step": 290
},
{
"epoch": 0.32,
"learning_rate": 2.90552823736771e-05,
"loss": 0.358,
"step": 300
},
{
"epoch": 0.33,
"learning_rate": 2.8961970440542496e-05,
"loss": 0.3473,
"step": 310
},
{
"epoch": 0.34,
"learning_rate": 2.8864429084788534e-05,
"loss": 0.3379,
"step": 320
},
{
"epoch": 0.35,
"learning_rate": 2.8762687854079563e-05,
"loss": 0.3794,
"step": 330
},
{
"epoch": 0.36,
"learning_rate": 2.8656777568324878e-05,
"loss": 0.3475,
"step": 340
},
{
"epoch": 0.37,
"learning_rate": 2.8546730310342593e-05,
"loss": 0.3655,
"step": 350
},
{
"epoch": 0.38,
"learning_rate": 2.8432579416140984e-05,
"loss": 0.3354,
"step": 360
},
{
"epoch": 0.39,
"learning_rate": 2.8314359464820184e-05,
"loss": 0.3448,
"step": 370
},
{
"epoch": 0.4,
"learning_rate": 2.8192106268097336e-05,
"loss": 0.3565,
"step": 380
},
{
"epoch": 0.41,
"learning_rate": 2.8065856859458346e-05,
"loss": 0.3436,
"step": 390
},
{
"epoch": 0.42,
"learning_rate": 2.7935649482939533e-05,
"loss": 0.3509,
"step": 400
},
{
"epoch": 0.43,
"learning_rate": 2.7801523581542563e-05,
"loss": 0.3312,
"step": 410
},
{
"epoch": 0.44,
"learning_rate": 2.766351978528622e-05,
"loss": 0.3445,
"step": 420
},
{
"epoch": 0.45,
"learning_rate": 2.7521679898898567e-05,
"loss": 0.3374,
"step": 430
},
{
"epoch": 0.46,
"learning_rate": 2.737604688915327e-05,
"loss": 0.3438,
"step": 440
},
{
"epoch": 0.47,
"learning_rate": 2.72266648718539e-05,
"loss": 0.3307,
"step": 450
},
{
"epoch": 0.48,
"learning_rate": 2.7073579098470196e-05,
"loss": 0.3344,
"step": 460
},
{
"epoch": 0.49,
"learning_rate": 2.6916835942430292e-05,
"loss": 0.325,
"step": 470
},
{
"epoch": 0.51,
"learning_rate": 2.6756482885073032e-05,
"loss": 0.3296,
"step": 480
},
{
"epoch": 0.52,
"learning_rate": 2.6592568501264746e-05,
"loss": 0.3536,
"step": 490
},
{
"epoch": 0.53,
"learning_rate": 2.6425142444684735e-05,
"loss": 0.3272,
"step": 500
},
{
"epoch": 0.54,
"learning_rate": 2.6254255432783933e-05,
"loss": 0.3547,
"step": 510
},
{
"epoch": 0.55,
"learning_rate": 2.6079959231421347e-05,
"loss": 0.3313,
"step": 520
},
{
"epoch": 0.56,
"learning_rate": 2.5902306639182952e-05,
"loss": 0.343,
"step": 530
},
{
"epoch": 0.57,
"learning_rate": 2.5721351471387666e-05,
"loss": 0.3129,
"step": 540
},
{
"epoch": 0.58,
"learning_rate": 2.5537148543785385e-05,
"loss": 0.3505,
"step": 550
},
{
"epoch": 0.59,
"learning_rate": 2.534975365595196e-05,
"loss": 0.3523,
"step": 560
},
{
"epoch": 0.6,
"learning_rate": 2.5159223574386117e-05,
"loss": 0.3699,
"step": 570
},
{
"epoch": 0.61,
"learning_rate": 2.496561601531353e-05,
"loss": 0.315,
"step": 580
},
{
"epoch": 0.62,
"learning_rate": 2.4768989627203123e-05,
"loss": 0.3382,
"step": 590
},
{
"epoch": 0.63,
"learning_rate": 2.4569403973001045e-05,
"loss": 0.3387,
"step": 600
},
{
"epoch": 0.64,
"learning_rate": 2.436691951208758e-05,
"loss": 0.3286,
"step": 610
},
{
"epoch": 0.65,
"learning_rate": 2.4161597581962526e-05,
"loss": 0.3096,
"step": 620
},
{
"epoch": 0.66,
"learning_rate": 2.395350037966456e-05,
"loss": 0.3384,
"step": 630
},
{
"epoch": 0.67,
"learning_rate": 2.3742690942930235e-05,
"loss": 0.3284,
"step": 640
},
{
"epoch": 0.68,
"learning_rate": 2.3529233131098313e-05,
"loss": 0.3295,
"step": 650
},
{
"epoch": 0.69,
"learning_rate": 2.33131916057652e-05,
"loss": 0.3164,
"step": 660
},
{
"epoch": 0.71,
"learning_rate": 2.309463181119736e-05,
"loss": 0.3597,
"step": 670
},
{
"epoch": 0.72,
"learning_rate": 2.287361995450667e-05,
"loss": 0.3239,
"step": 680
},
{
"epoch": 0.73,
"learning_rate": 2.2650222985594634e-05,
"loss": 0.3298,
"step": 690
},
{
"epoch": 0.74,
"learning_rate": 2.2424508576871623e-05,
"loss": 0.3176,
"step": 700
},
{
"epoch": 0.75,
"learning_rate": 2.219654510275728e-05,
"loss": 0.3344,
"step": 710
},
{
"epoch": 0.76,
"learning_rate": 2.1966401618968194e-05,
"loss": 0.3518,
"step": 720
},
{
"epoch": 0.77,
"learning_rate": 2.173414784159925e-05,
"loss": 0.3323,
"step": 730
},
{
"epoch": 0.78,
"learning_rate": 2.149985412600492e-05,
"loss": 0.3343,
"step": 740
},
{
"epoch": 0.79,
"learning_rate": 2.1263591445486895e-05,
"loss": 0.3383,
"step": 750
},
{
"epoch": 0.8,
"learning_rate": 2.1025431369794546e-05,
"loss": 0.3135,
"step": 760
},
{
"epoch": 0.81,
"learning_rate": 2.0785446043444677e-05,
"loss": 0.3278,
"step": 770
},
{
"epoch": 0.82,
"learning_rate": 2.0543708163867204e-05,
"loss": 0.3148,
"step": 780
},
{
"epoch": 0.83,
"learning_rate": 2.0300290959383318e-05,
"loss": 0.3511,
"step": 790
},
{
"epoch": 0.84,
"learning_rate": 2.0055268167022835e-05,
"loss": 0.3146,
"step": 800
},
{
"epoch": 0.85,
"learning_rate": 1.9808714010187425e-05,
"loss": 0.3321,
"step": 810
},
{
"epoch": 0.86,
"learning_rate": 1.9560703176166565e-05,
"loss": 0.3373,
"step": 820
},
{
"epoch": 0.87,
"learning_rate": 1.931131079351289e-05,
"loss": 0.32,
"step": 830
},
{
"epoch": 0.88,
"learning_rate": 1.9060612409283946e-05,
"loss": 0.3106,
"step": 840
},
{
"epoch": 0.89,
"learning_rate": 1.8808683966157132e-05,
"loss": 0.3167,
"step": 850
},
{
"epoch": 0.91,
"learning_rate": 1.8555601779424778e-05,
"loss": 0.2993,
"step": 860
},
{
"epoch": 0.92,
"learning_rate": 1.8301442513876406e-05,
"loss": 0.3405,
"step": 870
},
{
"epoch": 0.93,
"learning_rate": 1.804628316057508e-05,
"loss": 0.3346,
"step": 880
},
{
"epoch": 0.94,
"learning_rate": 1.779020101353492e-05,
"loss": 0.3319,
"step": 890
},
{
"epoch": 0.95,
"learning_rate": 1.7533273646306857e-05,
"loss": 0.3087,
"step": 900
},
{
"epoch": 0.96,
"learning_rate": 1.7275578888479714e-05,
"loss": 0.316,
"step": 910
},
{
"epoch": 0.97,
"learning_rate": 1.7017194802103705e-05,
"loss": 0.3054,
"step": 920
},
{
"epoch": 0.98,
"learning_rate": 1.6758199658043538e-05,
"loss": 0.3255,
"step": 930
},
{
"epoch": 0.99,
"learning_rate": 1.6498671912268256e-05,
"loss": 0.3175,
"step": 940
},
{
"epoch": 1.0,
"learning_rate": 1.623869018208499e-05,
"loss": 0.3424,
"step": 950
},
{
"epoch": 1.01,
"learning_rate": 1.5978333222323858e-05,
"loss": 0.2825,
"step": 960
},
{
"epoch": 1.02,
"learning_rate": 1.571767990148122e-05,
"loss": 0.2886,
"step": 970
},
{
"epoch": 1.03,
"learning_rate": 1.5456809177828444e-05,
"loss": 0.3196,
"step": 980
},
{
"epoch": 1.04,
"learning_rate": 1.5195800075493542e-05,
"loss": 0.3178,
"step": 990
},
{
"epoch": 1.05,
"learning_rate": 1.4934731660522817e-05,
"loss": 0.2676,
"step": 1000
},
{
"epoch": 1.05,
"eval_loss": 0.508576512336731,
"eval_runtime": 12.2399,
"eval_samples_per_second": 3.595,
"eval_steps_per_second": 0.899,
"step": 1000
},
{
"epoch": 1.06,
"learning_rate": 1.4673683016929805e-05,
"loss": 0.2803,
"step": 1010
},
{
"epoch": 1.07,
"learning_rate": 1.441273322273884e-05,
"loss": 0.319,
"step": 1020
},
{
"epoch": 1.08,
"learning_rate": 1.4151961326030314e-05,
"loss": 0.2965,
"step": 1030
},
{
"epoch": 1.09,
"learning_rate": 1.3891446320995143e-05,
"loss": 0.3067,
"step": 1040
},
{
"epoch": 1.11,
"learning_rate": 1.3631267124005453e-05,
"loss": 0.3072,
"step": 1050
},
{
"epoch": 1.12,
"learning_rate": 1.337150254970891e-05,
"loss": 0.2963,
"step": 1060
},
{
"epoch": 1.13,
"learning_rate": 1.3112231287153798e-05,
"loss": 0.3063,
"step": 1070
},
{
"epoch": 1.14,
"learning_rate": 1.28535318759522e-05,
"loss": 0.2732,
"step": 1080
},
{
"epoch": 1.15,
"learning_rate": 1.2595482682488443e-05,
"loss": 0.2874,
"step": 1090
},
{
"epoch": 1.16,
"learning_rate": 1.2338161876179964e-05,
"loss": 0.2998,
"step": 1100
},
{
"epoch": 1.17,
"learning_rate": 1.2081647405797923e-05,
"loss": 0.2849,
"step": 1110
},
{
"epoch": 1.18,
"learning_rate": 1.1826016975854563e-05,
"loss": 0.2735,
"step": 1120
},
{
"epoch": 1.19,
"learning_rate": 1.1571348023064662e-05,
"loss": 0.2937,
"step": 1130
},
{
"epoch": 1.2,
"learning_rate": 1.1317717692888014e-05,
"loss": 0.2993,
"step": 1140
},
{
"epoch": 1.21,
"learning_rate": 1.1065202816160213e-05,
"loss": 0.2933,
"step": 1150
},
{
"epoch": 1.22,
"learning_rate": 1.081387988581869e-05,
"loss": 0.285,
"step": 1160
},
{
"epoch": 1.23,
"learning_rate": 1.0563825033731146e-05,
"loss": 0.2896,
"step": 1170
},
{
"epoch": 1.24,
"learning_rate": 1.031511400763332e-05,
"loss": 0.2874,
"step": 1180
},
{
"epoch": 1.25,
"learning_rate": 1.0067822148183194e-05,
"loss": 0.2851,
"step": 1190
},
{
"epoch": 1.26,
"learning_rate": 9.822024366138397e-06,
"loss": 0.2925,
"step": 1200
},
{
"epoch": 1.27,
"learning_rate": 9.577795119663966e-06,
"loss": 0.2843,
"step": 1210
},
{
"epoch": 1.28,
"learning_rate": 9.335208391777106e-06,
"loss": 0.2884,
"step": 1220
},
{
"epoch": 1.29,
"learning_rate": 9.094337667935942e-06,
"loss": 0.269,
"step": 1230
},
{
"epoch": 1.31,
"learning_rate": 8.855255913778949e-06,
"loss": 0.2849,
"step": 1240
},
{
"epoch": 1.32,
"learning_rate": 8.618035553021925e-06,
"loss": 0.304,
"step": 1250
},
{
"epoch": 1.33,
"learning_rate": 8.382748445519008e-06,
"loss": 0.3049,
"step": 1260
},
{
"epoch": 1.34,
"learning_rate": 8.149465865494633e-06,
"loss": 0.2999,
"step": 1270
},
{
"epoch": 1.35,
"learning_rate": 7.918258479952763e-06,
"loss": 0.2835,
"step": 1280
},
{
"epoch": 1.36,
"learning_rate": 7.689196327270171e-06,
"loss": 0.3167,
"step": 1290
},
{
"epoch": 1.37,
"learning_rate": 7.462348795980088e-06,
"loss": 0.2842,
"step": 1300
},
{
"epoch": 1.38,
"learning_rate": 7.237784603752705e-06,
"loss": 0.2909,
"step": 1310
},
{
"epoch": 1.39,
"learning_rate": 7.015571776578922e-06,
"loss": 0.2881,
"step": 1320
},
{
"epoch": 1.4,
"learning_rate": 6.795777628163599e-06,
"loss": 0.2796,
"step": 1330
},
{
"epoch": 1.41,
"learning_rate": 6.578468739534602e-06,
"loss": 0.3056,
"step": 1340
},
{
"epoch": 1.42,
"learning_rate": 6.363710938873759e-06,
"loss": 0.2987,
"step": 1350
},
{
"epoch": 1.43,
"learning_rate": 6.151569281575925e-06,
"loss": 0.288,
"step": 1360
},
{
"epoch": 1.44,
"learning_rate": 5.942108030542074e-06,
"loss": 0.2954,
"step": 1370
},
{
"epoch": 1.45,
"learning_rate": 5.735390636712514e-06,
"loss": 0.2837,
"step": 1380
},
{
"epoch": 1.46,
"learning_rate": 5.531479719846038e-06,
"loss": 0.3055,
"step": 1390
},
{
"epoch": 1.47,
"learning_rate": 5.330437049550868e-06,
"loss": 0.313,
"step": 1400
},
{
"epoch": 1.48,
"learning_rate": 5.132323526573126e-06,
"loss": 0.2966,
"step": 1410
},
{
"epoch": 1.49,
"learning_rate": 4.937199164348521e-06,
"loss": 0.2741,
"step": 1420
},
{
"epoch": 1.51,
"learning_rate": 4.745123070822786e-06,
"loss": 0.2973,
"step": 1430
},
{
"epoch": 1.52,
"learning_rate": 4.556153430546451e-06,
"loss": 0.281,
"step": 1440
},
{
"epoch": 1.53,
"learning_rate": 4.370347487049313e-06,
"loss": 0.2905,
"step": 1450
},
{
"epoch": 1.54,
"learning_rate": 4.187761525499973e-06,
"loss": 0.2806,
"step": 1460
},
{
"epoch": 1.55,
"learning_rate": 4.008450855655675e-06,
"loss": 0.2716,
"step": 1470
},
{
"epoch": 1.56,
"learning_rate": 3.83246979510764e-06,
"loss": 0.2834,
"step": 1480
},
{
"epoch": 1.57,
"learning_rate": 3.676977737529078e-06,
"loss": 0.2809,
"step": 1490
},
{
"epoch": 1.58,
"learning_rate": 3.5074689542164895e-06,
"loss": 0.2845,
"step": 1500
},
{
"epoch": 1.59,
"learning_rate": 3.341441539881574e-06,
"loss": 0.3001,
"step": 1510
},
{
"epoch": 1.6,
"learning_rate": 3.1789457882922753e-06,
"loss": 0.2941,
"step": 1520
},
{
"epoch": 1.61,
"learning_rate": 3.020030923389471e-06,
"loss": 0.2917,
"step": 1530
},
{
"epoch": 1.62,
"learning_rate": 2.86474508437579e-06,
"loss": 0.293,
"step": 1540
},
{
"epoch": 1.63,
"learning_rate": 2.7131353111330843e-06,
"loss": 0.2941,
"step": 1550
},
{
"epoch": 1.64,
"learning_rate": 2.565247529972901e-06,
"loss": 0.2787,
"step": 1560
},
{
"epoch": 1.65,
"learning_rate": 2.4211265397242854e-06,
"loss": 0.2899,
"step": 1570
},
{
"epoch": 1.66,
"learning_rate": 2.280815998163083e-06,
"loss": 0.2814,
"step": 1580
},
{
"epoch": 1.67,
"learning_rate": 2.144358408786986e-06,
"loss": 0.2786,
"step": 1590
},
{
"epoch": 1.68,
"learning_rate": 2.011795107940138e-06,
"loss": 0.2916,
"step": 1600
},
{
"epoch": 1.69,
"learning_rate": 1.8831662522913594e-06,
"loss": 0.2834,
"step": 1610
},
{
"epoch": 1.71,
"learning_rate": 1.7585108066697136e-06,
"loss": 0.2735,
"step": 1620
},
{
"epoch": 1.72,
"learning_rate": 1.6378665322611002e-06,
"loss": 0.3039,
"step": 1630
},
{
"epoch": 1.73,
"learning_rate": 1.521269975169471e-06,
"loss": 0.2872,
"step": 1640
},
{
"epoch": 1.74,
"learning_rate": 1.408756455346114e-06,
"loss": 0.2863,
"step": 1650
},
{
"epoch": 1.75,
"learning_rate": 1.3003600558903927e-06,
"loss": 0.2854,
"step": 1660
},
{
"epoch": 1.76,
"learning_rate": 1.196113612725116e-06,
"loss": 0.2727,
"step": 1670
},
{
"epoch": 1.77,
"learning_rate": 1.0960487046497524e-06,
"loss": 0.2763,
"step": 1680
},
{
"epoch": 1.78,
"learning_rate": 1.000195643774431e-06,
"loss": 0.2962,
"step": 1690
},
{
"epoch": 1.79,
"learning_rate": 9.085834663376629e-07,
"loss": 0.2888,
"step": 1700
},
{
"epoch": 1.8,
"learning_rate": 8.212399239105534e-07,
"loss": 0.2839,
"step": 1710
},
{
"epoch": 1.81,
"learning_rate": 7.381914749901752e-07,
"loss": 0.2789,
"step": 1720
},
{
"epoch": 1.82,
"learning_rate": 6.594632769846353e-07,
"loss": 0.2772,
"step": 1730
},
{
"epoch": 1.83,
"learning_rate": 5.850791785922849e-07,
"loss": 0.278,
"step": 1740
},
{
"epoch": 1.84,
"learning_rate": 5.150617125773633e-07,
"loss": 0.2878,
"step": 1750
},
{
"epoch": 1.85,
"learning_rate": 4.494320889442749e-07,
"loss": 0.2734,
"step": 1760
},
{
"epoch": 1.86,
"learning_rate": 3.882101885125539e-07,
"loss": 0.2826,
"step": 1770
},
{
"epoch": 1.87,
"learning_rate": 3.3141455689448266e-07,
"loss": 0.2875,
"step": 1780
},
{
"epoch": 1.88,
"learning_rate": 2.790623988771712e-07,
"loss": 0.2898,
"step": 1790
},
{
"epoch": 1.89,
"learning_rate": 2.3116957321080102e-07,
"loss": 0.2919,
"step": 1800
},
{
"epoch": 1.91,
"learning_rate": 1.8775058780463094e-07,
"loss": 0.2778,
"step": 1810
},
{
"epoch": 1.92,
"learning_rate": 1.4881859533218466e-07,
"loss": 0.3179,
"step": 1820
},
{
"epoch": 1.93,
"learning_rate": 1.1438538924699094e-07,
"loss": 0.2771,
"step": 1830
},
{
"epoch": 1.94,
"learning_rate": 8.446140021006132e-08,
"loss": 0.2817,
"step": 1840
},
{
"epoch": 1.95,
"learning_rate": 5.9055692930179426e-08,
"loss": 0.2903,
"step": 1850
},
{
"epoch": 1.96,
"learning_rate": 3.8175963417980685e-08,
"loss": 0.2934,
"step": 1860
},
{
"epoch": 1.97,
"learning_rate": 2.1828536654647235e-08,
"loss": 0.2952,
"step": 1870
},
{
"epoch": 1.98,
"learning_rate": 1.0018364675912217e-08,
"loss": 0.2881,
"step": 1880
},
{
"epoch": 1.99,
"learning_rate": 2.7490250719663933e-09,
"loss": 0.2701,
"step": 1890
},
{
"epoch": 2.0,
"learning_rate": 2.2719903721712954e-11,
"loss": 0.2955,
"step": 1900
},
{
"epoch": 2.0,
"step": 1900,
"total_flos": 3.4203213408659046e+17,
"train_loss": 0.3227183536165639,
"train_runtime": 6710.5312,
"train_samples_per_second": 1.132,
"train_steps_per_second": 0.283
}
],
"logging_steps": 10,
"max_steps": 1900,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 1000,
"total_flos": 3.4203213408659046e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}