sft-mistral-v1-clean-valid / trainer_state.json
hllj's picture
Model save
2ac67cd
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.4460093896713615,
"eval_steps": 1000,
"global_step": 1900,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.4084507042253522e-07,
"loss": 0.6105,
"step": 1
},
{
"epoch": 0.0,
"learning_rate": 1.4084507042253521e-06,
"loss": 0.7205,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 2.8169014084507042e-06,
"loss": 0.694,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 4.225352112676057e-06,
"loss": 0.6913,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 5.6338028169014084e-06,
"loss": 0.5948,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 7.042253521126761e-06,
"loss": 0.5666,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 8.450704225352114e-06,
"loss": 0.4994,
"step": 60
},
{
"epoch": 0.03,
"learning_rate": 9.859154929577466e-06,
"loss": 0.4599,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 1.1267605633802817e-05,
"loss": 0.4061,
"step": 80
},
{
"epoch": 0.04,
"learning_rate": 1.267605633802817e-05,
"loss": 0.4111,
"step": 90
},
{
"epoch": 0.05,
"learning_rate": 1.4084507042253522e-05,
"loss": 0.3984,
"step": 100
},
{
"epoch": 0.05,
"learning_rate": 1.5492957746478876e-05,
"loss": 0.3893,
"step": 110
},
{
"epoch": 0.06,
"learning_rate": 1.6901408450704228e-05,
"loss": 0.4009,
"step": 120
},
{
"epoch": 0.06,
"learning_rate": 1.830985915492958e-05,
"loss": 0.4021,
"step": 130
},
{
"epoch": 0.07,
"learning_rate": 1.9718309859154933e-05,
"loss": 0.4013,
"step": 140
},
{
"epoch": 0.07,
"learning_rate": 2.112676056338028e-05,
"loss": 0.3468,
"step": 150
},
{
"epoch": 0.08,
"learning_rate": 2.2535211267605634e-05,
"loss": 0.3506,
"step": 160
},
{
"epoch": 0.08,
"learning_rate": 2.3943661971830986e-05,
"loss": 0.3765,
"step": 170
},
{
"epoch": 0.08,
"learning_rate": 2.535211267605634e-05,
"loss": 0.3766,
"step": 180
},
{
"epoch": 0.09,
"learning_rate": 2.676056338028169e-05,
"loss": 0.3583,
"step": 190
},
{
"epoch": 0.09,
"learning_rate": 2.8169014084507043e-05,
"loss": 0.3613,
"step": 200
},
{
"epoch": 0.1,
"learning_rate": 2.9577464788732395e-05,
"loss": 0.3604,
"step": 210
},
{
"epoch": 0.1,
"learning_rate": 2.9999778542898527e-05,
"loss": 0.3396,
"step": 220
},
{
"epoch": 0.11,
"learning_rate": 2.9998693870796316e-05,
"loss": 0.3655,
"step": 230
},
{
"epoch": 0.11,
"learning_rate": 2.9996705373180166e-05,
"loss": 0.3373,
"step": 240
},
{
"epoch": 0.12,
"learning_rate": 2.9993813169877495e-05,
"loss": 0.3397,
"step": 250
},
{
"epoch": 0.12,
"learning_rate": 2.9990017435173293e-05,
"loss": 0.3497,
"step": 260
},
{
"epoch": 0.13,
"learning_rate": 2.9985318397799606e-05,
"loss": 0.3548,
"step": 270
},
{
"epoch": 0.13,
"learning_rate": 2.9979716340921736e-05,
"loss": 0.3584,
"step": 280
},
{
"epoch": 0.14,
"learning_rate": 2.997321160212122e-05,
"loss": 0.3477,
"step": 290
},
{
"epoch": 0.14,
"learning_rate": 2.996580457337544e-05,
"loss": 0.3731,
"step": 300
},
{
"epoch": 0.15,
"learning_rate": 2.9957495701034037e-05,
"loss": 0.3688,
"step": 310
},
{
"epoch": 0.15,
"learning_rate": 2.9948285485792e-05,
"loss": 0.3399,
"step": 320
},
{
"epoch": 0.15,
"learning_rate": 2.993817448265948e-05,
"loss": 0.3419,
"step": 330
},
{
"epoch": 0.16,
"learning_rate": 2.992716330092839e-05,
"loss": 0.3663,
"step": 340
},
{
"epoch": 0.16,
"learning_rate": 2.9915252604135618e-05,
"loss": 0.358,
"step": 350
},
{
"epoch": 0.17,
"learning_rate": 2.9902443110023127e-05,
"loss": 0.3332,
"step": 360
},
{
"epoch": 0.17,
"learning_rate": 2.9888735590494616e-05,
"loss": 0.3315,
"step": 370
},
{
"epoch": 0.18,
"learning_rate": 2.9874130871569087e-05,
"loss": 0.3519,
"step": 380
},
{
"epoch": 0.18,
"learning_rate": 2.9858629833331002e-05,
"loss": 0.3876,
"step": 390
},
{
"epoch": 0.19,
"learning_rate": 2.9842233409877296e-05,
"loss": 0.3219,
"step": 400
},
{
"epoch": 0.19,
"learning_rate": 2.9824942589261053e-05,
"loss": 0.3465,
"step": 410
},
{
"epoch": 0.2,
"learning_rate": 2.9806758413431997e-05,
"loss": 0.3564,
"step": 420
},
{
"epoch": 0.2,
"learning_rate": 2.978768197817368e-05,
"loss": 0.3719,
"step": 430
},
{
"epoch": 0.21,
"learning_rate": 2.976771443303745e-05,
"loss": 0.345,
"step": 440
},
{
"epoch": 0.21,
"learning_rate": 2.974685698127321e-05,
"loss": 0.325,
"step": 450
},
{
"epoch": 0.22,
"learning_rate": 2.9725110879756868e-05,
"loss": 0.3461,
"step": 460
},
{
"epoch": 0.22,
"learning_rate": 2.9702477438914617e-05,
"loss": 0.3338,
"step": 470
},
{
"epoch": 0.23,
"learning_rate": 2.9678958022643983e-05,
"loss": 0.3533,
"step": 480
},
{
"epoch": 0.23,
"learning_rate": 2.9654554048231597e-05,
"loss": 0.3274,
"step": 490
},
{
"epoch": 0.23,
"learning_rate": 2.9629266986267835e-05,
"loss": 0.3423,
"step": 500
},
{
"epoch": 0.24,
"learning_rate": 2.9603098360558167e-05,
"loss": 0.3256,
"step": 510
},
{
"epoch": 0.24,
"learning_rate": 2.957604974803134e-05,
"loss": 0.33,
"step": 520
},
{
"epoch": 0.25,
"learning_rate": 2.9548122778644357e-05,
"loss": 0.3108,
"step": 530
},
{
"epoch": 0.25,
"learning_rate": 2.9519319135284252e-05,
"loss": 0.3473,
"step": 540
},
{
"epoch": 0.26,
"learning_rate": 2.9489640553666687e-05,
"loss": 0.3242,
"step": 550
},
{
"epoch": 0.26,
"learning_rate": 2.945908882223134e-05,
"loss": 0.3455,
"step": 560
},
{
"epoch": 0.27,
"learning_rate": 2.9427665782034143e-05,
"loss": 0.3417,
"step": 570
},
{
"epoch": 0.27,
"learning_rate": 2.9395373326636344e-05,
"loss": 0.3262,
"step": 580
},
{
"epoch": 0.28,
"learning_rate": 2.9362213401990395e-05,
"loss": 0.3507,
"step": 590
},
{
"epoch": 0.28,
"learning_rate": 2.9328188006322693e-05,
"loss": 0.3396,
"step": 600
},
{
"epoch": 0.29,
"learning_rate": 2.9293299190013143e-05,
"loss": 0.3283,
"step": 610
},
{
"epoch": 0.29,
"learning_rate": 2.9257549055471645e-05,
"loss": 0.3119,
"step": 620
},
{
"epoch": 0.3,
"learning_rate": 2.9220939757011366e-05,
"loss": 0.338,
"step": 630
},
{
"epoch": 0.3,
"learning_rate": 2.9183473500718938e-05,
"loss": 0.3285,
"step": 640
},
{
"epoch": 0.31,
"learning_rate": 2.9145152544321504e-05,
"loss": 0.3293,
"step": 650
},
{
"epoch": 0.31,
"learning_rate": 2.9105979197050683e-05,
"loss": 0.3528,
"step": 660
},
{
"epoch": 0.31,
"learning_rate": 2.906595581950341e-05,
"loss": 0.3183,
"step": 670
},
{
"epoch": 0.32,
"learning_rate": 2.902508482349968e-05,
"loss": 0.3593,
"step": 680
},
{
"epoch": 0.32,
"learning_rate": 2.898336867193721e-05,
"loss": 0.3371,
"step": 690
},
{
"epoch": 0.33,
"learning_rate": 2.8940809878643038e-05,
"loss": 0.3313,
"step": 700
},
{
"epoch": 0.33,
"learning_rate": 2.8897411008222026e-05,
"loss": 0.3298,
"step": 710
},
{
"epoch": 0.34,
"learning_rate": 2.8853174675902323e-05,
"loss": 0.3366,
"step": 720
},
{
"epoch": 0.34,
"learning_rate": 2.8808103547377754e-05,
"loss": 0.3408,
"step": 730
},
{
"epoch": 0.35,
"learning_rate": 2.8762200338647222e-05,
"loss": 0.3328,
"step": 740
},
{
"epoch": 0.35,
"learning_rate": 2.8715467815850994e-05,
"loss": 0.3235,
"step": 750
},
{
"epoch": 0.36,
"learning_rate": 2.8667908795104053e-05,
"loss": 0.3455,
"step": 760
},
{
"epoch": 0.36,
"learning_rate": 2.8619526142326367e-05,
"loss": 0.322,
"step": 770
},
{
"epoch": 0.37,
"learning_rate": 2.8570322773070217e-05,
"loss": 0.3367,
"step": 780
},
{
"epoch": 0.37,
"learning_rate": 2.8520301652344476e-05,
"loss": 0.3576,
"step": 790
},
{
"epoch": 0.38,
"learning_rate": 2.8469465794435965e-05,
"loss": 0.332,
"step": 800
},
{
"epoch": 0.38,
"learning_rate": 2.8417818262727784e-05,
"loss": 0.3156,
"step": 810
},
{
"epoch": 0.38,
"learning_rate": 2.8365362169514726e-05,
"loss": 0.3305,
"step": 820
},
{
"epoch": 0.39,
"learning_rate": 2.8312100675815736e-05,
"loss": 0.3238,
"step": 830
},
{
"epoch": 0.39,
"learning_rate": 2.8258036991183414e-05,
"loss": 0.3092,
"step": 840
},
{
"epoch": 0.4,
"learning_rate": 2.8203174373510617e-05,
"loss": 0.3503,
"step": 850
},
{
"epoch": 0.4,
"learning_rate": 2.8147516128834116e-05,
"loss": 0.3112,
"step": 860
},
{
"epoch": 0.41,
"learning_rate": 2.809106561113541e-05,
"loss": 0.3307,
"step": 870
},
{
"epoch": 0.41,
"learning_rate": 2.803382622213857e-05,
"loss": 0.3317,
"step": 880
},
{
"epoch": 0.42,
"learning_rate": 2.7975801411105307e-05,
"loss": 0.3328,
"step": 890
},
{
"epoch": 0.42,
"learning_rate": 2.7916994674627045e-05,
"loss": 0.3301,
"step": 900
},
{
"epoch": 0.43,
"learning_rate": 2.7857409556414283e-05,
"loss": 0.3271,
"step": 910
},
{
"epoch": 0.43,
"learning_rate": 2.7797049647083016e-05,
"loss": 0.3154,
"step": 920
},
{
"epoch": 0.44,
"learning_rate": 2.7735918583938363e-05,
"loss": 0.328,
"step": 930
},
{
"epoch": 0.44,
"learning_rate": 2.76740200507554e-05,
"loss": 0.3193,
"step": 940
},
{
"epoch": 0.45,
"learning_rate": 2.761135777755715e-05,
"loss": 0.3222,
"step": 950
},
{
"epoch": 1.0,
"learning_rate": 2.7547935540389843e-05,
"loss": 0.3022,
"step": 960
},
{
"epoch": 1.01,
"learning_rate": 2.748375716109533e-05,
"loss": 0.3119,
"step": 970
},
{
"epoch": 1.01,
"learning_rate": 2.7418826507080818e-05,
"loss": 0.3019,
"step": 980
},
{
"epoch": 1.02,
"learning_rate": 2.7353147491085785e-05,
"loss": 0.3181,
"step": 990
},
{
"epoch": 1.02,
"learning_rate": 2.728672407094622e-05,
"loss": 0.312,
"step": 1000
},
{
"epoch": 1.02,
"eval_loss": 0.5085553526878357,
"eval_runtime": 6.8207,
"eval_samples_per_second": 20.526,
"eval_steps_per_second": 5.131,
"step": 1000
},
{
"epoch": 1.03,
"learning_rate": 2.7219560249356125e-05,
"loss": 0.2941,
"step": 1010
},
{
"epoch": 1.03,
"learning_rate": 2.7151660073626283e-05,
"loss": 0.2852,
"step": 1020
},
{
"epoch": 1.04,
"learning_rate": 2.7083027635440392e-05,
"loss": 0.3113,
"step": 1030
},
{
"epoch": 1.04,
"learning_rate": 2.7013667070608502e-05,
"loss": 0.2969,
"step": 1040
},
{
"epoch": 1.05,
"learning_rate": 2.6943582558817764e-05,
"loss": 0.2912,
"step": 1050
},
{
"epoch": 1.05,
"learning_rate": 2.6872778323380585e-05,
"loss": 0.2851,
"step": 1060
},
{
"epoch": 1.06,
"learning_rate": 2.6801258630980117e-05,
"loss": 0.3045,
"step": 1070
},
{
"epoch": 1.06,
"learning_rate": 2.6729027791413154e-05,
"loss": 0.3157,
"step": 1080
},
{
"epoch": 1.07,
"learning_rate": 2.6656090157330424e-05,
"loss": 0.2968,
"step": 1090
},
{
"epoch": 1.07,
"learning_rate": 2.6582450123974278e-05,
"loss": 0.2832,
"step": 1100
},
{
"epoch": 1.08,
"learning_rate": 2.650811212891385e-05,
"loss": 0.32,
"step": 1110
},
{
"epoch": 1.08,
"learning_rate": 2.6433080651777655e-05,
"loss": 0.2936,
"step": 1120
},
{
"epoch": 1.08,
"learning_rate": 2.635736021398361e-05,
"loss": 0.3094,
"step": 1130
},
{
"epoch": 1.09,
"learning_rate": 2.628095537846661e-05,
"loss": 0.3073,
"step": 1140
},
{
"epoch": 1.09,
"learning_rate": 2.6203870749403553e-05,
"loss": 0.3067,
"step": 1150
},
{
"epoch": 1.1,
"learning_rate": 2.6126110971935878e-05,
"loss": 0.309,
"step": 1160
},
{
"epoch": 1.1,
"learning_rate": 2.604768073188966e-05,
"loss": 0.2851,
"step": 1170
},
{
"epoch": 1.11,
"learning_rate": 2.5968584755493233e-05,
"loss": 0.3074,
"step": 1180
},
{
"epoch": 1.11,
"learning_rate": 2.5888827809092406e-05,
"loss": 0.3012,
"step": 1190
},
{
"epoch": 1.12,
"learning_rate": 2.5808414698863205e-05,
"loss": 0.3042,
"step": 1200
},
{
"epoch": 1.12,
"learning_rate": 2.5727350270522293e-05,
"loss": 0.3072,
"step": 1210
},
{
"epoch": 1.13,
"learning_rate": 2.5645639409034935e-05,
"loss": 0.2948,
"step": 1220
},
{
"epoch": 1.13,
"learning_rate": 2.5563287038320635e-05,
"loss": 0.3042,
"step": 1230
},
{
"epoch": 1.14,
"learning_rate": 2.548029812095644e-05,
"loss": 0.3112,
"step": 1240
},
{
"epoch": 1.14,
"learning_rate": 2.539667765787786e-05,
"loss": 0.3213,
"step": 1250
},
{
"epoch": 1.15,
"learning_rate": 2.531243068807754e-05,
"loss": 0.2931,
"step": 1260
},
{
"epoch": 1.15,
"learning_rate": 2.522756228830158e-05,
"loss": 0.2802,
"step": 1270
},
{
"epoch": 1.15,
"learning_rate": 2.5142077572743643e-05,
"loss": 0.3049,
"step": 1280
},
{
"epoch": 1.16,
"learning_rate": 2.5055981692736758e-05,
"loss": 0.3234,
"step": 1290
},
{
"epoch": 1.16,
"learning_rate": 2.4969279836442868e-05,
"loss": 0.286,
"step": 1300
},
{
"epoch": 1.17,
"learning_rate": 2.4881977228540243e-05,
"loss": 0.3099,
"step": 1310
},
{
"epoch": 1.17,
"learning_rate": 2.4794079129908606e-05,
"loss": 0.2811,
"step": 1320
},
{
"epoch": 1.18,
"learning_rate": 2.470559083731212e-05,
"loss": 0.3202,
"step": 1330
},
{
"epoch": 1.18,
"learning_rate": 2.4616517683080197e-05,
"loss": 0.3031,
"step": 1340
},
{
"epoch": 1.19,
"learning_rate": 2.4526865034786184e-05,
"loss": 0.2663,
"step": 1350
},
{
"epoch": 1.19,
"learning_rate": 2.4436638294923902e-05,
"loss": 0.2946,
"step": 1360
},
{
"epoch": 1.2,
"learning_rate": 2.4345842900582084e-05,
"loss": 0.2625,
"step": 1370
},
{
"epoch": 1.2,
"learning_rate": 2.4254484323116746e-05,
"loss": 0.2953,
"step": 1380
},
{
"epoch": 1.21,
"learning_rate": 2.4162568067821478e-05,
"loss": 0.3124,
"step": 1390
},
{
"epoch": 1.21,
"learning_rate": 2.4070099673595696e-05,
"loss": 0.3166,
"step": 1400
},
{
"epoch": 1.22,
"learning_rate": 2.3977084712610862e-05,
"loss": 0.3096,
"step": 1410
},
{
"epoch": 1.22,
"learning_rate": 2.3883528789974703e-05,
"loss": 0.3054,
"step": 1420
},
{
"epoch": 1.23,
"learning_rate": 2.3789437543393446e-05,
"loss": 0.3024,
"step": 1430
},
{
"epoch": 1.23,
"learning_rate": 2.3694816642832087e-05,
"loss": 0.2855,
"step": 1440
},
{
"epoch": 1.23,
"learning_rate": 2.3599671790172738e-05,
"loss": 0.2768,
"step": 1450
},
{
"epoch": 1.24,
"learning_rate": 2.3504008718870983e-05,
"loss": 0.289,
"step": 1460
},
{
"epoch": 1.24,
"learning_rate": 2.3407833193610427e-05,
"loss": 0.2805,
"step": 1470
},
{
"epoch": 1.25,
"learning_rate": 2.3311151009955297e-05,
"loss": 0.2729,
"step": 1480
},
{
"epoch": 1.25,
"learning_rate": 2.3213967994001185e-05,
"loss": 0.2649,
"step": 1490
},
{
"epoch": 1.26,
"learning_rate": 2.3116290002023982e-05,
"loss": 0.2858,
"step": 1500
},
{
"epoch": 1.26,
"learning_rate": 2.301812292012698e-05,
"loss": 0.2785,
"step": 1510
},
{
"epoch": 1.27,
"learning_rate": 2.291947266388616e-05,
"loss": 0.2951,
"step": 1520
},
{
"epoch": 1.27,
"learning_rate": 2.2820345177993727e-05,
"loss": 0.2612,
"step": 1530
},
{
"epoch": 1.28,
"learning_rate": 2.272074643589988e-05,
"loss": 0.2873,
"step": 1540
},
{
"epoch": 1.28,
"learning_rate": 2.262068243945285e-05,
"loss": 0.276,
"step": 1550
},
{
"epoch": 1.29,
"learning_rate": 2.252015921853723e-05,
"loss": 0.2888,
"step": 1560
},
{
"epoch": 1.29,
"learning_rate": 2.2419182830710593e-05,
"loss": 0.2721,
"step": 1570
},
{
"epoch": 1.3,
"learning_rate": 2.23177593608385e-05,
"loss": 0.2805,
"step": 1580
},
{
"epoch": 1.3,
"learning_rate": 2.221589492072778e-05,
"loss": 0.2719,
"step": 1590
},
{
"epoch": 1.31,
"learning_rate": 2.2113595648758273e-05,
"loss": 0.2703,
"step": 1600
},
{
"epoch": 1.31,
"learning_rate": 2.2010867709512895e-05,
"loss": 0.2661,
"step": 1610
},
{
"epoch": 1.31,
"learning_rate": 2.1907717293406175e-05,
"loss": 0.2665,
"step": 1620
},
{
"epoch": 1.32,
"learning_rate": 2.1804150616311222e-05,
"loss": 0.2791,
"step": 1630
},
{
"epoch": 1.32,
"learning_rate": 2.1700173919185144e-05,
"loss": 0.2549,
"step": 1640
},
{
"epoch": 1.33,
"learning_rate": 2.1595793467692967e-05,
"loss": 0.2934,
"step": 1650
},
{
"epoch": 1.33,
"learning_rate": 2.149101555183009e-05,
"loss": 0.2666,
"step": 1660
},
{
"epoch": 1.34,
"learning_rate": 2.1385846485543202e-05,
"loss": 0.3041,
"step": 1670
},
{
"epoch": 1.34,
"learning_rate": 2.1280292606349838e-05,
"loss": 0.2651,
"step": 1680
},
{
"epoch": 1.35,
"learning_rate": 2.117436027495647e-05,
"loss": 0.2718,
"step": 1690
},
{
"epoch": 1.35,
"learning_rate": 2.106805587487519e-05,
"loss": 0.2625,
"step": 1700
},
{
"epoch": 1.36,
"learning_rate": 2.096138581203908e-05,
"loss": 0.284,
"step": 1710
},
{
"epoch": 1.36,
"learning_rate": 2.0854356514416144e-05,
"loss": 0.2865,
"step": 1720
},
{
"epoch": 1.37,
"learning_rate": 2.0746974431621968e-05,
"loss": 0.288,
"step": 1730
},
{
"epoch": 1.37,
"learning_rate": 2.06392460345311e-05,
"loss": 0.2704,
"step": 1740
},
{
"epoch": 1.38,
"learning_rate": 2.053117781488706e-05,
"loss": 0.2896,
"step": 1750
},
{
"epoch": 1.38,
"learning_rate": 2.0422776284911175e-05,
"loss": 0.2813,
"step": 1760
},
{
"epoch": 1.38,
"learning_rate": 2.031404797691016e-05,
"loss": 0.2857,
"step": 1770
},
{
"epoch": 1.39,
"learning_rate": 2.0204999442882447e-05,
"loss": 0.3063,
"step": 1780
},
{
"epoch": 1.39,
"learning_rate": 2.0095637254123392e-05,
"loss": 0.2837,
"step": 1790
},
{
"epoch": 1.4,
"learning_rate": 1.998596800082927e-05,
"loss": 0.2851,
"step": 1800
},
{
"epoch": 1.4,
"learning_rate": 1.9875998291700148e-05,
"loss": 0.2852,
"step": 1810
},
{
"epoch": 1.41,
"learning_rate": 1.976573475354165e-05,
"loss": 0.2651,
"step": 1820
},
{
"epoch": 1.41,
"learning_rate": 1.9655184030865617e-05,
"loss": 0.2655,
"step": 1830
},
{
"epoch": 1.42,
"learning_rate": 1.9544352785489706e-05,
"loss": 0.2758,
"step": 1840
},
{
"epoch": 1.42,
"learning_rate": 1.9433247696135967e-05,
"loss": 0.2698,
"step": 1850
},
{
"epoch": 1.43,
"learning_rate": 1.9321875458028347e-05,
"loss": 0.2988,
"step": 1860
},
{
"epoch": 1.43,
"learning_rate": 1.9210242782489266e-05,
"loss": 0.2723,
"step": 1870
},
{
"epoch": 1.44,
"learning_rate": 1.9098356396535167e-05,
"loss": 0.2726,
"step": 1880
},
{
"epoch": 1.44,
"learning_rate": 1.8986223042471144e-05,
"loss": 0.2541,
"step": 1890
},
{
"epoch": 1.45,
"learning_rate": 1.8873849477484696e-05,
"loss": 0.2822,
"step": 1900
},
{
"epoch": 1.45,
"step": 1900,
"total_flos": 3.4203213408659046e+17,
"train_loss": 0.32689529290324765,
"train_runtime": 3915.3355,
"train_samples_per_second": 4.351,
"train_steps_per_second": 1.088
}
],
"logging_steps": 10,
"max_steps": 4260,
"num_train_epochs": 2,
"save_steps": 1000,
"total_flos": 3.4203213408659046e+17,
"trial_name": null,
"trial_params": null
}