push_block_isaaclab / trainer_state.json
theconstruct-ai's picture
Upload checkpoint 3000
d8c12f1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.3,
"eval_steps": 500,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"grad_norm": 0.7378236055374146,
"learning_rate": 1.8e-06,
"loss": 1.0762,
"step": 10
},
{
"grad_norm": 0.295481413602829,
"learning_rate": 3.8e-06,
"loss": 1.068,
"step": 20
},
{
"grad_norm": 0.14812137186527252,
"learning_rate": 5.8e-06,
"loss": 1.0568,
"step": 30
},
{
"grad_norm": 0.08043279498815536,
"learning_rate": 7.8e-06,
"loss": 1.0493,
"step": 40
},
{
"grad_norm": 0.08855466544628143,
"learning_rate": 9.800000000000001e-06,
"loss": 1.0441,
"step": 50
},
{
"grad_norm": 0.0877288430929184,
"learning_rate": 1.18e-05,
"loss": 1.0395,
"step": 60
},
{
"grad_norm": 0.09314433485269547,
"learning_rate": 1.3800000000000002e-05,
"loss": 1.0285,
"step": 70
},
{
"grad_norm": 0.10957096517086029,
"learning_rate": 1.58e-05,
"loss": 1.0216,
"step": 80
},
{
"grad_norm": 0.09380817413330078,
"learning_rate": 1.78e-05,
"loss": 1.0177,
"step": 90
},
{
"grad_norm": 0.12308425456285477,
"learning_rate": 1.9800000000000004e-05,
"loss": 1.0174,
"step": 100
},
{
"grad_norm": 0.20681029558181763,
"learning_rate": 2.18e-05,
"loss": 1.0002,
"step": 110
},
{
"grad_norm": 0.43010827898979187,
"learning_rate": 2.38e-05,
"loss": 0.9756,
"step": 120
},
{
"grad_norm": 0.5976276993751526,
"learning_rate": 2.58e-05,
"loss": 0.9313,
"step": 130
},
{
"grad_norm": 0.7003195881843567,
"learning_rate": 2.7800000000000005e-05,
"loss": 0.8658,
"step": 140
},
{
"grad_norm": 0.9297671318054199,
"learning_rate": 2.98e-05,
"loss": 0.7985,
"step": 150
},
{
"grad_norm": 0.8993100523948669,
"learning_rate": 3.18e-05,
"loss": 0.7352,
"step": 160
},
{
"grad_norm": 0.9233132004737854,
"learning_rate": 3.38e-05,
"loss": 0.6818,
"step": 170
},
{
"grad_norm": 0.8877270221710205,
"learning_rate": 3.58e-05,
"loss": 0.6224,
"step": 180
},
{
"grad_norm": 0.8032844662666321,
"learning_rate": 3.7800000000000004e-05,
"loss": 0.5745,
"step": 190
},
{
"grad_norm": 1.4975675344467163,
"learning_rate": 3.9800000000000005e-05,
"loss": 0.5299,
"step": 200
},
{
"grad_norm": 0.8472157716751099,
"learning_rate": 4.18e-05,
"loss": 0.498,
"step": 210
},
{
"grad_norm": 0.974686324596405,
"learning_rate": 4.38e-05,
"loss": 0.4575,
"step": 220
},
{
"grad_norm": 0.9499194025993347,
"learning_rate": 4.58e-05,
"loss": 0.4184,
"step": 230
},
{
"grad_norm": 0.9380892515182495,
"learning_rate": 4.78e-05,
"loss": 0.3887,
"step": 240
},
{
"grad_norm": 1.1189147233963013,
"learning_rate": 4.9800000000000004e-05,
"loss": 0.356,
"step": 250
},
{
"grad_norm": 1.1425296068191528,
"learning_rate": 5.1800000000000005e-05,
"loss": 0.3323,
"step": 260
},
{
"grad_norm": 1.6566969156265259,
"learning_rate": 5.380000000000001e-05,
"loss": 0.3011,
"step": 270
},
{
"grad_norm": 1.130071759223938,
"learning_rate": 5.580000000000001e-05,
"loss": 0.266,
"step": 280
},
{
"grad_norm": 0.748692512512207,
"learning_rate": 5.7799999999999995e-05,
"loss": 0.2453,
"step": 290
},
{
"grad_norm": 1.0026001930236816,
"learning_rate": 5.9800000000000003e-05,
"loss": 0.2321,
"step": 300
},
{
"grad_norm": 1.0121994018554688,
"learning_rate": 6.18e-05,
"loss": 0.2158,
"step": 310
},
{
"grad_norm": 1.153480052947998,
"learning_rate": 6.38e-05,
"loss": 0.1899,
"step": 320
},
{
"grad_norm": 1.013115644454956,
"learning_rate": 6.58e-05,
"loss": 0.1817,
"step": 330
},
{
"grad_norm": 0.9551103115081787,
"learning_rate": 6.780000000000001e-05,
"loss": 0.1799,
"step": 340
},
{
"grad_norm": 1.242201566696167,
"learning_rate": 6.98e-05,
"loss": 0.1602,
"step": 350
},
{
"grad_norm": 1.2399349212646484,
"learning_rate": 7.18e-05,
"loss": 0.1463,
"step": 360
},
{
"grad_norm": 0.9403181076049805,
"learning_rate": 7.38e-05,
"loss": 0.1228,
"step": 370
},
{
"grad_norm": 0.894763171672821,
"learning_rate": 7.58e-05,
"loss": 0.1122,
"step": 380
},
{
"grad_norm": 0.9855173230171204,
"learning_rate": 7.780000000000001e-05,
"loss": 0.1044,
"step": 390
},
{
"grad_norm": 1.3088891506195068,
"learning_rate": 7.98e-05,
"loss": 0.0922,
"step": 400
},
{
"grad_norm": 1.064687967300415,
"learning_rate": 8.18e-05,
"loss": 0.0824,
"step": 410
},
{
"grad_norm": 1.112962007522583,
"learning_rate": 8.38e-05,
"loss": 0.077,
"step": 420
},
{
"grad_norm": 1.0096707344055176,
"learning_rate": 8.58e-05,
"loss": 0.0816,
"step": 430
},
{
"grad_norm": 0.956439733505249,
"learning_rate": 8.78e-05,
"loss": 0.0721,
"step": 440
},
{
"grad_norm": 0.841948926448822,
"learning_rate": 8.98e-05,
"loss": 0.0723,
"step": 450
},
{
"grad_norm": 1.0005617141723633,
"learning_rate": 9.180000000000001e-05,
"loss": 0.0676,
"step": 460
},
{
"grad_norm": 0.8577463030815125,
"learning_rate": 9.38e-05,
"loss": 0.0647,
"step": 470
},
{
"grad_norm": 0.9084122776985168,
"learning_rate": 9.58e-05,
"loss": 0.0677,
"step": 480
},
{
"grad_norm": 1.0833115577697754,
"learning_rate": 9.78e-05,
"loss": 0.0653,
"step": 490
},
{
"grad_norm": 0.8259796500205994,
"learning_rate": 9.98e-05,
"loss": 0.0669,
"step": 500
},
{
"grad_norm": 0.8890817165374756,
"learning_rate": 9.9999778549206e-05,
"loss": 0.0611,
"step": 510
},
{
"grad_norm": 0.8948380351066589,
"learning_rate": 9.999901304280685e-05,
"loss": 0.0622,
"step": 520
},
{
"grad_norm": 0.848558247089386,
"learning_rate": 9.999770075521164e-05,
"loss": 0.0625,
"step": 530
},
{
"grad_norm": 0.7437359094619751,
"learning_rate": 9.99958417007713e-05,
"loss": 0.0626,
"step": 540
},
{
"grad_norm": 0.8651896715164185,
"learning_rate": 9.999343589981615e-05,
"loss": 0.0531,
"step": 550
},
{
"grad_norm": 0.8850076198577881,
"learning_rate": 9.999048337865568e-05,
"loss": 0.0615,
"step": 560
},
{
"grad_norm": 1.15691077709198,
"learning_rate": 9.998698416957815e-05,
"loss": 0.0641,
"step": 570
},
{
"grad_norm": 0.7086659073829651,
"learning_rate": 9.998293831085037e-05,
"loss": 0.0596,
"step": 580
},
{
"grad_norm": 0.7347807288169861,
"learning_rate": 9.997834584671719e-05,
"loss": 0.0572,
"step": 590
},
{
"grad_norm": 0.7725936770439148,
"learning_rate": 9.997320682740107e-05,
"loss": 0.0595,
"step": 600
},
{
"grad_norm": 0.769038200378418,
"learning_rate": 9.996752130910149e-05,
"loss": 0.0549,
"step": 610
},
{
"grad_norm": 0.6636187434196472,
"learning_rate": 9.99612893539944e-05,
"loss": 0.0538,
"step": 620
},
{
"grad_norm": 0.7348890900611877,
"learning_rate": 9.995451103023144e-05,
"loss": 0.0527,
"step": 630
},
{
"grad_norm": 0.7872657179832458,
"learning_rate": 9.994718641193928e-05,
"loss": 0.0557,
"step": 640
},
{
"grad_norm": 0.6932393312454224,
"learning_rate": 9.993931557921874e-05,
"loss": 0.0548,
"step": 650
},
{
"grad_norm": 0.7634221911430359,
"learning_rate": 9.993089861814402e-05,
"loss": 0.0524,
"step": 660
},
{
"grad_norm": 0.7409372925758362,
"learning_rate": 9.992193562076166e-05,
"loss": 0.0496,
"step": 670
},
{
"grad_norm": 0.7612417936325073,
"learning_rate": 9.991242668508954e-05,
"loss": 0.0461,
"step": 680
},
{
"grad_norm": 0.7743764519691467,
"learning_rate": 9.990237191511587e-05,
"loss": 0.0435,
"step": 690
},
{
"grad_norm": 0.725627601146698,
"learning_rate": 9.989177142079802e-05,
"loss": 0.0471,
"step": 700
},
{
"grad_norm": 0.5516918301582336,
"learning_rate": 9.988062531806126e-05,
"loss": 0.0462,
"step": 710
},
{
"grad_norm": 0.730378270149231,
"learning_rate": 9.986893372879762e-05,
"loss": 0.0487,
"step": 720
},
{
"grad_norm": 0.6389397382736206,
"learning_rate": 9.985669678086443e-05,
"loss": 0.0488,
"step": 730
},
{
"grad_norm": 0.8831709623336792,
"learning_rate": 9.984391460808298e-05,
"loss": 0.0514,
"step": 740
},
{
"grad_norm": 0.6661105751991272,
"learning_rate": 9.983058735023709e-05,
"loss": 0.0514,
"step": 750
},
{
"grad_norm": 0.7856804728507996,
"learning_rate": 9.98167151530715e-05,
"loss": 0.0471,
"step": 760
},
{
"grad_norm": 0.7654492259025574,
"learning_rate": 9.980229816829034e-05,
"loss": 0.0505,
"step": 770
},
{
"grad_norm": 0.6101555228233337,
"learning_rate": 9.978733655355544e-05,
"loss": 0.047,
"step": 780
},
{
"grad_norm": 0.7730712890625,
"learning_rate": 9.977183047248464e-05,
"loss": 0.0424,
"step": 790
},
{
"grad_norm": 0.8450173139572144,
"learning_rate": 9.975578009464992e-05,
"loss": 0.0455,
"step": 800
},
{
"grad_norm": 0.5586540102958679,
"learning_rate": 9.97391855955757e-05,
"loss": 0.04,
"step": 810
},
{
"grad_norm": 0.8502600789070129,
"learning_rate": 9.972204715673669e-05,
"loss": 0.0418,
"step": 820
},
{
"grad_norm": 0.6090761423110962,
"learning_rate": 9.970436496555617e-05,
"loss": 0.042,
"step": 830
},
{
"grad_norm": 0.6097173690795898,
"learning_rate": 9.968613921540373e-05,
"loss": 0.0451,
"step": 840
},
{
"grad_norm": 0.765418291091919,
"learning_rate": 9.966737010559326e-05,
"loss": 0.0447,
"step": 850
},
{
"grad_norm": 0.7200655937194824,
"learning_rate": 9.964805784138072e-05,
"loss": 0.0439,
"step": 860
},
{
"grad_norm": 0.6888765692710876,
"learning_rate": 9.962820263396195e-05,
"loss": 0.0416,
"step": 870
},
{
"grad_norm": 0.5708920359611511,
"learning_rate": 9.960780470047033e-05,
"loss": 0.0459,
"step": 880
},
{
"grad_norm": 0.7507001757621765,
"learning_rate": 9.958686426397437e-05,
"loss": 0.0425,
"step": 890
},
{
"grad_norm": 0.5076937079429626,
"learning_rate": 9.956538155347534e-05,
"loss": 0.0455,
"step": 900
},
{
"grad_norm": 0.5799984335899353,
"learning_rate": 9.95433568039047e-05,
"loss": 0.0399,
"step": 910
},
{
"grad_norm": 0.6337814927101135,
"learning_rate": 9.952079025612162e-05,
"loss": 0.0381,
"step": 920
},
{
"grad_norm": 0.7000153660774231,
"learning_rate": 9.949768215691022e-05,
"loss": 0.0411,
"step": 930
},
{
"grad_norm": 0.5318272709846497,
"learning_rate": 9.9474032758977e-05,
"loss": 0.0401,
"step": 940
},
{
"grad_norm": 0.700434148311615,
"learning_rate": 9.944984232094794e-05,
"loss": 0.0435,
"step": 950
},
{
"grad_norm": 0.605954647064209,
"learning_rate": 9.942511110736584e-05,
"loss": 0.0411,
"step": 960
},
{
"grad_norm": 0.5715162754058838,
"learning_rate": 9.939983938868726e-05,
"loss": 0.0414,
"step": 970
},
{
"grad_norm": 0.6310116648674011,
"learning_rate": 9.93740274412797e-05,
"loss": 0.0383,
"step": 980
},
{
"grad_norm": 0.680823564529419,
"learning_rate": 9.934767554741846e-05,
"loss": 0.0457,
"step": 990
},
{
"grad_norm": 0.632407546043396,
"learning_rate": 9.932078399528361e-05,
"loss": 0.0374,
"step": 1000
},
{
"grad_norm": 0.5892583727836609,
"learning_rate": 9.929335307895689e-05,
"loss": 0.0368,
"step": 1010
},
{
"grad_norm": 0.6278207898139954,
"learning_rate": 9.926538309841839e-05,
"loss": 0.0434,
"step": 1020
},
{
"grad_norm": 0.5285525321960449,
"learning_rate": 9.923687435954334e-05,
"loss": 0.0363,
"step": 1030
},
{
"grad_norm": 0.6097428798675537,
"learning_rate": 9.920782717409873e-05,
"loss": 0.0348,
"step": 1040
},
{
"grad_norm": 0.6607808470726013,
"learning_rate": 9.917824185973994e-05,
"loss": 0.0344,
"step": 1050
},
{
"grad_norm": 0.5603345036506653,
"learning_rate": 9.914811874000723e-05,
"loss": 0.0339,
"step": 1060
},
{
"grad_norm": 0.5727225542068481,
"learning_rate": 9.911745814432218e-05,
"loss": 0.0371,
"step": 1070
},
{
"grad_norm": 0.5944136381149292,
"learning_rate": 9.90862604079842e-05,
"loss": 0.0398,
"step": 1080
},
{
"grad_norm": 0.6860123872756958,
"learning_rate": 9.90545258721667e-05,
"loss": 0.0378,
"step": 1090
},
{
"grad_norm": 0.6331652402877808,
"learning_rate": 9.90222548839135e-05,
"loss": 0.0352,
"step": 1100
},
{
"grad_norm": 0.5666359066963196,
"learning_rate": 9.898944779613495e-05,
"loss": 0.034,
"step": 1110
},
{
"grad_norm": 0.5733403563499451,
"learning_rate": 9.89561049676041e-05,
"loss": 0.0352,
"step": 1120
},
{
"grad_norm": 0.5164110660552979,
"learning_rate": 9.89222267629528e-05,
"loss": 0.0379,
"step": 1130
},
{
"grad_norm": 0.6303825378417969,
"learning_rate": 9.888781355266763e-05,
"loss": 0.0369,
"step": 1140
},
{
"grad_norm": 0.5613416433334351,
"learning_rate": 9.885286571308598e-05,
"loss": 0.0338,
"step": 1150
},
{
"grad_norm": 0.6414242386817932,
"learning_rate": 9.881738362639182e-05,
"loss": 0.0375,
"step": 1160
},
{
"grad_norm": 0.5172221660614014,
"learning_rate": 9.878136768061154e-05,
"loss": 0.0376,
"step": 1170
},
{
"grad_norm": 0.6341432332992554,
"learning_rate": 9.874481826960979e-05,
"loss": 0.0374,
"step": 1180
},
{
"grad_norm": 0.624677836894989,
"learning_rate": 9.870773579308503e-05,
"loss": 0.0341,
"step": 1190
},
{
"grad_norm": 0.4869997203350067,
"learning_rate": 9.867012065656533e-05,
"loss": 0.0381,
"step": 1200
},
{
"grad_norm": 0.5759740471839905,
"learning_rate": 9.863197327140376e-05,
"loss": 0.0333,
"step": 1210
},
{
"grad_norm": 0.48775139451026917,
"learning_rate": 9.859329405477403e-05,
"loss": 0.0331,
"step": 1220
},
{
"grad_norm": 0.6388097405433655,
"learning_rate": 9.855408342966585e-05,
"loss": 0.0352,
"step": 1230
},
{
"grad_norm": 0.5959818363189697,
"learning_rate": 9.851434182488033e-05,
"loss": 0.0338,
"step": 1240
},
{
"grad_norm": 0.657508373260498,
"learning_rate": 9.84740696750253e-05,
"loss": 0.0331,
"step": 1250
},
{
"grad_norm": 0.7012799978256226,
"learning_rate": 9.843326742051055e-05,
"loss": 0.0348,
"step": 1260
},
{
"grad_norm": 0.5348427295684814,
"learning_rate": 9.839193550754297e-05,
"loss": 0.0337,
"step": 1270
},
{
"grad_norm": 0.7294585704803467,
"learning_rate": 9.835007438812177e-05,
"loss": 0.038,
"step": 1280
},
{
"grad_norm": 0.6077402830123901,
"learning_rate": 9.830768452003341e-05,
"loss": 0.0342,
"step": 1290
},
{
"grad_norm": 0.5021491050720215,
"learning_rate": 9.826476636684671e-05,
"loss": 0.0339,
"step": 1300
},
{
"grad_norm": 0.42891937494277954,
"learning_rate": 9.822132039790773e-05,
"loss": 0.0322,
"step": 1310
},
{
"grad_norm": 0.5746376514434814,
"learning_rate": 9.817734708833461e-05,
"loss": 0.0302,
"step": 1320
},
{
"grad_norm": 0.591606616973877,
"learning_rate": 9.813284691901243e-05,
"loss": 0.039,
"step": 1330
},
{
"grad_norm": 0.5928114056587219,
"learning_rate": 9.808782037658792e-05,
"loss": 0.0367,
"step": 1340
},
{
"grad_norm": 0.5678219199180603,
"learning_rate": 9.804226795346411e-05,
"loss": 0.0343,
"step": 1350
},
{
"grad_norm": 0.5018511414527893,
"learning_rate": 9.799619014779503e-05,
"loss": 0.0331,
"step": 1360
},
{
"grad_norm": 0.5295028686523438,
"learning_rate": 9.794958746348013e-05,
"loss": 0.0337,
"step": 1370
},
{
"grad_norm": 0.6938942074775696,
"learning_rate": 9.790246041015896e-05,
"loss": 0.0306,
"step": 1380
},
{
"grad_norm": 0.5297317504882812,
"learning_rate": 9.785480950320538e-05,
"loss": 0.0331,
"step": 1390
},
{
"grad_norm": 0.637657105922699,
"learning_rate": 9.78066352637221e-05,
"loss": 0.0311,
"step": 1400
},
{
"grad_norm": 0.5819315314292908,
"learning_rate": 9.775793821853488e-05,
"loss": 0.0327,
"step": 1410
},
{
"grad_norm": 0.7160147428512573,
"learning_rate": 9.77087189001868e-05,
"loss": 0.0323,
"step": 1420
},
{
"grad_norm": 0.7221500873565674,
"learning_rate": 9.765897784693243e-05,
"loss": 0.0332,
"step": 1430
},
{
"grad_norm": 0.5845819711685181,
"learning_rate": 9.760871560273197e-05,
"loss": 0.0312,
"step": 1440
},
{
"grad_norm": 0.5930690765380859,
"learning_rate": 9.755793271724526e-05,
"loss": 0.0305,
"step": 1450
},
{
"grad_norm": 0.4570452570915222,
"learning_rate": 9.750662974582584e-05,
"loss": 0.0372,
"step": 1460
},
{
"grad_norm": 0.5543919801712036,
"learning_rate": 9.745480724951473e-05,
"loss": 0.0314,
"step": 1470
},
{
"grad_norm": 0.5798304677009583,
"learning_rate": 9.740246579503447e-05,
"loss": 0.0336,
"step": 1480
},
{
"grad_norm": 0.5464045405387878,
"learning_rate": 9.734960595478284e-05,
"loss": 0.032,
"step": 1490
},
{
"grad_norm": 0.5292957425117493,
"learning_rate": 9.729622830682657e-05,
"loss": 0.0308,
"step": 1500
},
{
"grad_norm": 0.4644886255264282,
"learning_rate": 9.724233343489504e-05,
"loss": 0.0341,
"step": 1510
},
{
"grad_norm": 0.4468748867511749,
"learning_rate": 9.718792192837396e-05,
"loss": 0.029,
"step": 1520
},
{
"grad_norm": 0.5442079305648804,
"learning_rate": 9.713299438229886e-05,
"loss": 0.0337,
"step": 1530
},
{
"grad_norm": 0.46528083086013794,
"learning_rate": 9.707755139734855e-05,
"loss": 0.0338,
"step": 1540
},
{
"grad_norm": 0.5214895009994507,
"learning_rate": 9.702159357983866e-05,
"loss": 0.0315,
"step": 1550
},
{
"grad_norm": 0.564447820186615,
"learning_rate": 9.696512154171492e-05,
"loss": 0.0329,
"step": 1560
},
{
"grad_norm": 0.471500962972641,
"learning_rate": 9.690813590054645e-05,
"loss": 0.0326,
"step": 1570
},
{
"grad_norm": 0.52679044008255,
"learning_rate": 9.685063727951914e-05,
"loss": 0.0305,
"step": 1580
},
{
"grad_norm": 0.4842182993888855,
"learning_rate": 9.679262630742865e-05,
"loss": 0.0317,
"step": 1590
},
{
"grad_norm": 0.5807623267173767,
"learning_rate": 9.673410361867373e-05,
"loss": 0.0336,
"step": 1600
},
{
"grad_norm": 0.41651445627212524,
"learning_rate": 9.667506985324909e-05,
"loss": 0.031,
"step": 1610
},
{
"grad_norm": 0.4714881479740143,
"learning_rate": 9.661552565673855e-05,
"loss": 0.028,
"step": 1620
},
{
"grad_norm": 0.4803926944732666,
"learning_rate": 9.655547168030789e-05,
"loss": 0.0321,
"step": 1630
},
{
"grad_norm": 0.5974353551864624,
"learning_rate": 9.649490858069777e-05,
"loss": 0.0295,
"step": 1640
},
{
"grad_norm": 0.4964613914489746,
"learning_rate": 9.643383702021658e-05,
"loss": 0.0297,
"step": 1650
},
{
"grad_norm": 0.6017008423805237,
"learning_rate": 9.637225766673307e-05,
"loss": 0.0286,
"step": 1660
},
{
"grad_norm": 0.5116117000579834,
"learning_rate": 9.631017119366922e-05,
"loss": 0.0296,
"step": 1670
},
{
"grad_norm": 0.5510458946228027,
"learning_rate": 9.624757827999273e-05,
"loss": 0.0322,
"step": 1680
},
{
"grad_norm": 0.41116780042648315,
"learning_rate": 9.618447961020971e-05,
"loss": 0.0365,
"step": 1690
},
{
"grad_norm": 0.4938806891441345,
"learning_rate": 9.612087587435707e-05,
"loss": 0.0346,
"step": 1700
},
{
"grad_norm": 0.5194259881973267,
"learning_rate": 9.605676776799508e-05,
"loss": 0.0311,
"step": 1710
},
{
"grad_norm": 0.4529009163379669,
"learning_rate": 9.599215599219973e-05,
"loss": 0.0306,
"step": 1720
},
{
"grad_norm": 0.4386800229549408,
"learning_rate": 9.592704125355505e-05,
"loss": 0.0303,
"step": 1730
},
{
"grad_norm": 0.44015586376190186,
"learning_rate": 9.586142426414538e-05,
"loss": 0.0291,
"step": 1740
},
{
"grad_norm": 0.5530741810798645,
"learning_rate": 9.57953057415476e-05,
"loss": 0.0328,
"step": 1750
},
{
"grad_norm": 0.28225114941596985,
"learning_rate": 9.572868640882328e-05,
"loss": 0.0296,
"step": 1760
},
{
"grad_norm": 0.6074041724205017,
"learning_rate": 9.56615669945108e-05,
"loss": 0.0324,
"step": 1770
},
{
"grad_norm": 0.5109390616416931,
"learning_rate": 9.55939482326173e-05,
"loss": 0.03,
"step": 1780
},
{
"grad_norm": 0.5892201662063599,
"learning_rate": 9.552583086261069e-05,
"loss": 0.0316,
"step": 1790
},
{
"grad_norm": 0.4495730400085449,
"learning_rate": 9.545721562941168e-05,
"loss": 0.0295,
"step": 1800
},
{
"grad_norm": 0.5142664313316345,
"learning_rate": 9.538810328338543e-05,
"loss": 0.0277,
"step": 1810
},
{
"grad_norm": 0.4616416394710541,
"learning_rate": 9.531849458033349e-05,
"loss": 0.0307,
"step": 1820
},
{
"grad_norm": 0.4885185658931732,
"learning_rate": 9.524839028148547e-05,
"loss": 0.0298,
"step": 1830
},
{
"grad_norm": 0.4711757004261017,
"learning_rate": 9.517779115349077e-05,
"loss": 0.0304,
"step": 1840
},
{
"grad_norm": 0.4843687117099762,
"learning_rate": 9.510669796841014e-05,
"loss": 0.0301,
"step": 1850
},
{
"grad_norm": 0.5420807003974915,
"learning_rate": 9.503511150370727e-05,
"loss": 0.0326,
"step": 1860
},
{
"grad_norm": 0.644017219543457,
"learning_rate": 9.496303254224024e-05,
"loss": 0.0318,
"step": 1870
},
{
"grad_norm": 0.4648231565952301,
"learning_rate": 9.489046187225306e-05,
"loss": 0.0301,
"step": 1880
},
{
"grad_norm": 0.5046685338020325,
"learning_rate": 9.481740028736692e-05,
"loss": 0.0314,
"step": 1890
},
{
"grad_norm": 0.49768561124801636,
"learning_rate": 9.474384858657164e-05,
"loss": 0.0291,
"step": 1900
},
{
"grad_norm": 0.5587893724441528,
"learning_rate": 9.466980757421679e-05,
"loss": 0.0296,
"step": 1910
},
{
"grad_norm": 0.5340442061424255,
"learning_rate": 9.459527806000305e-05,
"loss": 0.0313,
"step": 1920
},
{
"grad_norm": 0.5392602682113647,
"learning_rate": 9.452026085897325e-05,
"loss": 0.0308,
"step": 1930
},
{
"grad_norm": 0.4618771970272064,
"learning_rate": 9.444475679150348e-05,
"loss": 0.0296,
"step": 1940
},
{
"grad_norm": 0.4055277705192566,
"learning_rate": 9.436876668329411e-05,
"loss": 0.028,
"step": 1950
},
{
"grad_norm": 0.5005772113800049,
"learning_rate": 9.429229136536079e-05,
"loss": 0.0273,
"step": 1960
},
{
"grad_norm": 0.42232707142829895,
"learning_rate": 9.421533167402534e-05,
"loss": 0.0286,
"step": 1970
},
{
"grad_norm": 0.5429880619049072,
"learning_rate": 9.413788845090666e-05,
"loss": 0.029,
"step": 1980
},
{
"grad_norm": 0.4448404312133789,
"learning_rate": 9.405996254291136e-05,
"loss": 0.0284,
"step": 1990
},
{
"grad_norm": 0.5074642300605774,
"learning_rate": 9.398155480222474e-05,
"loss": 0.0283,
"step": 2000
},
{
"grad_norm": 0.4470667243003845,
"learning_rate": 9.390266608630128e-05,
"loss": 0.0267,
"step": 2010
},
{
"grad_norm": 0.47320127487182617,
"learning_rate": 9.38232972578553e-05,
"loss": 0.0303,
"step": 2020
},
{
"grad_norm": 0.5718346238136292,
"learning_rate": 9.374344918485164e-05,
"loss": 0.0296,
"step": 2030
},
{
"grad_norm": 0.4110424518585205,
"learning_rate": 9.366312274049602e-05,
"loss": 0.028,
"step": 2040
},
{
"grad_norm": 0.41520369052886963,
"learning_rate": 9.358231880322554e-05,
"loss": 0.0296,
"step": 2050
},
{
"grad_norm": 0.4130535423755646,
"learning_rate": 9.350103825669916e-05,
"loss": 0.0286,
"step": 2060
},
{
"grad_norm": 0.5143803358078003,
"learning_rate": 9.341928198978787e-05,
"loss": 0.0285,
"step": 2070
},
{
"grad_norm": 0.5418136119842529,
"learning_rate": 9.333705089656512e-05,
"loss": 0.0264,
"step": 2080
},
{
"grad_norm": 0.46870583295822144,
"learning_rate": 9.325434587629698e-05,
"loss": 0.0317,
"step": 2090
},
{
"grad_norm": 0.417431116104126,
"learning_rate": 9.31711678334323e-05,
"loss": 0.0284,
"step": 2100
},
{
"grad_norm": 0.49152880907058716,
"learning_rate": 9.308751767759282e-05,
"loss": 0.025,
"step": 2110
},
{
"grad_norm": 0.378698468208313,
"learning_rate": 9.300339632356325e-05,
"loss": 0.027,
"step": 2120
},
{
"grad_norm": 0.4329814016819,
"learning_rate": 9.291880469128124e-05,
"loss": 0.0299,
"step": 2130
},
{
"grad_norm": 0.49008893966674805,
"learning_rate": 9.283374370582732e-05,
"loss": 0.0273,
"step": 2140
},
{
"grad_norm": 0.3793398141860962,
"learning_rate": 9.274821429741482e-05,
"loss": 0.0264,
"step": 2150
},
{
"grad_norm": 0.3981456458568573,
"learning_rate": 9.266221740137961e-05,
"loss": 0.026,
"step": 2160
},
{
"grad_norm": 0.5248379707336426,
"learning_rate": 9.257575395817001e-05,
"loss": 0.0255,
"step": 2170
},
{
"grad_norm": 0.39933085441589355,
"learning_rate": 9.248882491333637e-05,
"loss": 0.0261,
"step": 2180
},
{
"grad_norm": 0.5205438733100891,
"learning_rate": 9.240143121752076e-05,
"loss": 0.0253,
"step": 2190
},
{
"grad_norm": 0.4256397783756256,
"learning_rate": 9.23135738264467e-05,
"loss": 0.0313,
"step": 2200
},
{
"grad_norm": 0.4098120629787445,
"learning_rate": 9.222525370090849e-05,
"loss": 0.0286,
"step": 2210
},
{
"grad_norm": 0.452364444732666,
"learning_rate": 9.213647180676088e-05,
"loss": 0.0313,
"step": 2220
},
{
"grad_norm": 0.5283573269844055,
"learning_rate": 9.204722911490846e-05,
"loss": 0.027,
"step": 2230
},
{
"grad_norm": 0.4341718852519989,
"learning_rate": 9.1957526601295e-05,
"loss": 0.026,
"step": 2240
},
{
"grad_norm": 0.5589078664779663,
"learning_rate": 9.186736524689281e-05,
"loss": 0.0277,
"step": 2250
},
{
"grad_norm": 0.3732184171676636,
"learning_rate": 9.177674603769204e-05,
"loss": 0.0289,
"step": 2260
},
{
"grad_norm": 0.39962926506996155,
"learning_rate": 9.168566996468983e-05,
"loss": 0.0262,
"step": 2270
},
{
"grad_norm": 0.43201372027397156,
"learning_rate": 9.159413802387951e-05,
"loss": 0.0239,
"step": 2280
},
{
"grad_norm": 0.4189751446247101,
"learning_rate": 9.150215121623974e-05,
"loss": 0.0266,
"step": 2290
},
{
"grad_norm": 0.3986872136592865,
"learning_rate": 9.140971054772349e-05,
"loss": 0.0255,
"step": 2300
},
{
"grad_norm": 0.4998125731945038,
"learning_rate": 9.131681702924713e-05,
"loss": 0.0281,
"step": 2310
},
{
"grad_norm": 0.4827892482280731,
"learning_rate": 9.122347167667926e-05,
"loss": 0.0281,
"step": 2320
},
{
"grad_norm": 0.4876689016819,
"learning_rate": 9.112967551082973e-05,
"loss": 0.0319,
"step": 2330
},
{
"grad_norm": 0.36984163522720337,
"learning_rate": 9.103542955743835e-05,
"loss": 0.0242,
"step": 2340
},
{
"grad_norm": 0.465818852186203,
"learning_rate": 9.094073484716381e-05,
"loss": 0.0314,
"step": 2350
},
{
"grad_norm": 0.37877270579338074,
"learning_rate": 9.084559241557226e-05,
"loss": 0.0262,
"step": 2360
},
{
"grad_norm": 0.4463783800601959,
"learning_rate": 9.075000330312608e-05,
"loss": 0.0263,
"step": 2370
},
{
"grad_norm": 0.47452881932258606,
"learning_rate": 9.065396855517253e-05,
"loss": 0.0272,
"step": 2380
},
{
"grad_norm": 0.424927681684494,
"learning_rate": 9.055748922193219e-05,
"loss": 0.0278,
"step": 2390
},
{
"grad_norm": 0.3524123728275299,
"learning_rate": 9.046056635848761e-05,
"loss": 0.0268,
"step": 2400
},
{
"grad_norm": 0.39357349276542664,
"learning_rate": 9.036320102477169e-05,
"loss": 0.0235,
"step": 2410
},
{
"grad_norm": 0.38801810145378113,
"learning_rate": 9.02653942855561e-05,
"loss": 0.0309,
"step": 2420
},
{
"grad_norm": 0.42541712522506714,
"learning_rate": 9.016714721043971e-05,
"loss": 0.027,
"step": 2430
},
{
"grad_norm": 0.42861104011535645,
"learning_rate": 9.006846087383675e-05,
"loss": 0.0274,
"step": 2440
},
{
"grad_norm": 0.44381630420684814,
"learning_rate": 8.996933635496523e-05,
"loss": 0.0264,
"step": 2450
},
{
"grad_norm": 0.5069416761398315,
"learning_rate": 8.986977473783498e-05,
"loss": 0.0243,
"step": 2460
},
{
"grad_norm": 0.5593004822731018,
"learning_rate": 8.97697771112359e-05,
"loss": 0.0266,
"step": 2470
},
{
"grad_norm": 0.49196624755859375,
"learning_rate": 8.966934456872602e-05,
"loss": 0.0254,
"step": 2480
},
{
"grad_norm": 0.42328518629074097,
"learning_rate": 8.95684782086195e-05,
"loss": 0.0317,
"step": 2490
},
{
"grad_norm": 0.40058237314224243,
"learning_rate": 8.946717913397476e-05,
"loss": 0.0257,
"step": 2500
},
{
"grad_norm": 0.45824214816093445,
"learning_rate": 8.93654484525822e-05,
"loss": 0.0267,
"step": 2510
},
{
"grad_norm": 0.47785720229148865,
"learning_rate": 8.926328727695226e-05,
"loss": 0.026,
"step": 2520
},
{
"grad_norm": 0.5189729928970337,
"learning_rate": 8.916069672430319e-05,
"loss": 0.0264,
"step": 2530
},
{
"grad_norm": 0.3164174556732178,
"learning_rate": 8.905767791654884e-05,
"loss": 0.0244,
"step": 2540
},
{
"grad_norm": 0.45995235443115234,
"learning_rate": 8.895423198028638e-05,
"loss": 0.0272,
"step": 2550
},
{
"grad_norm": 0.40050774812698364,
"learning_rate": 8.885036004678402e-05,
"loss": 0.0292,
"step": 2560
},
{
"grad_norm": 0.3644542098045349,
"learning_rate": 8.874606325196857e-05,
"loss": 0.0237,
"step": 2570
},
{
"grad_norm": 0.4536350667476654,
"learning_rate": 8.864134273641304e-05,
"loss": 0.025,
"step": 2580
},
{
"grad_norm": 0.35245734453201294,
"learning_rate": 8.853619964532427e-05,
"loss": 0.0233,
"step": 2590
},
{
"grad_norm": 0.4429668188095093,
"learning_rate": 8.843063512853019e-05,
"loss": 0.0285,
"step": 2600
},
{
"grad_norm": 0.43946343660354614,
"learning_rate": 8.832465034046749e-05,
"loss": 0.0263,
"step": 2610
},
{
"grad_norm": 0.4406358599662781,
"learning_rate": 8.821824644016882e-05,
"loss": 0.0254,
"step": 2620
},
{
"grad_norm": 0.48885712027549744,
"learning_rate": 8.811142459125019e-05,
"loss": 0.025,
"step": 2630
},
{
"grad_norm": 0.42471176385879517,
"learning_rate": 8.800418596189822e-05,
"loss": 0.0265,
"step": 2640
},
{
"grad_norm": 0.3454952836036682,
"learning_rate": 8.789653172485737e-05,
"loss": 0.0261,
"step": 2650
},
{
"grad_norm": 0.4365542232990265,
"learning_rate": 8.778846305741715e-05,
"loss": 0.0253,
"step": 2660
},
{
"grad_norm": 0.3438829779624939,
"learning_rate": 8.767998114139918e-05,
"loss": 0.0251,
"step": 2670
},
{
"grad_norm": 0.3312196433544159,
"learning_rate": 8.757108716314429e-05,
"loss": 0.0254,
"step": 2680
},
{
"grad_norm": 0.40338999032974243,
"learning_rate": 8.746178231349962e-05,
"loss": 0.0275,
"step": 2690
},
{
"grad_norm": 0.4243628978729248,
"learning_rate": 8.735206778780549e-05,
"loss": 0.0239,
"step": 2700
},
{
"grad_norm": 0.4020898938179016,
"learning_rate": 8.724194478588234e-05,
"loss": 0.0234,
"step": 2710
},
{
"grad_norm": 0.4327259361743927,
"learning_rate": 8.713141451201772e-05,
"loss": 0.0248,
"step": 2720
},
{
"grad_norm": 0.3352695107460022,
"learning_rate": 8.702047817495295e-05,
"loss": 0.0258,
"step": 2730
},
{
"grad_norm": 0.3333274722099304,
"learning_rate": 8.69091369878701e-05,
"loss": 0.0238,
"step": 2740
},
{
"grad_norm": 0.42753326892852783,
"learning_rate": 8.679739216837849e-05,
"loss": 0.0222,
"step": 2750
},
{
"grad_norm": 0.3095396161079407,
"learning_rate": 8.66852449385016e-05,
"loss": 0.0233,
"step": 2760
},
{
"grad_norm": 0.3271157741546631,
"learning_rate": 8.657269652466356e-05,
"loss": 0.0267,
"step": 2770
},
{
"grad_norm": 0.4156598150730133,
"learning_rate": 8.645974815767577e-05,
"loss": 0.0225,
"step": 2780
},
{
"grad_norm": 0.35358086228370667,
"learning_rate": 8.634640107272351e-05,
"loss": 0.023,
"step": 2790
},
{
"grad_norm": 0.43658044934272766,
"learning_rate": 8.623265650935234e-05,
"loss": 0.0256,
"step": 2800
},
{
"grad_norm": 0.39249366521835327,
"learning_rate": 8.611851571145456e-05,
"loss": 0.0256,
"step": 2810
},
{
"grad_norm": 0.4045146107673645,
"learning_rate": 8.600397992725566e-05,
"loss": 0.0265,
"step": 2820
},
{
"grad_norm": 0.46040022373199463,
"learning_rate": 8.588905040930061e-05,
"loss": 0.0223,
"step": 2830
},
{
"grad_norm": 0.4740993082523346,
"learning_rate": 8.577372841444022e-05,
"loss": 0.0238,
"step": 2840
},
{
"grad_norm": 0.5013309717178345,
"learning_rate": 8.565801520381736e-05,
"loss": 0.0244,
"step": 2850
},
{
"grad_norm": 0.5572243332862854,
"learning_rate": 8.554191204285313e-05,
"loss": 0.0278,
"step": 2860
},
{
"grad_norm": 0.4136684536933899,
"learning_rate": 8.542542020123315e-05,
"loss": 0.0268,
"step": 2870
},
{
"grad_norm": 0.4344552159309387,
"learning_rate": 8.530854095289347e-05,
"loss": 0.0237,
"step": 2880
},
{
"grad_norm": 0.44237616658210754,
"learning_rate": 8.519127557600688e-05,
"loss": 0.0258,
"step": 2890
},
{
"grad_norm": 0.40503379702568054,
"learning_rate": 8.507362535296871e-05,
"loss": 0.0245,
"step": 2900
},
{
"grad_norm": 0.4115789234638214,
"learning_rate": 8.495559157038299e-05,
"loss": 0.0228,
"step": 2910
},
{
"grad_norm": 0.47725898027420044,
"learning_rate": 8.483717551904823e-05,
"loss": 0.0255,
"step": 2920
},
{
"grad_norm": 0.39906537532806396,
"learning_rate": 8.47183784939434e-05,
"loss": 0.028,
"step": 2930
},
{
"grad_norm": 0.3738861083984375,
"learning_rate": 8.459920179421374e-05,
"loss": 0.0276,
"step": 2940
},
{
"grad_norm": 0.4364217519760132,
"learning_rate": 8.447964672315656e-05,
"loss": 0.0242,
"step": 2950
},
{
"grad_norm": 0.40446925163269043,
"learning_rate": 8.435971458820692e-05,
"loss": 0.0245,
"step": 2960
},
{
"grad_norm": 0.43057939410209656,
"learning_rate": 8.423940670092345e-05,
"loss": 0.0229,
"step": 2970
},
{
"grad_norm": 0.3856443464756012,
"learning_rate": 8.411872437697394e-05,
"loss": 0.0217,
"step": 2980
},
{
"grad_norm": 0.3364211320877075,
"learning_rate": 8.399766893612096e-05,
"loss": 0.0223,
"step": 2990
},
{
"grad_norm": 0.47767484188079834,
"learning_rate": 8.38762417022074e-05,
"loss": 0.0231,
"step": 3000
}
],
"logging_steps": 10,
"max_steps": 10000,
"num_input_tokens_seen": 0,
"num_train_epochs": 9223372036854775807,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}