moussaKam
Add model
04487cb
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"global_step": 421875,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 2.9964515555555556e-05,
"loss": 0.0351,
"step": 500
},
{
"epoch": 0.01,
"learning_rate": 2.9928959999999998e-05,
"loss": 0.0074,
"step": 1000
},
{
"epoch": 0.01,
"learning_rate": 2.9893404444444444e-05,
"loss": 0.0063,
"step": 1500
},
{
"epoch": 0.01,
"learning_rate": 2.985784888888889e-05,
"loss": 0.0055,
"step": 2000
},
{
"epoch": 0.02,
"learning_rate": 2.9822293333333335e-05,
"loss": 0.0051,
"step": 2500
},
{
"epoch": 0.02,
"learning_rate": 2.9786737777777777e-05,
"loss": 0.0047,
"step": 3000
},
{
"epoch": 0.02,
"learning_rate": 2.9751182222222222e-05,
"loss": 0.0043,
"step": 3500
},
{
"epoch": 0.03,
"learning_rate": 2.9715626666666668e-05,
"loss": 0.0042,
"step": 4000
},
{
"epoch": 0.03,
"learning_rate": 2.9680071111111114e-05,
"loss": 0.0039,
"step": 4500
},
{
"epoch": 0.04,
"learning_rate": 2.9644515555555556e-05,
"loss": 0.0038,
"step": 5000
},
{
"epoch": 0.04,
"learning_rate": 2.960896e-05,
"loss": 0.0036,
"step": 5500
},
{
"epoch": 0.04,
"learning_rate": 2.9573404444444447e-05,
"loss": 0.0034,
"step": 6000
},
{
"epoch": 0.05,
"learning_rate": 2.9537848888888892e-05,
"loss": 0.0033,
"step": 6500
},
{
"epoch": 0.05,
"learning_rate": 2.9502293333333334e-05,
"loss": 0.0033,
"step": 7000
},
{
"epoch": 0.05,
"learning_rate": 2.9466737777777777e-05,
"loss": 0.0032,
"step": 7500
},
{
"epoch": 0.06,
"learning_rate": 2.9431182222222222e-05,
"loss": 0.0031,
"step": 8000
},
{
"epoch": 0.06,
"learning_rate": 2.9395626666666668e-05,
"loss": 0.0032,
"step": 8500
},
{
"epoch": 0.06,
"learning_rate": 2.936007111111111e-05,
"loss": 0.0029,
"step": 9000
},
{
"epoch": 0.07,
"learning_rate": 2.9324515555555555e-05,
"loss": 0.0029,
"step": 9500
},
{
"epoch": 0.07,
"learning_rate": 2.928896e-05,
"loss": 0.0029,
"step": 10000
},
{
"epoch": 0.07,
"learning_rate": 2.9253475555555556e-05,
"loss": 0.0028,
"step": 10500
},
{
"epoch": 0.08,
"learning_rate": 2.9217919999999998e-05,
"loss": 0.0028,
"step": 11000
},
{
"epoch": 0.08,
"learning_rate": 2.9182364444444444e-05,
"loss": 0.0027,
"step": 11500
},
{
"epoch": 0.09,
"learning_rate": 2.914680888888889e-05,
"loss": 0.0027,
"step": 12000
},
{
"epoch": 0.09,
"learning_rate": 2.9111324444444445e-05,
"loss": 0.0028,
"step": 12500
},
{
"epoch": 0.09,
"learning_rate": 2.907576888888889e-05,
"loss": 0.0027,
"step": 13000
},
{
"epoch": 0.1,
"learning_rate": 2.9040213333333336e-05,
"loss": 0.0026,
"step": 13500
},
{
"epoch": 0.1,
"learning_rate": 2.900465777777778e-05,
"loss": 0.0026,
"step": 14000
},
{
"epoch": 0.1,
"learning_rate": 2.8969173333333333e-05,
"loss": 0.0026,
"step": 14500
},
{
"epoch": 0.11,
"learning_rate": 2.893361777777778e-05,
"loss": 0.0025,
"step": 15000
},
{
"epoch": 0.11,
"learning_rate": 2.8898062222222224e-05,
"loss": 0.0025,
"step": 15500
},
{
"epoch": 0.11,
"learning_rate": 2.886250666666667e-05,
"loss": 0.0025,
"step": 16000
},
{
"epoch": 0.12,
"learning_rate": 2.882702222222222e-05,
"loss": 0.0025,
"step": 16500
},
{
"epoch": 0.12,
"learning_rate": 2.8791466666666667e-05,
"loss": 0.0024,
"step": 17000
},
{
"epoch": 0.12,
"learning_rate": 2.8755911111111113e-05,
"loss": 0.0024,
"step": 17500
},
{
"epoch": 0.13,
"learning_rate": 2.8720355555555558e-05,
"loss": 0.0024,
"step": 18000
},
{
"epoch": 0.13,
"learning_rate": 2.868487111111111e-05,
"loss": 0.0024,
"step": 18500
},
{
"epoch": 0.14,
"learning_rate": 2.8649315555555555e-05,
"loss": 0.0024,
"step": 19000
},
{
"epoch": 0.14,
"learning_rate": 2.861376e-05,
"loss": 0.0024,
"step": 19500
},
{
"epoch": 0.14,
"learning_rate": 2.8578204444444443e-05,
"loss": 0.0023,
"step": 20000
},
{
"epoch": 0.15,
"learning_rate": 2.8542720000000002e-05,
"loss": 0.0023,
"step": 20500
},
{
"epoch": 0.15,
"learning_rate": 2.8507164444444447e-05,
"loss": 0.0023,
"step": 21000
},
{
"epoch": 0.15,
"learning_rate": 2.847160888888889e-05,
"loss": 0.0023,
"step": 21500
},
{
"epoch": 0.16,
"learning_rate": 2.843605333333333e-05,
"loss": 0.0023,
"step": 22000
},
{
"epoch": 0.16,
"learning_rate": 2.8400497777777777e-05,
"loss": 0.0022,
"step": 22500
},
{
"epoch": 0.16,
"learning_rate": 2.8365013333333336e-05,
"loss": 0.0022,
"step": 23000
},
{
"epoch": 0.17,
"learning_rate": 2.832945777777778e-05,
"loss": 0.0022,
"step": 23500
},
{
"epoch": 0.17,
"learning_rate": 2.8293902222222223e-05,
"loss": 0.0022,
"step": 24000
},
{
"epoch": 0.17,
"learning_rate": 2.825834666666667e-05,
"loss": 0.0021,
"step": 24500
},
{
"epoch": 0.18,
"learning_rate": 2.8222862222222224e-05,
"loss": 0.0023,
"step": 25000
},
{
"epoch": 0.18,
"learning_rate": 2.8187306666666666e-05,
"loss": 0.0022,
"step": 25500
},
{
"epoch": 0.18,
"learning_rate": 2.8151751111111112e-05,
"loss": 0.0021,
"step": 26000
},
{
"epoch": 0.19,
"learning_rate": 2.8116195555555557e-05,
"loss": 0.0021,
"step": 26500
},
{
"epoch": 0.19,
"learning_rate": 2.8080711111111113e-05,
"loss": 0.0021,
"step": 27000
},
{
"epoch": 0.2,
"learning_rate": 2.8045155555555555e-05,
"loss": 0.0022,
"step": 27500
},
{
"epoch": 0.2,
"learning_rate": 2.80096e-05,
"loss": 0.0022,
"step": 28000
},
{
"epoch": 0.2,
"learning_rate": 2.7974044444444446e-05,
"loss": 0.0022,
"step": 28500
},
{
"epoch": 0.21,
"learning_rate": 2.793856e-05,
"loss": 0.0021,
"step": 29000
},
{
"epoch": 0.21,
"learning_rate": 2.7903004444444443e-05,
"loss": 0.0021,
"step": 29500
},
{
"epoch": 0.21,
"learning_rate": 2.786744888888889e-05,
"loss": 0.0021,
"step": 30000
},
{
"epoch": 0.22,
"learning_rate": 2.7831893333333334e-05,
"loss": 0.0021,
"step": 30500
},
{
"epoch": 0.22,
"learning_rate": 2.779633777777778e-05,
"loss": 0.0021,
"step": 31000
},
{
"epoch": 0.22,
"learning_rate": 2.776085333333333e-05,
"loss": 0.0021,
"step": 31500
},
{
"epoch": 0.23,
"learning_rate": 2.7725297777777777e-05,
"loss": 0.0021,
"step": 32000
},
{
"epoch": 0.23,
"learning_rate": 2.7689742222222223e-05,
"loss": 0.002,
"step": 32500
},
{
"epoch": 0.23,
"learning_rate": 2.7654186666666665e-05,
"loss": 0.002,
"step": 33000
},
{
"epoch": 0.24,
"learning_rate": 2.7618702222222223e-05,
"loss": 0.0021,
"step": 33500
},
{
"epoch": 0.24,
"learning_rate": 2.758314666666667e-05,
"loss": 0.0021,
"step": 34000
},
{
"epoch": 0.25,
"learning_rate": 2.7547591111111115e-05,
"loss": 0.0021,
"step": 34500
},
{
"epoch": 0.25,
"learning_rate": 2.7512035555555553e-05,
"loss": 0.0021,
"step": 35000
},
{
"epoch": 0.25,
"learning_rate": 2.7476551111111112e-05,
"loss": 0.002,
"step": 35500
},
{
"epoch": 0.26,
"learning_rate": 2.7440995555555557e-05,
"loss": 0.002,
"step": 36000
},
{
"epoch": 0.26,
"learning_rate": 2.7405440000000003e-05,
"loss": 0.0021,
"step": 36500
},
{
"epoch": 0.26,
"learning_rate": 2.7369884444444445e-05,
"loss": 0.002,
"step": 37000
},
{
"epoch": 0.27,
"learning_rate": 2.733432888888889e-05,
"loss": 0.002,
"step": 37500
},
{
"epoch": 0.27,
"learning_rate": 2.7298844444444446e-05,
"loss": 0.002,
"step": 38000
},
{
"epoch": 0.27,
"learning_rate": 2.726328888888889e-05,
"loss": 0.002,
"step": 38500
},
{
"epoch": 0.28,
"learning_rate": 2.7227733333333334e-05,
"loss": 0.002,
"step": 39000
},
{
"epoch": 0.28,
"learning_rate": 2.719217777777778e-05,
"loss": 0.002,
"step": 39500
},
{
"epoch": 0.28,
"learning_rate": 2.7156693333333334e-05,
"loss": 0.002,
"step": 40000
},
{
"epoch": 0.29,
"learning_rate": 2.7121137777777777e-05,
"loss": 0.002,
"step": 40500
},
{
"epoch": 0.29,
"learning_rate": 2.7085582222222222e-05,
"loss": 0.002,
"step": 41000
},
{
"epoch": 0.3,
"learning_rate": 2.7050026666666668e-05,
"loss": 0.002,
"step": 41500
},
{
"epoch": 0.3,
"learning_rate": 2.7014542222222223e-05,
"loss": 0.002,
"step": 42000
},
{
"epoch": 0.3,
"learning_rate": 2.6978986666666665e-05,
"loss": 0.0019,
"step": 42500
},
{
"epoch": 0.31,
"learning_rate": 2.694343111111111e-05,
"loss": 0.0019,
"step": 43000
},
{
"epoch": 0.31,
"learning_rate": 2.6907875555555556e-05,
"loss": 0.0019,
"step": 43500
},
{
"epoch": 0.31,
"learning_rate": 2.687232e-05,
"loss": 0.002,
"step": 44000
},
{
"epoch": 0.32,
"learning_rate": 2.6836835555555557e-05,
"loss": 0.002,
"step": 44500
},
{
"epoch": 0.32,
"learning_rate": 2.680128e-05,
"loss": 0.0019,
"step": 45000
},
{
"epoch": 0.32,
"learning_rate": 2.6765724444444445e-05,
"loss": 0.0019,
"step": 45500
},
{
"epoch": 0.33,
"learning_rate": 2.673016888888889e-05,
"loss": 0.002,
"step": 46000
},
{
"epoch": 0.33,
"learning_rate": 2.6694613333333332e-05,
"loss": 0.0019,
"step": 46500
},
{
"epoch": 0.33,
"learning_rate": 2.6659057777777778e-05,
"loss": 0.0019,
"step": 47000
},
{
"epoch": 0.34,
"learning_rate": 2.6623573333333336e-05,
"loss": 0.002,
"step": 47500
},
{
"epoch": 0.34,
"learning_rate": 2.658801777777778e-05,
"loss": 0.0019,
"step": 48000
},
{
"epoch": 0.34,
"learning_rate": 2.655246222222222e-05,
"loss": 0.0019,
"step": 48500
},
{
"epoch": 0.35,
"learning_rate": 2.6516906666666666e-05,
"loss": 0.0019,
"step": 49000
},
{
"epoch": 0.35,
"learning_rate": 2.6481351111111112e-05,
"loss": 0.0019,
"step": 49500
},
{
"epoch": 0.36,
"learning_rate": 2.6445866666666667e-05,
"loss": 0.0019,
"step": 50000
},
{
"epoch": 0.36,
"learning_rate": 2.6410311111111112e-05,
"loss": 0.0019,
"step": 50500
},
{
"epoch": 0.36,
"learning_rate": 2.6374755555555558e-05,
"loss": 0.0019,
"step": 51000
},
{
"epoch": 0.37,
"learning_rate": 2.6339200000000004e-05,
"loss": 0.0019,
"step": 51500
},
{
"epoch": 0.37,
"learning_rate": 2.6303715555555555e-05,
"loss": 0.0019,
"step": 52000
},
{
"epoch": 0.37,
"learning_rate": 2.626816e-05,
"loss": 0.0019,
"step": 52500
},
{
"epoch": 0.38,
"learning_rate": 2.6232604444444446e-05,
"loss": 0.0019,
"step": 53000
},
{
"epoch": 0.38,
"learning_rate": 2.6197048888888892e-05,
"loss": 0.0018,
"step": 53500
},
{
"epoch": 0.38,
"learning_rate": 2.6161564444444444e-05,
"loss": 0.0019,
"step": 54000
},
{
"epoch": 0.39,
"learning_rate": 2.612600888888889e-05,
"loss": 0.0019,
"step": 54500
},
{
"epoch": 0.39,
"learning_rate": 2.6090453333333335e-05,
"loss": 0.0019,
"step": 55000
},
{
"epoch": 0.39,
"learning_rate": 2.6054897777777777e-05,
"loss": 0.0019,
"step": 55500
},
{
"epoch": 0.4,
"learning_rate": 2.6019413333333332e-05,
"loss": 0.0018,
"step": 56000
},
{
"epoch": 0.4,
"learning_rate": 2.5983857777777778e-05,
"loss": 0.0018,
"step": 56500
},
{
"epoch": 0.41,
"learning_rate": 2.5948302222222223e-05,
"loss": 0.0019,
"step": 57000
},
{
"epoch": 0.41,
"learning_rate": 2.5912746666666666e-05,
"loss": 0.0019,
"step": 57500
},
{
"epoch": 0.41,
"learning_rate": 2.587719111111111e-05,
"loss": 0.0018,
"step": 58000
},
{
"epoch": 0.42,
"learning_rate": 2.5841706666666666e-05,
"loss": 0.0018,
"step": 58500
},
{
"epoch": 0.42,
"learning_rate": 2.5806151111111112e-05,
"loss": 0.0018,
"step": 59000
},
{
"epoch": 0.42,
"learning_rate": 2.5770595555555554e-05,
"loss": 0.0018,
"step": 59500
},
{
"epoch": 0.43,
"learning_rate": 2.573504e-05,
"loss": 0.0018,
"step": 60000
},
{
"epoch": 0.43,
"learning_rate": 2.5699484444444445e-05,
"loss": 0.0019,
"step": 60500
},
{
"epoch": 0.43,
"learning_rate": 2.5664e-05,
"loss": 0.0018,
"step": 61000
},
{
"epoch": 0.44,
"learning_rate": 2.5628444444444446e-05,
"loss": 0.0018,
"step": 61500
},
{
"epoch": 0.44,
"learning_rate": 2.5592888888888888e-05,
"loss": 0.0018,
"step": 62000
},
{
"epoch": 0.44,
"learning_rate": 2.5557333333333333e-05,
"loss": 0.0019,
"step": 62500
},
{
"epoch": 0.45,
"learning_rate": 2.552184888888889e-05,
"loss": 0.0018,
"step": 63000
},
{
"epoch": 0.45,
"learning_rate": 2.5486293333333334e-05,
"loss": 0.0018,
"step": 63500
},
{
"epoch": 0.46,
"learning_rate": 2.545073777777778e-05,
"loss": 0.0018,
"step": 64000
},
{
"epoch": 0.46,
"learning_rate": 2.5415182222222225e-05,
"loss": 0.0018,
"step": 64500
},
{
"epoch": 0.46,
"learning_rate": 2.5379697777777777e-05,
"loss": 0.0018,
"step": 65000
},
{
"epoch": 0.47,
"learning_rate": 2.5344142222222223e-05,
"loss": 0.0018,
"step": 65500
},
{
"epoch": 0.47,
"learning_rate": 2.5308586666666668e-05,
"loss": 0.0018,
"step": 66000
},
{
"epoch": 0.47,
"learning_rate": 2.5273031111111114e-05,
"loss": 0.0018,
"step": 66500
},
{
"epoch": 0.48,
"learning_rate": 2.5237475555555556e-05,
"loss": 0.0018,
"step": 67000
},
{
"epoch": 0.48,
"learning_rate": 2.520192e-05,
"loss": 0.0018,
"step": 67500
},
{
"epoch": 0.48,
"learning_rate": 2.5166364444444447e-05,
"loss": 0.0018,
"step": 68000
},
{
"epoch": 0.49,
"learning_rate": 2.513088e-05,
"loss": 0.0018,
"step": 68500
},
{
"epoch": 0.49,
"learning_rate": 2.5095324444444444e-05,
"loss": 0.0018,
"step": 69000
},
{
"epoch": 0.49,
"learning_rate": 2.505976888888889e-05,
"loss": 0.0018,
"step": 69500
},
{
"epoch": 0.5,
"learning_rate": 2.5024213333333335e-05,
"loss": 0.0018,
"step": 70000
},
{
"epoch": 0.5,
"learning_rate": 2.4988728888888887e-05,
"loss": 0.0017,
"step": 70500
},
{
"epoch": 0.5,
"learning_rate": 2.4953173333333333e-05,
"loss": 0.0018,
"step": 71000
},
{
"epoch": 0.51,
"learning_rate": 2.491761777777778e-05,
"loss": 0.0018,
"step": 71500
},
{
"epoch": 0.51,
"learning_rate": 2.4882062222222224e-05,
"loss": 0.0018,
"step": 72000
},
{
"epoch": 0.52,
"learning_rate": 2.4846506666666666e-05,
"loss": 0.0018,
"step": 72500
},
{
"epoch": 0.52,
"learning_rate": 2.481102222222222e-05,
"loss": 0.0017,
"step": 73000
},
{
"epoch": 0.52,
"learning_rate": 2.4775466666666667e-05,
"loss": 0.0018,
"step": 73500
},
{
"epoch": 0.53,
"learning_rate": 2.4739911111111112e-05,
"loss": 0.0018,
"step": 74000
},
{
"epoch": 0.53,
"learning_rate": 2.4704355555555554e-05,
"loss": 0.0017,
"step": 74500
},
{
"epoch": 0.53,
"learning_rate": 2.46688e-05,
"loss": 0.0018,
"step": 75000
},
{
"epoch": 0.54,
"learning_rate": 2.463331555555556e-05,
"loss": 0.0017,
"step": 75500
},
{
"epoch": 0.54,
"learning_rate": 2.459776e-05,
"loss": 0.0018,
"step": 76000
},
{
"epoch": 0.54,
"learning_rate": 2.4562204444444443e-05,
"loss": 0.0017,
"step": 76500
},
{
"epoch": 0.55,
"learning_rate": 2.452664888888889e-05,
"loss": 0.0017,
"step": 77000
},
{
"epoch": 0.55,
"learning_rate": 2.4491093333333334e-05,
"loss": 0.0018,
"step": 77500
},
{
"epoch": 0.55,
"learning_rate": 2.445560888888889e-05,
"loss": 0.0017,
"step": 78000
},
{
"epoch": 0.56,
"learning_rate": 2.4420053333333335e-05,
"loss": 0.0018,
"step": 78500
},
{
"epoch": 0.56,
"learning_rate": 2.438449777777778e-05,
"loss": 0.0017,
"step": 79000
},
{
"epoch": 0.57,
"learning_rate": 2.4348942222222222e-05,
"loss": 0.0018,
"step": 79500
},
{
"epoch": 0.57,
"learning_rate": 2.4313386666666665e-05,
"loss": 0.0017,
"step": 80000
},
{
"epoch": 0.57,
"learning_rate": 2.4277902222222223e-05,
"loss": 0.0018,
"step": 80500
},
{
"epoch": 0.58,
"learning_rate": 2.424234666666667e-05,
"loss": 0.0017,
"step": 81000
},
{
"epoch": 0.58,
"learning_rate": 2.4206791111111114e-05,
"loss": 0.0018,
"step": 81500
},
{
"epoch": 0.58,
"learning_rate": 2.4171235555555556e-05,
"loss": 0.0017,
"step": 82000
},
{
"epoch": 0.59,
"learning_rate": 2.4135680000000002e-05,
"loss": 0.0017,
"step": 82500
},
{
"epoch": 0.59,
"learning_rate": 2.4100195555555557e-05,
"loss": 0.0017,
"step": 83000
},
{
"epoch": 0.59,
"learning_rate": 2.4064640000000003e-05,
"loss": 0.0017,
"step": 83500
},
{
"epoch": 0.6,
"learning_rate": 2.4029084444444445e-05,
"loss": 0.0017,
"step": 84000
},
{
"epoch": 0.6,
"learning_rate": 2.399352888888889e-05,
"loss": 0.0016,
"step": 84500
},
{
"epoch": 0.6,
"learning_rate": 2.3958044444444446e-05,
"loss": 0.0017,
"step": 85000
},
{
"epoch": 0.61,
"learning_rate": 2.3922488888888888e-05,
"loss": 0.0017,
"step": 85500
},
{
"epoch": 0.61,
"learning_rate": 2.3886933333333333e-05,
"loss": 0.0017,
"step": 86000
},
{
"epoch": 0.62,
"learning_rate": 2.385137777777778e-05,
"loss": 0.0017,
"step": 86500
},
{
"epoch": 0.62,
"learning_rate": 2.3815822222222224e-05,
"loss": 0.0017,
"step": 87000
},
{
"epoch": 0.62,
"learning_rate": 2.3780337777777776e-05,
"loss": 0.0017,
"step": 87500
},
{
"epoch": 0.63,
"learning_rate": 2.3744782222222222e-05,
"loss": 0.0016,
"step": 88000
},
{
"epoch": 0.63,
"learning_rate": 2.3709226666666667e-05,
"loss": 0.0017,
"step": 88500
},
{
"epoch": 0.63,
"learning_rate": 2.3673671111111113e-05,
"loss": 0.0017,
"step": 89000
},
{
"epoch": 0.64,
"learning_rate": 2.3638186666666665e-05,
"loss": 0.0016,
"step": 89500
},
{
"epoch": 0.64,
"learning_rate": 2.360263111111111e-05,
"loss": 0.0017,
"step": 90000
},
{
"epoch": 0.64,
"learning_rate": 2.3567075555555556e-05,
"loss": 0.0017,
"step": 90500
},
{
"epoch": 0.65,
"learning_rate": 2.353152e-05,
"loss": 0.0017,
"step": 91000
},
{
"epoch": 0.65,
"learning_rate": 2.3495964444444443e-05,
"loss": 0.0016,
"step": 91500
},
{
"epoch": 0.65,
"learning_rate": 2.346040888888889e-05,
"loss": 0.0017,
"step": 92000
},
{
"epoch": 0.66,
"learning_rate": 2.3424924444444448e-05,
"loss": 0.0017,
"step": 92500
},
{
"epoch": 0.66,
"learning_rate": 2.3389368888888886e-05,
"loss": 0.0017,
"step": 93000
},
{
"epoch": 0.66,
"learning_rate": 2.3353813333333332e-05,
"loss": 0.0017,
"step": 93500
},
{
"epoch": 0.67,
"learning_rate": 2.3318257777777777e-05,
"loss": 0.0016,
"step": 94000
},
{
"epoch": 0.67,
"learning_rate": 2.3282702222222223e-05,
"loss": 0.0017,
"step": 94500
},
{
"epoch": 0.68,
"learning_rate": 2.3247146666666665e-05,
"loss": 0.0016,
"step": 95000
},
{
"epoch": 0.68,
"learning_rate": 2.3211662222222224e-05,
"loss": 0.0016,
"step": 95500
},
{
"epoch": 0.68,
"learning_rate": 2.317610666666667e-05,
"loss": 0.0017,
"step": 96000
},
{
"epoch": 0.69,
"learning_rate": 2.314055111111111e-05,
"loss": 0.0017,
"step": 96500
},
{
"epoch": 0.69,
"learning_rate": 2.3104995555555554e-05,
"loss": 0.0017,
"step": 97000
},
{
"epoch": 0.69,
"learning_rate": 2.306944e-05,
"loss": 0.0017,
"step": 97500
},
{
"epoch": 0.7,
"learning_rate": 2.3033955555555558e-05,
"loss": 0.0017,
"step": 98000
},
{
"epoch": 0.7,
"learning_rate": 2.2998400000000003e-05,
"loss": 0.0016,
"step": 98500
},
{
"epoch": 0.7,
"learning_rate": 2.2962844444444445e-05,
"loss": 0.0016,
"step": 99000
},
{
"epoch": 0.71,
"learning_rate": 2.292728888888889e-05,
"loss": 0.0016,
"step": 99500
},
{
"epoch": 0.71,
"learning_rate": 2.2891733333333333e-05,
"loss": 0.0016,
"step": 100000
},
{
"epoch": 0.71,
"learning_rate": 2.2856177777777775e-05,
"loss": 0.0016,
"step": 100500
},
{
"epoch": 0.72,
"learning_rate": 2.282062222222222e-05,
"loss": 0.0017,
"step": 101000
},
{
"epoch": 0.72,
"learning_rate": 2.2785066666666666e-05,
"loss": 0.0016,
"step": 101500
},
{
"epoch": 0.73,
"learning_rate": 2.2749582222222225e-05,
"loss": 0.0016,
"step": 102000
},
{
"epoch": 0.73,
"learning_rate": 2.2714026666666667e-05,
"loss": 0.0016,
"step": 102500
},
{
"epoch": 0.73,
"learning_rate": 2.2678471111111113e-05,
"loss": 0.0016,
"step": 103000
},
{
"epoch": 0.74,
"learning_rate": 2.2642986666666668e-05,
"loss": 0.0016,
"step": 103500
},
{
"epoch": 0.74,
"learning_rate": 2.2607431111111113e-05,
"loss": 0.0016,
"step": 104000
},
{
"epoch": 0.74,
"learning_rate": 2.2571875555555556e-05,
"loss": 0.0017,
"step": 104500
},
{
"epoch": 0.75,
"learning_rate": 2.253632e-05,
"loss": 0.0016,
"step": 105000
},
{
"epoch": 0.75,
"learning_rate": 2.2500764444444447e-05,
"loss": 0.0016,
"step": 105500
},
{
"epoch": 0.75,
"learning_rate": 2.2465208888888892e-05,
"loss": 0.0016,
"step": 106000
},
{
"epoch": 0.76,
"learning_rate": 2.2429653333333334e-05,
"loss": 0.0016,
"step": 106500
},
{
"epoch": 0.76,
"learning_rate": 2.239409777777778e-05,
"loss": 0.0016,
"step": 107000
},
{
"epoch": 0.76,
"learning_rate": 2.2358613333333335e-05,
"loss": 0.0016,
"step": 107500
},
{
"epoch": 0.77,
"learning_rate": 2.2323057777777777e-05,
"loss": 0.0016,
"step": 108000
},
{
"epoch": 0.77,
"learning_rate": 2.2287573333333332e-05,
"loss": 0.0016,
"step": 108500
},
{
"epoch": 0.78,
"learning_rate": 2.2252017777777778e-05,
"loss": 0.0016,
"step": 109000
},
{
"epoch": 0.78,
"learning_rate": 2.2216462222222224e-05,
"loss": 0.0016,
"step": 109500
},
{
"epoch": 0.78,
"learning_rate": 2.2180906666666666e-05,
"loss": 0.0016,
"step": 110000
},
{
"epoch": 0.79,
"learning_rate": 2.214535111111111e-05,
"loss": 0.0016,
"step": 110500
},
{
"epoch": 0.79,
"learning_rate": 2.2109866666666666e-05,
"loss": 0.0016,
"step": 111000