|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 421875, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9964515555555556e-05, |
|
"loss": 0.0351, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9928959999999998e-05, |
|
"loss": 0.0074, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.9893404444444444e-05, |
|
"loss": 0.0063, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 2.985784888888889e-05, |
|
"loss": 0.0055, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9822293333333335e-05, |
|
"loss": 0.0051, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9786737777777777e-05, |
|
"loss": 0.0047, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9751182222222222e-05, |
|
"loss": 0.0043, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9715626666666668e-05, |
|
"loss": 0.0042, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2.9680071111111114e-05, |
|
"loss": 0.0039, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9644515555555556e-05, |
|
"loss": 0.0038, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.960896e-05, |
|
"loss": 0.0036, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.9573404444444447e-05, |
|
"loss": 0.0034, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9537848888888892e-05, |
|
"loss": 0.0033, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9502293333333334e-05, |
|
"loss": 0.0033, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.9466737777777777e-05, |
|
"loss": 0.0032, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9431182222222222e-05, |
|
"loss": 0.0031, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.9395626666666668e-05, |
|
"loss": 0.0032, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 2.936007111111111e-05, |
|
"loss": 0.0029, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9324515555555555e-05, |
|
"loss": 0.0029, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.928896e-05, |
|
"loss": 0.0029, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 2.9253475555555556e-05, |
|
"loss": 0.0028, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9217919999999998e-05, |
|
"loss": 0.0028, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 2.9182364444444444e-05, |
|
"loss": 0.0027, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.914680888888889e-05, |
|
"loss": 0.0027, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.9111324444444445e-05, |
|
"loss": 0.0028, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.907576888888889e-05, |
|
"loss": 0.0027, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.9040213333333336e-05, |
|
"loss": 0.0026, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.900465777777778e-05, |
|
"loss": 0.0026, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.8969173333333333e-05, |
|
"loss": 0.0026, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.893361777777778e-05, |
|
"loss": 0.0025, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.8898062222222224e-05, |
|
"loss": 0.0025, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 2.886250666666667e-05, |
|
"loss": 0.0025, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.882702222222222e-05, |
|
"loss": 0.0025, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8791466666666667e-05, |
|
"loss": 0.0024, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8755911111111113e-05, |
|
"loss": 0.0024, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.8720355555555558e-05, |
|
"loss": 0.0024, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.868487111111111e-05, |
|
"loss": 0.0024, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8649315555555555e-05, |
|
"loss": 0.0024, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.861376e-05, |
|
"loss": 0.0024, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.8578204444444443e-05, |
|
"loss": 0.0023, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8542720000000002e-05, |
|
"loss": 0.0023, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.8507164444444447e-05, |
|
"loss": 0.0023, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 2.847160888888889e-05, |
|
"loss": 0.0023, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.843605333333333e-05, |
|
"loss": 0.0023, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8400497777777777e-05, |
|
"loss": 0.0022, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 2.8365013333333336e-05, |
|
"loss": 0.0022, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.832945777777778e-05, |
|
"loss": 0.0022, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.8293902222222223e-05, |
|
"loss": 0.0022, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.825834666666667e-05, |
|
"loss": 0.0021, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8222862222222224e-05, |
|
"loss": 0.0023, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8187306666666666e-05, |
|
"loss": 0.0022, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 2.8151751111111112e-05, |
|
"loss": 0.0021, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8116195555555557e-05, |
|
"loss": 0.0021, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 2.8080711111111113e-05, |
|
"loss": 0.0021, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.8045155555555555e-05, |
|
"loss": 0.0022, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.80096e-05, |
|
"loss": 0.0022, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.7974044444444446e-05, |
|
"loss": 0.0022, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.793856e-05, |
|
"loss": 0.0021, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.7903004444444443e-05, |
|
"loss": 0.0021, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.786744888888889e-05, |
|
"loss": 0.0021, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.7831893333333334e-05, |
|
"loss": 0.0021, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.779633777777778e-05, |
|
"loss": 0.0021, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.776085333333333e-05, |
|
"loss": 0.0021, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7725297777777777e-05, |
|
"loss": 0.0021, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7689742222222223e-05, |
|
"loss": 0.002, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.7654186666666665e-05, |
|
"loss": 0.002, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.7618702222222223e-05, |
|
"loss": 0.0021, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 2.758314666666667e-05, |
|
"loss": 0.0021, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7547591111111115e-05, |
|
"loss": 0.0021, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7512035555555553e-05, |
|
"loss": 0.0021, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 2.7476551111111112e-05, |
|
"loss": 0.002, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7440995555555557e-05, |
|
"loss": 0.002, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7405440000000003e-05, |
|
"loss": 0.0021, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 2.7369884444444445e-05, |
|
"loss": 0.002, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.733432888888889e-05, |
|
"loss": 0.002, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.7298844444444446e-05, |
|
"loss": 0.002, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.726328888888889e-05, |
|
"loss": 0.002, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.7227733333333334e-05, |
|
"loss": 0.002, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.719217777777778e-05, |
|
"loss": 0.002, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.7156693333333334e-05, |
|
"loss": 0.002, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.7121137777777777e-05, |
|
"loss": 0.002, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 2.7085582222222222e-05, |
|
"loss": 0.002, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.7050026666666668e-05, |
|
"loss": 0.002, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.7014542222222223e-05, |
|
"loss": 0.002, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.6978986666666665e-05, |
|
"loss": 0.0019, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.694343111111111e-05, |
|
"loss": 0.0019, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.6907875555555556e-05, |
|
"loss": 0.0019, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 2.687232e-05, |
|
"loss": 0.002, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6836835555555557e-05, |
|
"loss": 0.002, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.680128e-05, |
|
"loss": 0.0019, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 2.6765724444444445e-05, |
|
"loss": 0.0019, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.673016888888889e-05, |
|
"loss": 0.002, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6694613333333332e-05, |
|
"loss": 0.0019, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.6659057777777778e-05, |
|
"loss": 0.0019, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.6623573333333336e-05, |
|
"loss": 0.002, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.658801777777778e-05, |
|
"loss": 0.0019, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 2.655246222222222e-05, |
|
"loss": 0.0019, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.6516906666666666e-05, |
|
"loss": 0.0019, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 2.6481351111111112e-05, |
|
"loss": 0.0019, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.6445866666666667e-05, |
|
"loss": 0.0019, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.6410311111111112e-05, |
|
"loss": 0.0019, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 2.6374755555555558e-05, |
|
"loss": 0.0019, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.6339200000000004e-05, |
|
"loss": 0.0019, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.6303715555555555e-05, |
|
"loss": 0.0019, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.626816e-05, |
|
"loss": 0.0019, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6232604444444446e-05, |
|
"loss": 0.0019, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6197048888888892e-05, |
|
"loss": 0.0018, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 2.6161564444444444e-05, |
|
"loss": 0.0019, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.612600888888889e-05, |
|
"loss": 0.0019, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.6090453333333335e-05, |
|
"loss": 0.0019, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 2.6054897777777777e-05, |
|
"loss": 0.0019, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.6019413333333332e-05, |
|
"loss": 0.0018, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.5983857777777778e-05, |
|
"loss": 0.0018, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.5948302222222223e-05, |
|
"loss": 0.0019, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.5912746666666666e-05, |
|
"loss": 0.0019, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 2.587719111111111e-05, |
|
"loss": 0.0018, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5841706666666666e-05, |
|
"loss": 0.0018, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5806151111111112e-05, |
|
"loss": 0.0018, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.5770595555555554e-05, |
|
"loss": 0.0018, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.573504e-05, |
|
"loss": 0.0018, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5699484444444445e-05, |
|
"loss": 0.0019, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 2.5664e-05, |
|
"loss": 0.0018, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5628444444444446e-05, |
|
"loss": 0.0018, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5592888888888888e-05, |
|
"loss": 0.0018, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.5557333333333333e-05, |
|
"loss": 0.0019, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.552184888888889e-05, |
|
"loss": 0.0018, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 2.5486293333333334e-05, |
|
"loss": 0.0018, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.545073777777778e-05, |
|
"loss": 0.0018, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5415182222222225e-05, |
|
"loss": 0.0018, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 2.5379697777777777e-05, |
|
"loss": 0.0018, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.5344142222222223e-05, |
|
"loss": 0.0018, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.5308586666666668e-05, |
|
"loss": 0.0018, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.5273031111111114e-05, |
|
"loss": 0.0018, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5237475555555556e-05, |
|
"loss": 0.0018, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.520192e-05, |
|
"loss": 0.0018, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 2.5166364444444447e-05, |
|
"loss": 0.0018, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.513088e-05, |
|
"loss": 0.0018, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.5095324444444444e-05, |
|
"loss": 0.0018, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.505976888888889e-05, |
|
"loss": 0.0018, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.5024213333333335e-05, |
|
"loss": 0.0018, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4988728888888887e-05, |
|
"loss": 0.0017, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.4953173333333333e-05, |
|
"loss": 0.0018, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.491761777777778e-05, |
|
"loss": 0.0018, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.4882062222222224e-05, |
|
"loss": 0.0018, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4846506666666666e-05, |
|
"loss": 0.0018, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.481102222222222e-05, |
|
"loss": 0.0017, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.4775466666666667e-05, |
|
"loss": 0.0018, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4739911111111112e-05, |
|
"loss": 0.0018, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.4704355555555554e-05, |
|
"loss": 0.0017, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.46688e-05, |
|
"loss": 0.0018, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.463331555555556e-05, |
|
"loss": 0.0017, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.459776e-05, |
|
"loss": 0.0018, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.4562204444444443e-05, |
|
"loss": 0.0017, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.452664888888889e-05, |
|
"loss": 0.0017, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4491093333333334e-05, |
|
"loss": 0.0018, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.445560888888889e-05, |
|
"loss": 0.0017, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4420053333333335e-05, |
|
"loss": 0.0018, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.438449777777778e-05, |
|
"loss": 0.0017, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4348942222222222e-05, |
|
"loss": 0.0018, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4313386666666665e-05, |
|
"loss": 0.0017, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.4277902222222223e-05, |
|
"loss": 0.0018, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.424234666666667e-05, |
|
"loss": 0.0017, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.4206791111111114e-05, |
|
"loss": 0.0018, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.4171235555555556e-05, |
|
"loss": 0.0017, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.4135680000000002e-05, |
|
"loss": 0.0017, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.4100195555555557e-05, |
|
"loss": 0.0017, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.4064640000000003e-05, |
|
"loss": 0.0017, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.4029084444444445e-05, |
|
"loss": 0.0017, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.399352888888889e-05, |
|
"loss": 0.0016, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.3958044444444446e-05, |
|
"loss": 0.0017, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.3922488888888888e-05, |
|
"loss": 0.0017, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 2.3886933333333333e-05, |
|
"loss": 0.0017, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.385137777777778e-05, |
|
"loss": 0.0017, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.3815822222222224e-05, |
|
"loss": 0.0017, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 2.3780337777777776e-05, |
|
"loss": 0.0017, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.3744782222222222e-05, |
|
"loss": 0.0016, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.3709226666666667e-05, |
|
"loss": 0.0017, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 2.3673671111111113e-05, |
|
"loss": 0.0017, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3638186666666665e-05, |
|
"loss": 0.0016, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.360263111111111e-05, |
|
"loss": 0.0017, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.3567075555555556e-05, |
|
"loss": 0.0017, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.353152e-05, |
|
"loss": 0.0017, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.3495964444444443e-05, |
|
"loss": 0.0016, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 2.346040888888889e-05, |
|
"loss": 0.0017, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3424924444444448e-05, |
|
"loss": 0.0017, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3389368888888886e-05, |
|
"loss": 0.0017, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 2.3353813333333332e-05, |
|
"loss": 0.0017, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.3318257777777777e-05, |
|
"loss": 0.0016, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.3282702222222223e-05, |
|
"loss": 0.0017, |
|
"step": 94500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.3247146666666665e-05, |
|
"loss": 0.0016, |
|
"step": 95000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.3211662222222224e-05, |
|
"loss": 0.0016, |
|
"step": 95500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 2.317610666666667e-05, |
|
"loss": 0.0017, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.314055111111111e-05, |
|
"loss": 0.0017, |
|
"step": 96500 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.3104995555555554e-05, |
|
"loss": 0.0017, |
|
"step": 97000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.306944e-05, |
|
"loss": 0.0017, |
|
"step": 97500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.3033955555555558e-05, |
|
"loss": 0.0017, |
|
"step": 98000 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.2998400000000003e-05, |
|
"loss": 0.0016, |
|
"step": 98500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.2962844444444445e-05, |
|
"loss": 0.0016, |
|
"step": 99000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.292728888888889e-05, |
|
"loss": 0.0016, |
|
"step": 99500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.2891733333333333e-05, |
|
"loss": 0.0016, |
|
"step": 100000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 2.2856177777777775e-05, |
|
"loss": 0.0016, |
|
"step": 100500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.282062222222222e-05, |
|
"loss": 0.0017, |
|
"step": 101000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 2.2785066666666666e-05, |
|
"loss": 0.0016, |
|
"step": 101500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.2749582222222225e-05, |
|
"loss": 0.0016, |
|
"step": 102000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.2714026666666667e-05, |
|
"loss": 0.0016, |
|
"step": 102500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.2678471111111113e-05, |
|
"loss": 0.0016, |
|
"step": 103000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2642986666666668e-05, |
|
"loss": 0.0016, |
|
"step": 103500 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2607431111111113e-05, |
|
"loss": 0.0016, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 2.2571875555555556e-05, |
|
"loss": 0.0017, |
|
"step": 104500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.253632e-05, |
|
"loss": 0.0016, |
|
"step": 105000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.2500764444444447e-05, |
|
"loss": 0.0016, |
|
"step": 105500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 2.2465208888888892e-05, |
|
"loss": 0.0016, |
|
"step": 106000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.2429653333333334e-05, |
|
"loss": 0.0016, |
|
"step": 106500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.239409777777778e-05, |
|
"loss": 0.0016, |
|
"step": 107000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.2358613333333335e-05, |
|
"loss": 0.0016, |
|
"step": 107500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2323057777777777e-05, |
|
"loss": 0.0016, |
|
"step": 108000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.2287573333333332e-05, |
|
"loss": 0.0016, |
|
"step": 108500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2252017777777778e-05, |
|
"loss": 0.0016, |
|
"step": 109000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2216462222222224e-05, |
|
"loss": 0.0016, |
|
"step": 109500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.2180906666666666e-05, |
|
"loss": 0.0016, |
|
"step": 110000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.214535111111111e-05, |
|
"loss": 0.0016, |
|
"step": 110500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.2109866666666666e-05, |
|
"loss": 0.0016, |
|
"step": 111000 |
|