{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.0,
  "global_step": 690,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 3.468208092485549e-07,
      "loss": 6.0,
      "step": 1
    },
    {
      "epoch": 0.01,
      "learning_rate": 6.936416184971098e-07,
      "loss": 6.0195,
      "step": 2
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.0404624277456647e-06,
      "loss": 6.0156,
      "step": 3
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.3872832369942195e-06,
      "loss": 6.0352,
      "step": 4
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.7341040462427746e-06,
      "loss": 6.0273,
      "step": 5
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.0809248554913294e-06,
      "loss": 5.9805,
      "step": 6
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.4277456647398847e-06,
      "loss": 6.0,
      "step": 7
    },
    {
      "epoch": 0.02,
      "learning_rate": 2.774566473988439e-06,
      "loss": 5.9375,
      "step": 8
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.1213872832369943e-06,
      "loss": 5.9023,
      "step": 9
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.468208092485549e-06,
      "loss": 5.9219,
      "step": 10
    },
    {
      "epoch": 0.03,
      "learning_rate": 3.8150289017341036e-06,
      "loss": 5.9531,
      "step": 11
    },
    {
      "epoch": 0.03,
      "learning_rate": 4.161849710982659e-06,
      "loss": 5.9766,
      "step": 12
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.508670520231214e-06,
      "loss": 5.9141,
      "step": 13
    },
    {
      "epoch": 0.04,
      "learning_rate": 4.855491329479769e-06,
      "loss": 5.8828,
      "step": 14
    },
    {
      "epoch": 0.04,
      "learning_rate": 5.202312138728324e-06,
      "loss": 5.9219,
      "step": 15
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.549132947976878e-06,
      "loss": 5.8633,
      "step": 16
    },
    {
      "epoch": 0.05,
      "learning_rate": 5.895953757225434e-06,
      "loss": 5.9062,
      "step": 17
    },
    {
      "epoch": 0.05,
      "learning_rate": 6.242774566473989e-06,
      "loss": 5.8672,
      "step": 18
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.589595375722544e-06,
      "loss": 5.7695,
      "step": 19
    },
    {
      "epoch": 0.06,
      "learning_rate": 6.936416184971098e-06,
      "loss": 5.8008,
      "step": 20
    },
    {
      "epoch": 0.06,
      "learning_rate": 7.283236994219653e-06,
      "loss": 5.75,
      "step": 21
    },
    {
      "epoch": 0.06,
      "learning_rate": 7.630057803468207e-06,
      "loss": 5.6953,
      "step": 22
    },
    {
      "epoch": 0.07,
      "learning_rate": 7.976878612716762e-06,
      "loss": 5.6758,
      "step": 23
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.323699421965318e-06,
      "loss": 5.6055,
      "step": 24
    },
    {
      "epoch": 0.07,
      "learning_rate": 8.670520231213873e-06,
      "loss": 5.5859,
      "step": 25
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.017341040462428e-06,
      "loss": 5.4531,
      "step": 26
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.364161849710983e-06,
      "loss": 5.5078,
      "step": 27
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.710982658959539e-06,
      "loss": 5.3711,
      "step": 28
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.0057803468208092e-05,
      "loss": 5.1953,
      "step": 29
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.0404624277456647e-05,
      "loss": 5.2578,
      "step": 30
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.0751445086705203e-05,
      "loss": 5.0273,
      "step": 31
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.1098265895953756e-05,
      "loss": 4.9766,
      "step": 32
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.1445086705202312e-05,
      "loss": 4.7852,
      "step": 33
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.1791907514450869e-05,
      "loss": 4.8984,
      "step": 34
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2138728323699422e-05,
      "loss": 4.625,
      "step": 35
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.2485549132947977e-05,
      "loss": 4.5156,
      "step": 36
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.2832369942196533e-05,
      "loss": 4.5508,
      "step": 37
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.3179190751445088e-05,
      "loss": 4.3281,
      "step": 38
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.3526011560693641e-05,
      "loss": 4.5938,
      "step": 39
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.3872832369942197e-05,
      "loss": 4.1172,
      "step": 40
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.4219653179190752e-05,
      "loss": 4.2188,
      "step": 41
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.4566473988439305e-05,
      "loss": 4.0156,
      "step": 42
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.491329479768786e-05,
      "loss": 4.0234,
      "step": 43
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.5260115606936414e-05,
      "loss": 3.9141,
      "step": 44
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.560693641618497e-05,
      "loss": 3.6562,
      "step": 45
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.5953757225433525e-05,
      "loss": 3.6133,
      "step": 46
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.630057803468208e-05,
      "loss": 3.1582,
      "step": 47
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.6647398843930635e-05,
      "loss": 3.248,
      "step": 48
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.6994219653179194e-05,
      "loss": 3.0996,
      "step": 49
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.7341040462427746e-05,
      "loss": 3.1875,
      "step": 50
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.76878612716763e-05,
      "loss": 2.9941,
      "step": 51
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.8034682080924856e-05,
      "loss": 2.9297,
      "step": 52
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.838150289017341e-05,
      "loss": 3.1094,
      "step": 53
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.8728323699421967e-05,
      "loss": 2.7812,
      "step": 54
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9075144508670522e-05,
      "loss": 1.9785,
      "step": 55
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9421965317919077e-05,
      "loss": 3.4531,
      "step": 56
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.976878612716763e-05,
      "loss": 2.1875,
      "step": 57
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.0115606936416184e-05,
      "loss": 1.9697,
      "step": 58
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.046242774566474e-05,
      "loss": 2.1777,
      "step": 59
    },
    {
      "epoch": 0.17,
      "learning_rate": 2.0809248554913295e-05,
      "loss": 1.8594,
      "step": 60
    },
    {
      "epoch": 0.18,
      "learning_rate": 2.115606936416185e-05,
      "loss": 2.2441,
      "step": 61
    },
    {
      "epoch": 0.18,
      "learning_rate": 2.1502890173410405e-05,
      "loss": 2.3242,
      "step": 62
    },
    {
      "epoch": 0.18,
      "learning_rate": 2.184971098265896e-05,
      "loss": 1.9541,
      "step": 63
    },
    {
      "epoch": 0.19,
      "learning_rate": 2.2196531791907513e-05,
      "loss": 1.7412,
      "step": 64
    },
    {
      "epoch": 0.19,
      "learning_rate": 2.2543352601156068e-05,
      "loss": 2.2969,
      "step": 65
    },
    {
      "epoch": 0.19,
      "learning_rate": 2.2890173410404623e-05,
      "loss": 1.3818,
      "step": 66
    },
    {
      "epoch": 0.19,
      "learning_rate": 2.323699421965318e-05,
      "loss": 1.9668,
      "step": 67
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.3583815028901737e-05,
      "loss": 2.4727,
      "step": 68
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.3930635838150292e-05,
      "loss": 2.0645,
      "step": 69
    },
    {
      "epoch": 0.2,
      "learning_rate": 2.4277456647398844e-05,
      "loss": 2.0312,
      "step": 70
    },
    {
      "epoch": 0.21,
      "learning_rate": 2.46242774566474e-05,
      "loss": 2.0547,
      "step": 71
    },
    {
      "epoch": 0.21,
      "learning_rate": 2.4971098265895955e-05,
      "loss": 1.7764,
      "step": 72
    },
    {
      "epoch": 0.21,
      "learning_rate": 2.531791907514451e-05,
      "loss": 1.6074,
      "step": 73
    },
    {
      "epoch": 0.21,
      "learning_rate": 2.5664739884393065e-05,
      "loss": 1.2148,
      "step": 74
    },
    {
      "epoch": 0.22,
      "learning_rate": 2.601156069364162e-05,
      "loss": 1.3525,
      "step": 75
    },
    {
      "epoch": 0.22,
      "learning_rate": 2.6358381502890176e-05,
      "loss": 1.6963,
      "step": 76
    },
    {
      "epoch": 0.22,
      "learning_rate": 2.6705202312138728e-05,
      "loss": 1.4746,
      "step": 77
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.7052023121387283e-05,
      "loss": 1.8105,
      "step": 78
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.7398843930635838e-05,
      "loss": 1.5195,
      "step": 79
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.7745664739884393e-05,
      "loss": 1.1855,
      "step": 80
    },
    {
      "epoch": 0.23,
      "learning_rate": 2.809248554913295e-05,
      "loss": 1.3115,
      "step": 81
    },
    {
      "epoch": 0.24,
      "learning_rate": 2.8439306358381504e-05,
      "loss": 1.6318,
      "step": 82
    },
    {
      "epoch": 0.24,
      "learning_rate": 2.878612716763006e-05,
      "loss": 2.2656,
      "step": 83
    },
    {
      "epoch": 0.24,
      "learning_rate": 2.913294797687861e-05,
      "loss": 1.8789,
      "step": 84
    },
    {
      "epoch": 0.25,
      "learning_rate": 2.9479768786127166e-05,
      "loss": 1.3555,
      "step": 85
    },
    {
      "epoch": 0.25,
      "learning_rate": 2.982658959537572e-05,
      "loss": 1.1211,
      "step": 86
    },