|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"global_step": 94323, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 4.973495329877125e-05, |
|
"loss": 1.7869, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.9469906597542485e-05, |
|
"loss": 1.7591, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 4.920485989631373e-05, |
|
"loss": 1.7192, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 4.893981319508498e-05, |
|
"loss": 1.6754, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 4.867476649385622e-05, |
|
"loss": 1.6344, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.840971979262746e-05, |
|
"loss": 1.6237, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.8144673091398703e-05, |
|
"loss": 1.5965, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.787962639016995e-05, |
|
"loss": 1.5753, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.761457968894119e-05, |
|
"loss": 1.5598, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.734953298771243e-05, |
|
"loss": 1.5247, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.708448628648368e-05, |
|
"loss": 1.5149, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.681943958525493e-05, |
|
"loss": 1.5089, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.655439288402616e-05, |
|
"loss": 1.4888, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.628934618279741e-05, |
|
"loss": 1.4738, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.602429948156866e-05, |
|
"loss": 1.4535, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.57592527803399e-05, |
|
"loss": 1.474, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.549420607911114e-05, |
|
"loss": 1.4491, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.522915937788238e-05, |
|
"loss": 1.4336, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.496411267665363e-05, |
|
"loss": 1.4323, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.469906597542487e-05, |
|
"loss": 1.4189, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.443401927419611e-05, |
|
"loss": 1.4101, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.416897257296736e-05, |
|
"loss": 1.3992, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.390392587173861e-05, |
|
"loss": 1.3975, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 4.363887917050984e-05, |
|
"loss": 1.3845, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.337383246928109e-05, |
|
"loss": 1.3795, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 4.310878576805233e-05, |
|
"loss": 1.3867, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.284373906682357e-05, |
|
"loss": 1.3647, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.257869236559482e-05, |
|
"loss": 1.3638, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.231364566436606e-05, |
|
"loss": 1.3507, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.204859896313731e-05, |
|
"loss": 1.3462, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.178355226190855e-05, |
|
"loss": 1.3511, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 4.151850556067979e-05, |
|
"loss": 1.3384, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.125345885945104e-05, |
|
"loss": 1.319, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 4.098841215822228e-05, |
|
"loss": 1.3331, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.072336545699352e-05, |
|
"loss": 1.3119, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 4.045831875576477e-05, |
|
"loss": 1.32, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 4.019327205453601e-05, |
|
"loss": 1.3224, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.992822535330725e-05, |
|
"loss": 1.2868, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.96631786520785e-05, |
|
"loss": 1.2955, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 3.939813195084974e-05, |
|
"loss": 1.2932, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.913308524962099e-05, |
|
"loss": 1.277, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 3.886803854839223e-05, |
|
"loss": 1.2759, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.860299184716347e-05, |
|
"loss": 1.2793, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 3.833794514593472e-05, |
|
"loss": 1.2614, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.807289844470596e-05, |
|
"loss": 1.2768, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.78078517434772e-05, |
|
"loss": 1.2615, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.754280504224845e-05, |
|
"loss": 1.2525, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.727775834101969e-05, |
|
"loss": 1.2593, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.701271163979093e-05, |
|
"loss": 1.2435, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 3.674766493856218e-05, |
|
"loss": 1.241, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.648261823733342e-05, |
|
"loss": 1.2449, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.621757153610466e-05, |
|
"loss": 1.2229, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.595252483487591e-05, |
|
"loss": 1.2296, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.568747813364715e-05, |
|
"loss": 1.2257, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 3.5422431432418397e-05, |
|
"loss": 1.2292, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.515738473118964e-05, |
|
"loss": 1.2351, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.489233802996088e-05, |
|
"loss": 1.2179, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.4627291328732127e-05, |
|
"loss": 1.2179, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 3.436224462750337e-05, |
|
"loss": 1.2057, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.409719792627461e-05, |
|
"loss": 1.2081, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 3.3832151225045856e-05, |
|
"loss": 1.2069, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.35671045238171e-05, |
|
"loss": 1.1903, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.330205782258834e-05, |
|
"loss": 1.1958, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 3.3037011121359586e-05, |
|
"loss": 1.1746, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 3.277196442013083e-05, |
|
"loss": 1.1648, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 3.2506917718902075e-05, |
|
"loss": 1.1615, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 3.2241871017673316e-05, |
|
"loss": 1.1732, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 3.197682431644456e-05, |
|
"loss": 1.1664, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 3.1711777615215805e-05, |
|
"loss": 1.1639, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 3.1446730913987046e-05, |
|
"loss": 1.1507, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 3.118168421275829e-05, |
|
"loss": 1.1577, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.0916637511529535e-05, |
|
"loss": 1.1563, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 3.0651590810300776e-05, |
|
"loss": 1.1528, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 3.0386544109072017e-05, |
|
"loss": 1.1461, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.0121497407843262e-05, |
|
"loss": 1.1574, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.985645070661451e-05, |
|
"loss": 1.1503, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.9591404005385747e-05, |
|
"loss": 1.1522, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.9326357304156992e-05, |
|
"loss": 1.1451, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 2.9061310602928236e-05, |
|
"loss": 1.1335, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8796263901699484e-05, |
|
"loss": 1.1383, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 2.8531217200470722e-05, |
|
"loss": 1.1363, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.8266170499241966e-05, |
|
"loss": 1.1277, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 2.800112379801321e-05, |
|
"loss": 1.1332, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.773607709678446e-05, |
|
"loss": 1.1249, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 2.7471030395555696e-05, |
|
"loss": 1.1283, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.720598369432694e-05, |
|
"loss": 1.1177, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 2.694093699309819e-05, |
|
"loss": 1.1023, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.6675890291869426e-05, |
|
"loss": 1.1245, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 2.641084359064067e-05, |
|
"loss": 1.1194, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 2.6145796889411915e-05, |
|
"loss": 1.1132, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 2.5880750188183163e-05, |
|
"loss": 1.1073, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 2.56157034869544e-05, |
|
"loss": 1.0974, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.5350656785725645e-05, |
|
"loss": 1.1003, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.508561008449689e-05, |
|
"loss": 1.0966, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 2.4820563383268134e-05, |
|
"loss": 1.0928, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.4555516682039375e-05, |
|
"loss": 1.0868, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 2.429046998081062e-05, |
|
"loss": 1.0902, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 2.4025423279581864e-05, |
|
"loss": 1.0978, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 2.376037657835311e-05, |
|
"loss": 1.0828, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 2.349532987712435e-05, |
|
"loss": 1.1016, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 2.3230283175895594e-05, |
|
"loss": 1.0892, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 2.296523647466684e-05, |
|
"loss": 1.0832, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.270018977343808e-05, |
|
"loss": 1.0668, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 2.2435143072209324e-05, |
|
"loss": 1.0756, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.217009637098057e-05, |
|
"loss": 1.0714, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1905049669751813e-05, |
|
"loss": 1.0592, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.1640002968523054e-05, |
|
"loss": 1.07, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 2.13749562672943e-05, |
|
"loss": 1.0548, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 2.1109909566065543e-05, |
|
"loss": 1.0589, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 2.0844862864836784e-05, |
|
"loss": 1.0662, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 2.057981616360803e-05, |
|
"loss": 1.0678, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 2.0314769462379273e-05, |
|
"loss": 1.0545, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.0049722761150517e-05, |
|
"loss": 1.0486, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 1.978467605992176e-05, |
|
"loss": 1.051, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 1.9519629358693003e-05, |
|
"loss": 1.0389, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 1.9254582657464247e-05, |
|
"loss": 1.0445, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 1.8989535956235492e-05, |
|
"loss": 1.0511, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 1.8724489255006733e-05, |
|
"loss": 1.0384, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 1.8459442553777974e-05, |
|
"loss": 1.0386, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 1.819439585254922e-05, |
|
"loss": 1.0321, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 1.7929349151320463e-05, |
|
"loss": 1.0424, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 1.7664302450091707e-05, |
|
"loss": 1.0421, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.7399255748862948e-05, |
|
"loss": 1.0334, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 1.7134209047634196e-05, |
|
"loss": 1.0385, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 1.6869162346405437e-05, |
|
"loss": 1.0265, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.660411564517668e-05, |
|
"loss": 1.0245, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 1.6339068943947926e-05, |
|
"loss": 1.0146, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 1.6074022242719167e-05, |
|
"loss": 1.0161, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 1.580897554149041e-05, |
|
"loss": 1.0196, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 1.5543928840261653e-05, |
|
"loss": 1.0118, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.52788821390329e-05, |
|
"loss": 0.9996, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1.5013835437804142e-05, |
|
"loss": 1.0026, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 1.4748788736575386e-05, |
|
"loss": 1.0156, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 1.4483742035346629e-05, |
|
"loss": 1.0017, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 1.4218695334117873e-05, |
|
"loss": 1.0129, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.3953648632889116e-05, |
|
"loss": 1.0093, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 1.3688601931660359e-05, |
|
"loss": 0.9939, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 1.3423555230431603e-05, |
|
"loss": 0.9891, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 1.3158508529202846e-05, |
|
"loss": 0.9978, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.289346182797409e-05, |
|
"loss": 1.0043, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 1.2628415126745333e-05, |
|
"loss": 0.9913, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 1.2363368425516576e-05, |
|
"loss": 0.9986, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 1.209832172428782e-05, |
|
"loss": 0.9892, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 1.1833275023059063e-05, |
|
"loss": 0.9995, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.1568228321830308e-05, |
|
"loss": 0.9944, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 1.130318162060155e-05, |
|
"loss": 0.998, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 1.1038134919372795e-05, |
|
"loss": 0.9773, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 1.0773088218144038e-05, |
|
"loss": 0.9928, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 1.050804151691528e-05, |
|
"loss": 0.9787, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.0242994815686525e-05, |
|
"loss": 0.978, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 9.977948114457768e-06, |
|
"loss": 0.9757, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 9.712901413229012e-06, |
|
"loss": 0.9773, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 9.447854712000255e-06, |
|
"loss": 0.9747, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 9.1828080107715e-06, |
|
"loss": 0.9703, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 8.917761309542742e-06, |
|
"loss": 0.9631, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 8.652714608313986e-06, |
|
"loss": 0.9754, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 8.387667907085228e-06, |
|
"loss": 0.9834, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 8.122621205856472e-06, |
|
"loss": 0.9797, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 7.857574504627715e-06, |
|
"loss": 0.9761, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.592527803398959e-06, |
|
"loss": 0.9689, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.327481102170203e-06, |
|
"loss": 0.9683, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 7.062434400941446e-06, |
|
"loss": 0.964, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 6.79738769971269e-06, |
|
"loss": 0.968, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 6.532340998483934e-06, |
|
"loss": 0.9638, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 6.267294297255176e-06, |
|
"loss": 0.9651, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 6.00224759602642e-06, |
|
"loss": 0.9616, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 5.737200894797664e-06, |
|
"loss": 0.9586, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5.472154193568907e-06, |
|
"loss": 0.965, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5.207107492340151e-06, |
|
"loss": 0.9616, |
|
"step": 84500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 4.942060791111394e-06, |
|
"loss": 0.961, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.677014089882638e-06, |
|
"loss": 0.9471, |
|
"step": 85500 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 4.411967388653881e-06, |
|
"loss": 0.9676, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 4.146920687425124e-06, |
|
"loss": 0.9451, |
|
"step": 86500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 3.881873986196368e-06, |
|
"loss": 0.9501, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 3.6168272849676116e-06, |
|
"loss": 0.949, |
|
"step": 87500 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 3.3517805837388548e-06, |
|
"loss": 0.9511, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 3.0867338825100984e-06, |
|
"loss": 0.9452, |
|
"step": 88500 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 2.821687181281342e-06, |
|
"loss": 0.9458, |
|
"step": 89000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 2.556640480052585e-06, |
|
"loss": 0.9519, |
|
"step": 89500 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 2.2915937788238288e-06, |
|
"loss": 0.9391, |
|
"step": 90000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 2.0265470775950724e-06, |
|
"loss": 0.9483, |
|
"step": 90500 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 1.7615003763663158e-06, |
|
"loss": 0.9483, |
|
"step": 91000 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.4964536751375594e-06, |
|
"loss": 0.9454, |
|
"step": 91500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.2314069739088028e-06, |
|
"loss": 0.9371, |
|
"step": 92000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.663602726800462e-07, |
|
"loss": 0.9447, |
|
"step": 92500 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.013135714512897e-07, |
|
"loss": 0.9461, |
|
"step": 93000 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 4.362668702225332e-07, |
|
"loss": 0.9397, |
|
"step": 93500 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 1.7122016899377671e-07, |
|
"loss": 0.9397, |
|
"step": 94000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 94323, |
|
"total_flos": 8.797388561511875e+17, |
|
"train_loss": 1.148913297653906, |
|
"train_runtime": 104744.2409, |
|
"train_samples_per_second": 9.005, |
|
"train_steps_per_second": 0.901 |
|
} |
|
], |
|
"max_steps": 94323, |
|
"num_train_epochs": 3, |
|
"total_flos": 8.797388561511875e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|