|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 7.325045768617493, |
|
"global_step": 15500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 9.952741020793951e-05, |
|
"loss": 2.3854, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 9.905482041587902e-05, |
|
"loss": 2.3565, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 9.858223062381853e-05, |
|
"loss": 2.288, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 9.810964083175804e-05, |
|
"loss": 2.2879, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 9.763705103969755e-05, |
|
"loss": 2.3212, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 9.716446124763706e-05, |
|
"loss": 2.2266, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 9.669187145557656e-05, |
|
"loss": 2.2894, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 9.621928166351607e-05, |
|
"loss": 2.2738, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 9.574669187145558e-05, |
|
"loss": 2.3342, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.527410207939509e-05, |
|
"loss": 2.2613, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.48015122873346e-05, |
|
"loss": 2.1509, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 9.432892249527411e-05, |
|
"loss": 2.2683, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.385633270321362e-05, |
|
"loss": 2.2501, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 9.338374291115312e-05, |
|
"loss": 2.2351, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 9.291115311909263e-05, |
|
"loss": 2.2134, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 9.243856332703214e-05, |
|
"loss": 2.2317, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 9.196597353497165e-05, |
|
"loss": 2.226, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 9.149338374291116e-05, |
|
"loss": 2.1991, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 9.102079395085067e-05, |
|
"loss": 2.1648, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.054820415879018e-05, |
|
"loss": 2.1412, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 9.007561436672968e-05, |
|
"loss": 2.2009, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 8.960302457466919e-05, |
|
"loss": 2.205, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 8.91304347826087e-05, |
|
"loss": 2.1226, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 8.865784499054821e-05, |
|
"loss": 2.1963, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.818525519848772e-05, |
|
"loss": 2.0325, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 8.771266540642723e-05, |
|
"loss": 2.1478, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.724007561436674e-05, |
|
"loss": 2.1116, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 8.676748582230624e-05, |
|
"loss": 2.1805, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 8.629489603024575e-05, |
|
"loss": 2.109, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 8.582230623818526e-05, |
|
"loss": 2.1156, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 8.534971644612477e-05, |
|
"loss": 2.152, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 8.487712665406428e-05, |
|
"loss": 2.1512, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 8.440453686200379e-05, |
|
"loss": 2.1396, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 8.39319470699433e-05, |
|
"loss": 2.0951, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 8.34593572778828e-05, |
|
"loss": 2.1533, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.298676748582231e-05, |
|
"loss": 2.0959, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.251417769376182e-05, |
|
"loss": 2.1345, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 8.204158790170132e-05, |
|
"loss": 2.0643, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 8.156899810964084e-05, |
|
"loss": 2.0566, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 8.109640831758035e-05, |
|
"loss": 2.0729, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 8.062381852551986e-05, |
|
"loss": 2.0768, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 8.015122873345936e-05, |
|
"loss": 2.1026, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 7.967863894139886e-05, |
|
"loss": 2.0804, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 7.920604914933838e-05, |
|
"loss": 2.0875, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 7.873345935727789e-05, |
|
"loss": 2.039, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 7.82608695652174e-05, |
|
"loss": 2.0605, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.77882797731569e-05, |
|
"loss": 2.0495, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 7.731568998109642e-05, |
|
"loss": 2.0146, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 7.684310018903592e-05, |
|
"loss": 2.0378, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.637051039697543e-05, |
|
"loss": 2.0806, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 7.589792060491494e-05, |
|
"loss": 2.0675, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 7.542533081285445e-05, |
|
"loss": 2.0247, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 7.495274102079396e-05, |
|
"loss": 2.0442, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 7.448015122873347e-05, |
|
"loss": 2.0232, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 7.400756143667296e-05, |
|
"loss": 2.0648, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 7.353497164461248e-05, |
|
"loss": 2.0579, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 7.306238185255199e-05, |
|
"loss": 2.0438, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 7.25897920604915e-05, |
|
"loss": 2.0849, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 7.211720226843101e-05, |
|
"loss": 1.9828, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 7.16446124763705e-05, |
|
"loss": 1.9923, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 7.117202268431003e-05, |
|
"loss": 1.9898, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 7.069943289224953e-05, |
|
"loss": 2.0663, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 7.022684310018904e-05, |
|
"loss": 1.9967, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"learning_rate": 6.975425330812855e-05, |
|
"loss": 1.9941, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"learning_rate": 6.928166351606805e-05, |
|
"loss": 1.9458, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 6.880907372400757e-05, |
|
"loss": 2.0411, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.833648393194706e-05, |
|
"loss": 1.9634, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"learning_rate": 6.786389413988659e-05, |
|
"loss": 1.9897, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 3.26, |
|
"learning_rate": 6.73913043478261e-05, |
|
"loss": 1.9798, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 6.691871455576559e-05, |
|
"loss": 2.0127, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.36, |
|
"learning_rate": 6.644612476370511e-05, |
|
"loss": 1.9827, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 6.59735349716446e-05, |
|
"loss": 1.9949, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 6.550094517958413e-05, |
|
"loss": 2.0152, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.502835538752364e-05, |
|
"loss": 1.959, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 6.455576559546313e-05, |
|
"loss": 1.9876, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.59, |
|
"learning_rate": 6.408317580340265e-05, |
|
"loss": 1.9955, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 6.361058601134215e-05, |
|
"loss": 1.9812, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 6.313799621928167e-05, |
|
"loss": 1.9219, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 3.73, |
|
"learning_rate": 6.266540642722118e-05, |
|
"loss": 1.9576, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 6.219281663516069e-05, |
|
"loss": 1.9608, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 6.17202268431002e-05, |
|
"loss": 1.9916, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 6.124763705103969e-05, |
|
"loss": 2.0394, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 6.0775047258979214e-05, |
|
"loss": 1.9416, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.0302457466918716e-05, |
|
"loss": 2.0283, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"learning_rate": 5.982986767485823e-05, |
|
"loss": 2.0244, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 5.935727788279773e-05, |
|
"loss": 1.9132, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 5.888468809073724e-05, |
|
"loss": 1.9917, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 5.841209829867676e-05, |
|
"loss": 1.9355, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 5.793950850661626e-05, |
|
"loss": 1.9248, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 5.7466918714555774e-05, |
|
"loss": 2.0129, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 5.6994328922495276e-05, |
|
"loss": 1.9497, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 5.652173913043478e-05, |
|
"loss": 1.8806, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 5.604914933837429e-05, |
|
"loss": 1.9262, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 5.55765595463138e-05, |
|
"loss": 2.0024, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 5.510396975425332e-05, |
|
"loss": 1.9407, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 5.463137996219282e-05, |
|
"loss": 1.991, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 5.415879017013232e-05, |
|
"loss": 1.964, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 5.3686200378071836e-05, |
|
"loss": 1.9025, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"learning_rate": 5.3213610586011344e-05, |
|
"loss": 1.9489, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 4.73, |
|
"learning_rate": 5.274102079395086e-05, |
|
"loss": 1.942, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 5.226843100189036e-05, |
|
"loss": 1.9868, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"learning_rate": 5.179584120982986e-05, |
|
"loss": 1.9231, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 5.132325141776938e-05, |
|
"loss": 1.9625, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 4.91, |
|
"learning_rate": 5.085066162570889e-05, |
|
"loss": 1.9306, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 5.03780718336484e-05, |
|
"loss": 1.9106, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 4.9905482041587904e-05, |
|
"loss": 1.9209, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 5.06, |
|
"learning_rate": 4.943289224952741e-05, |
|
"loss": 1.925, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 5.1, |
|
"learning_rate": 4.896030245746692e-05, |
|
"loss": 1.9368, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 4.848771266540643e-05, |
|
"loss": 1.9076, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"learning_rate": 4.801512287334594e-05, |
|
"loss": 1.8892, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 4.754253308128545e-05, |
|
"loss": 1.9667, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 4.7069943289224955e-05, |
|
"loss": 1.9371, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 4.6597353497164464e-05, |
|
"loss": 1.8549, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 4.612476370510397e-05, |
|
"loss": 1.9157, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 5.43, |
|
"learning_rate": 4.565217391304348e-05, |
|
"loss": 1.9051, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.48, |
|
"learning_rate": 4.517958412098299e-05, |
|
"loss": 1.9175, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 5.53, |
|
"learning_rate": 4.47069943289225e-05, |
|
"loss": 1.9367, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 5.58, |
|
"learning_rate": 4.423440453686201e-05, |
|
"loss": 1.8351, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 4.3761814744801515e-05, |
|
"loss": 1.8961, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.3289224952741024e-05, |
|
"loss": 1.8583, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 4.281663516068053e-05, |
|
"loss": 1.8909, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"learning_rate": 4.234404536862004e-05, |
|
"loss": 1.8642, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 4.187145557655955e-05, |
|
"loss": 1.9229, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 4.139886578449906e-05, |
|
"loss": 1.9524, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"learning_rate": 4.0926275992438567e-05, |
|
"loss": 1.9143, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.95, |
|
"learning_rate": 4.045368620037807e-05, |
|
"loss": 1.9229, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.9981096408317584e-05, |
|
"loss": 1.9368, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 3.950850661625709e-05, |
|
"loss": 1.9183, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 3.90359168241966e-05, |
|
"loss": 1.8864, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 3.856332703213611e-05, |
|
"loss": 1.8453, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"learning_rate": 3.809073724007561e-05, |
|
"loss": 1.8785, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 3.7618147448015126e-05, |
|
"loss": 1.9145, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 6.29, |
|
"learning_rate": 3.7145557655954635e-05, |
|
"loss": 1.9477, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 3.6672967863894143e-05, |
|
"loss": 1.8378, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 3.620037807183365e-05, |
|
"loss": 1.843, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 3.572778827977316e-05, |
|
"loss": 1.8386, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.525519848771266e-05, |
|
"loss": 1.9, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 3.478260869565218e-05, |
|
"loss": 1.8437, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 3.4310018903591686e-05, |
|
"loss": 1.9301, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 3.3837429111531195e-05, |
|
"loss": 1.8447, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 3.33648393194707e-05, |
|
"loss": 1.8811, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 6.71, |
|
"learning_rate": 3.2892249527410205e-05, |
|
"loss": 1.8775, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 3.2419659735349714e-05, |
|
"loss": 1.9123, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 3.194706994328923e-05, |
|
"loss": 1.8236, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 3.147448015122874e-05, |
|
"loss": 1.8434, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.9, |
|
"learning_rate": 3.1001890359168246e-05, |
|
"loss": 1.8975, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 6.95, |
|
"learning_rate": 3.052930056710775e-05, |
|
"loss": 1.887, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 3.005671077504726e-05, |
|
"loss": 1.8665, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"learning_rate": 2.9584120982986768e-05, |
|
"loss": 1.9142, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 7.09, |
|
"learning_rate": 2.9111531190926277e-05, |
|
"loss": 1.8892, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.863894139886579e-05, |
|
"loss": 1.8762, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 7.18, |
|
"learning_rate": 2.8166351606805297e-05, |
|
"loss": 1.8169, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 7.23, |
|
"learning_rate": 2.7693761814744802e-05, |
|
"loss": 1.8673, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 2.722117202268431e-05, |
|
"loss": 1.8615, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 7.33, |
|
"learning_rate": 2.674858223062382e-05, |
|
"loss": 1.8072, |
|
"step": 15500 |
|
} |
|
], |
|
"max_steps": 21160, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.1710826687824896e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|