|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 50, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3e-05, |
|
"loss": 2.9219, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.05291319857312723, |
|
"eval_loss": 2.6484375, |
|
"eval_runtime": 1.5576, |
|
"eval_samples_per_second": 18.618, |
|
"eval_steps_per_second": 1.284, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 3e-05, |
|
"loss": 2.6938, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.05291319857312723, |
|
"eval_loss": 2.6484375, |
|
"eval_runtime": 1.275, |
|
"eval_samples_per_second": 22.746, |
|
"eval_steps_per_second": 1.569, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9970400926424075e-05, |
|
"loss": 2.6365, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_accuracy": 0.05601796802748051, |
|
"eval_loss": 2.55078125, |
|
"eval_runtime": 1.1095, |
|
"eval_samples_per_second": 26.137, |
|
"eval_steps_per_second": 1.803, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.988172051971717e-05, |
|
"loss": 2.5088, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.056282203725723345, |
|
"eval_loss": 2.533203125, |
|
"eval_runtime": 1.0069, |
|
"eval_samples_per_second": 28.8, |
|
"eval_steps_per_second": 1.986, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9734308760930333e-05, |
|
"loss": 2.7297, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.05667855727308759, |
|
"eval_loss": 2.517578125, |
|
"eval_runtime": 1.0127, |
|
"eval_samples_per_second": 28.637, |
|
"eval_steps_per_second": 1.975, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.9528747416929467e-05, |
|
"loss": 2.9702, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.05720702866957326, |
|
"eval_loss": 2.494140625, |
|
"eval_runtime": 1.2159, |
|
"eval_samples_per_second": 23.851, |
|
"eval_steps_per_second": 1.645, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 2.9265847744427305e-05, |
|
"loss": 2.729, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.05681067512220901, |
|
"eval_loss": 2.48828125, |
|
"eval_runtime": 1.0262, |
|
"eval_samples_per_second": 28.259, |
|
"eval_steps_per_second": 1.949, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.894664728832377e-05, |
|
"loss": 2.6172, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.05780155899061963, |
|
"eval_loss": 2.478515625, |
|
"eval_runtime": 1.3127, |
|
"eval_samples_per_second": 22.092, |
|
"eval_steps_per_second": 1.524, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.8572405786990293e-05, |
|
"loss": 2.6428, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.058065794688862464, |
|
"eval_loss": 2.458984375, |
|
"eval_runtime": 1.0971, |
|
"eval_samples_per_second": 26.432, |
|
"eval_steps_per_second": 1.823, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.8144600200657953e-05, |
|
"loss": 2.5681, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.05899061963271238, |
|
"eval_loss": 2.435546875, |
|
"eval_runtime": 1.3171, |
|
"eval_samples_per_second": 22.018, |
|
"eval_steps_per_second": 1.518, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.7664918882530227e-05, |
|
"loss": 2.1885, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_accuracy": 0.05866032500990884, |
|
"eval_loss": 2.423828125, |
|
"eval_runtime": 1.2181, |
|
"eval_samples_per_second": 23.807, |
|
"eval_steps_per_second": 1.642, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.7135254915624213e-05, |
|
"loss": 1.981, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.05872638393446954, |
|
"eval_loss": 2.421875, |
|
"eval_runtime": 1.6039, |
|
"eval_samples_per_second": 18.081, |
|
"eval_steps_per_second": 1.247, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 2.655769864163684e-05, |
|
"loss": 1.8673, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_accuracy": 0.0591227374818338, |
|
"eval_loss": 2.41796875, |
|
"eval_runtime": 1.0123, |
|
"eval_samples_per_second": 28.649, |
|
"eval_steps_per_second": 1.976, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 2.5934529411321174e-05, |
|
"loss": 1.7321, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.05958514995375875, |
|
"eval_loss": 2.41796875, |
|
"eval_runtime": 1.0152, |
|
"eval_samples_per_second": 28.567, |
|
"eval_steps_per_second": 1.97, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 2.5268206588930332e-05, |
|
"loss": 1.6355, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.060113621350244416, |
|
"eval_loss": 2.41796875, |
|
"eval_runtime": 1.4219, |
|
"eval_samples_per_second": 20.396, |
|
"eval_steps_per_second": 1.407, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 2.4561359846230346e-05, |
|
"loss": 1.7758, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.06017968027480513, |
|
"eval_loss": 2.419921875, |
|
"eval_runtime": 1.618, |
|
"eval_samples_per_second": 17.923, |
|
"eval_steps_per_second": 1.236, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 2.3816778784387097e-05, |
|
"loss": 2.0162, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.06050997489760867, |
|
"eval_loss": 2.408203125, |
|
"eval_runtime": 1.1225, |
|
"eval_samples_per_second": 25.835, |
|
"eval_steps_per_second": 1.782, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.303740192468495e-05, |
|
"loss": 1.8037, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.06050997489760867, |
|
"eval_loss": 2.396484375, |
|
"eval_runtime": 0.912, |
|
"eval_samples_per_second": 31.8, |
|
"eval_steps_per_second": 2.193, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.222630511152573e-05, |
|
"loss": 1.7204, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.0607742105958515, |
|
"eval_loss": 2.375, |
|
"eval_runtime": 1.2202, |
|
"eval_samples_per_second": 23.767, |
|
"eval_steps_per_second": 1.639, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.138668937347609e-05, |
|
"loss": 1.7831, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.060906328444972915, |
|
"eval_loss": 2.357421875, |
|
"eval_runtime": 1.0082, |
|
"eval_samples_per_second": 28.765, |
|
"eval_steps_per_second": 1.984, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.052186829027017e-05, |
|
"loss": 1.299, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_accuracy": 0.06163297661514071, |
|
"eval_loss": 2.349609375, |
|
"eval_runtime": 1.2114, |
|
"eval_samples_per_second": 23.94, |
|
"eval_steps_per_second": 1.651, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 1.963525491562421e-05, |
|
"loss": 1.4463, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.06196327123794425, |
|
"eval_loss": 2.349609375, |
|
"eval_runtime": 1.5179, |
|
"eval_samples_per_second": 19.105, |
|
"eval_steps_per_second": 1.318, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 1.8730348307472828e-05, |
|
"loss": 1.1733, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"eval_accuracy": 0.061699035539701415, |
|
"eval_loss": 2.365234375, |
|
"eval_runtime": 1.3077, |
|
"eval_samples_per_second": 22.176, |
|
"eval_steps_per_second": 1.529, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 1.781071971878587e-05, |
|
"loss": 1.1142, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_accuracy": 0.06255780155899061, |
|
"eval_loss": 2.388671875, |
|
"eval_runtime": 1.1042, |
|
"eval_samples_per_second": 26.264, |
|
"eval_steps_per_second": 1.811, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.6879998503464565e-05, |
|
"loss": 1.3107, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.06268991940811204, |
|
"eval_loss": 2.421875, |
|
"eval_runtime": 0.9167, |
|
"eval_samples_per_second": 31.634, |
|
"eval_steps_per_second": 2.182, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.5941857792939702e-05, |
|
"loss": 1.011, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_accuracy": 0.06216144801162637, |
|
"eval_loss": 2.455078125, |
|
"eval_runtime": 0.9207, |
|
"eval_samples_per_second": 31.497, |
|
"eval_steps_per_second": 2.172, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.3403, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 0.061566917690579995, |
|
"eval_loss": 2.4765625, |
|
"eval_runtime": 1.5266, |
|
"eval_samples_per_second": 18.997, |
|
"eval_steps_per_second": 1.31, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.3108, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_accuracy": 0.061566917690579995, |
|
"eval_loss": 2.4765625, |
|
"eval_runtime": 1.252, |
|
"eval_samples_per_second": 23.163, |
|
"eval_steps_per_second": 1.597, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 1.40581422070603e-05, |
|
"loss": 1.0076, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_accuracy": 0.06189721231338354, |
|
"eval_loss": 2.4609375, |
|
"eval_runtime": 0.9112, |
|
"eval_samples_per_second": 31.825, |
|
"eval_steps_per_second": 2.195, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.3120001496535434e-05, |
|
"loss": 0.8656, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.062359624785308494, |
|
"eval_loss": 2.451171875, |
|
"eval_runtime": 1.5156, |
|
"eval_samples_per_second": 19.134, |
|
"eval_steps_per_second": 1.32, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 1.2189280281214128e-05, |
|
"loss": 0.6635, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_accuracy": 0.06282203725723345, |
|
"eval_loss": 2.451171875, |
|
"eval_runtime": 1.313, |
|
"eval_samples_per_second": 22.087, |
|
"eval_steps_per_second": 1.523, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"learning_rate": 1.1269651692527181e-05, |
|
"loss": 0.9996, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_accuracy": 0.06348262650284053, |
|
"eval_loss": 2.443359375, |
|
"eval_runtime": 1.01, |
|
"eval_samples_per_second": 28.711, |
|
"eval_steps_per_second": 1.98, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 1.036474508437579e-05, |
|
"loss": 0.9029, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_accuracy": 0.06368080327652266, |
|
"eval_loss": 2.447265625, |
|
"eval_runtime": 1.4214, |
|
"eval_samples_per_second": 20.402, |
|
"eval_steps_per_second": 1.407, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 9.478131709729831e-06, |
|
"loss": 0.8329, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_accuracy": 0.06374686220108336, |
|
"eval_loss": 2.455078125, |
|
"eval_runtime": 0.9136, |
|
"eval_samples_per_second": 31.742, |
|
"eval_steps_per_second": 2.189, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 8.61331062652391e-06, |
|
"loss": 0.8012, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.06387898005020479, |
|
"eval_loss": 2.46484375, |
|
"eval_runtime": 1.6062, |
|
"eval_samples_per_second": 18.055, |
|
"eval_steps_per_second": 1.245, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 7.773694888474268e-06, |
|
"loss": 0.5814, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_accuracy": 0.0640110978993262, |
|
"eval_loss": 2.490234375, |
|
"eval_runtime": 1.6209, |
|
"eval_samples_per_second": 17.891, |
|
"eval_steps_per_second": 1.234, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 6.962598075315047e-06, |
|
"loss": 1.0688, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_accuracy": 0.06381292112564407, |
|
"eval_loss": 2.509765625, |
|
"eval_runtime": 1.2235, |
|
"eval_samples_per_second": 23.703, |
|
"eval_steps_per_second": 1.635, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 6.1832212156129045e-06, |
|
"loss": 0.8688, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_accuracy": 0.06348262650284053, |
|
"eval_loss": 2.517578125, |
|
"eval_runtime": 1.5088, |
|
"eval_samples_per_second": 19.221, |
|
"eval_steps_per_second": 1.326, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 5.438640153769654e-06, |
|
"loss": 0.7341, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_accuracy": 0.06381292112564407, |
|
"eval_loss": 2.51953125, |
|
"eval_runtime": 1.0209, |
|
"eval_samples_per_second": 28.406, |
|
"eval_steps_per_second": 1.959, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.731793411069669e-06, |
|
"loss": 0.7102, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.0640110978993262, |
|
"eval_loss": 2.51953125, |
|
"eval_runtime": 1.3112, |
|
"eval_samples_per_second": 22.116, |
|
"eval_steps_per_second": 1.525, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"learning_rate": 4.06547058867883e-06, |
|
"loss": 0.7079, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 4.1, |
|
"eval_accuracy": 0.06414321574844761, |
|
"eval_loss": 2.51953125, |
|
"eval_runtime": 1.0046, |
|
"eval_samples_per_second": 28.868, |
|
"eval_steps_per_second": 1.991, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.442301358363163e-06, |
|
"loss": 0.7656, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_accuracy": 0.06427533359756903, |
|
"eval_loss": 2.51953125, |
|
"eval_runtime": 1.3174, |
|
"eval_samples_per_second": 22.012, |
|
"eval_steps_per_second": 1.518, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 2.86474508437579e-06, |
|
"loss": 0.6377, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_accuracy": 0.06447351037125115, |
|
"eval_loss": 2.52734375, |
|
"eval_runtime": 1.1043, |
|
"eval_samples_per_second": 26.261, |
|
"eval_steps_per_second": 1.811, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 2.335081117469777e-06, |
|
"loss": 0.5898, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_accuracy": 0.06414321574844761, |
|
"eval_loss": 2.53515625, |
|
"eval_runtime": 1.0226, |
|
"eval_samples_per_second": 28.359, |
|
"eval_steps_per_second": 1.956, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 1.8553997993420495e-06, |
|
"loss": 0.5958, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_accuracy": 0.06407715682388691, |
|
"eval_loss": 2.54296875, |
|
"eval_runtime": 0.91, |
|
"eval_samples_per_second": 31.867, |
|
"eval_steps_per_second": 2.198, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"learning_rate": 1.4275942130097097e-06, |
|
"loss": 0.7048, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 4.6, |
|
"eval_accuracy": 0.0640110978993262, |
|
"eval_loss": 2.548828125, |
|
"eval_runtime": 1.2089, |
|
"eval_samples_per_second": 23.989, |
|
"eval_steps_per_second": 1.654, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.0533527116762298e-06, |
|
"loss": 0.5435, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"eval_accuracy": 0.06414321574844761, |
|
"eval_loss": 2.552734375, |
|
"eval_runtime": 1.3311, |
|
"eval_samples_per_second": 21.786, |
|
"eval_steps_per_second": 1.502, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 7.341522555726971e-07, |
|
"loss": 0.4769, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_accuracy": 0.0640110978993262, |
|
"eval_loss": 2.552734375, |
|
"eval_runtime": 1.0089, |
|
"eval_samples_per_second": 28.743, |
|
"eval_steps_per_second": 1.982, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 4.7125258307053385e-07, |
|
"loss": 0.6583, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_accuracy": 0.06420927467300833, |
|
"eval_loss": 2.5546875, |
|
"eval_runtime": 1.0099, |
|
"eval_samples_per_second": 28.715, |
|
"eval_steps_per_second": 1.98, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 2.6569123906967083e-07, |
|
"loss": 0.7168, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.06414321574844761, |
|
"eval_loss": 2.5546875, |
|
"eval_runtime": 1.5134, |
|
"eval_samples_per_second": 19.163, |
|
"eval_steps_per_second": 1.322, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 50, |
|
"total_flos": 2477483753472.0, |
|
"train_loss": 1.4392181396484376, |
|
"train_runtime": 183.8573, |
|
"train_samples_per_second": 4.242, |
|
"train_steps_per_second": 0.272 |
|
} |
|
], |
|
"max_steps": 50, |
|
"num_train_epochs": 5, |
|
"total_flos": 2477483753472.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|