|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"global_step": 80, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0, |
|
"loss": 3.2024, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.038495934959349595, |
|
"eval_loss": 2.912109375, |
|
"eval_runtime": 2.0553, |
|
"eval_samples_per_second": 48.656, |
|
"eval_steps_per_second": 0.973, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0, |
|
"loss": 3.1226, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.038495934959349595, |
|
"eval_loss": 2.912109375, |
|
"eval_runtime": 2.5287, |
|
"eval_samples_per_second": 39.546, |
|
"eval_steps_per_second": 0.791, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0, |
|
"loss": 3.1321, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_accuracy": 0.03936991869918699, |
|
"eval_loss": 2.84765625, |
|
"eval_runtime": 2.4969, |
|
"eval_samples_per_second": 40.05, |
|
"eval_steps_per_second": 0.801, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9875, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.03936991869918699, |
|
"eval_loss": 2.84765625, |
|
"eval_runtime": 2.5942, |
|
"eval_samples_per_second": 38.548, |
|
"eval_steps_per_second": 0.771, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9717, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.03910569105691057, |
|
"eval_loss": 2.85546875, |
|
"eval_runtime": 2.4958, |
|
"eval_samples_per_second": 40.068, |
|
"eval_steps_per_second": 0.801, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9341, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.03920731707317073, |
|
"eval_loss": 2.84375, |
|
"eval_runtime": 2.6044, |
|
"eval_samples_per_second": 38.396, |
|
"eval_steps_per_second": 0.768, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0376, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.03957317073170732, |
|
"eval_loss": 2.818359375, |
|
"eval_runtime": 2.2607, |
|
"eval_samples_per_second": 44.235, |
|
"eval_steps_per_second": 0.885, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8164, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.039532520325203255, |
|
"eval_loss": 2.798828125, |
|
"eval_runtime": 2.0579, |
|
"eval_samples_per_second": 48.594, |
|
"eval_steps_per_second": 0.972, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0857, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.03936991869918699, |
|
"eval_loss": 2.798828125, |
|
"eval_runtime": 2.0436, |
|
"eval_samples_per_second": 48.933, |
|
"eval_steps_per_second": 0.979, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9492, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.03947154471544716, |
|
"eval_loss": 2.796875, |
|
"eval_runtime": 2.6006, |
|
"eval_samples_per_second": 38.452, |
|
"eval_steps_per_second": 0.769, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8633, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_accuracy": 0.03947154471544716, |
|
"eval_loss": 2.796875, |
|
"eval_runtime": 2.0146, |
|
"eval_samples_per_second": 49.639, |
|
"eval_steps_per_second": 0.993, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.8994, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.03979674796747967, |
|
"eval_loss": 2.791015625, |
|
"eval_runtime": 1.8459, |
|
"eval_samples_per_second": 54.174, |
|
"eval_steps_per_second": 1.083, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0024, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.040121951219512196, |
|
"eval_loss": 2.78125, |
|
"eval_runtime": 2.5067, |
|
"eval_samples_per_second": 39.892, |
|
"eval_steps_per_second": 0.798, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.937, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.03991869918699187, |
|
"eval_loss": 2.78125, |
|
"eval_runtime": 2.3271, |
|
"eval_samples_per_second": 42.972, |
|
"eval_steps_per_second": 0.859, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.9963, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.039878048780487806, |
|
"eval_loss": 2.78125, |
|
"eval_runtime": 1.8105, |
|
"eval_samples_per_second": 55.232, |
|
"eval_steps_per_second": 1.105, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 3.0168, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.04, |
|
"eval_loss": 2.775390625, |
|
"eval_runtime": 2.6001, |
|
"eval_samples_per_second": 38.46, |
|
"eval_steps_per_second": 0.769, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2589, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_accuracy": 0.03971544715447155, |
|
"eval_loss": 2.771484375, |
|
"eval_runtime": 2.0378, |
|
"eval_samples_per_second": 49.072, |
|
"eval_steps_per_second": 0.981, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2568, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.03951219512195122, |
|
"eval_loss": 2.779296875, |
|
"eval_runtime": 2.5986, |
|
"eval_samples_per_second": 38.482, |
|
"eval_steps_per_second": 0.77, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3138, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.03930894308943089, |
|
"eval_loss": 2.802734375, |
|
"eval_runtime": 2.2441, |
|
"eval_samples_per_second": 44.561, |
|
"eval_steps_per_second": 0.891, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2759, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.039288617886178864, |
|
"eval_loss": 2.818359375, |
|
"eval_runtime": 2.3006, |
|
"eval_samples_per_second": 43.467, |
|
"eval_steps_per_second": 0.869, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 5e-05, |
|
"loss": 2.5137, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_accuracy": 0.039044715447154474, |
|
"eval_loss": 2.826171875, |
|
"eval_runtime": 2.6018, |
|
"eval_samples_per_second": 38.434, |
|
"eval_steps_per_second": 0.769, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2997, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_accuracy": 0.03878048780487805, |
|
"eval_loss": 2.83203125, |
|
"eval_runtime": 2.6159, |
|
"eval_samples_per_second": 38.228, |
|
"eval_steps_per_second": 0.765, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2693, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_accuracy": 0.03916666666666667, |
|
"eval_loss": 2.8359375, |
|
"eval_runtime": 2.0432, |
|
"eval_samples_per_second": 48.943, |
|
"eval_steps_per_second": 0.979, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 5e-05, |
|
"loss": 2.204, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"eval_accuracy": 0.038739837398373986, |
|
"eval_loss": 2.837890625, |
|
"eval_runtime": 2.2494, |
|
"eval_samples_per_second": 44.456, |
|
"eval_steps_per_second": 0.889, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3713, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.039146341463414634, |
|
"eval_loss": 2.8359375, |
|
"eval_runtime": 2.0551, |
|
"eval_samples_per_second": 48.659, |
|
"eval_steps_per_second": 0.973, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3448, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_accuracy": 0.03910569105691057, |
|
"eval_loss": 2.833984375, |
|
"eval_runtime": 2.6076, |
|
"eval_samples_per_second": 38.349, |
|
"eval_steps_per_second": 0.767, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 5e-05, |
|
"loss": 2.217, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.03910569105691057, |
|
"eval_loss": 2.8359375, |
|
"eval_runtime": 1.8526, |
|
"eval_samples_per_second": 53.979, |
|
"eval_steps_per_second": 1.08, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3082, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_accuracy": 0.038455284552845526, |
|
"eval_loss": 2.837890625, |
|
"eval_runtime": 2.2452, |
|
"eval_samples_per_second": 44.54, |
|
"eval_steps_per_second": 0.891, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2878, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.03855691056910569, |
|
"eval_loss": 2.837890625, |
|
"eval_runtime": 2.2035, |
|
"eval_samples_per_second": 45.381, |
|
"eval_steps_per_second": 0.908, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2429, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_accuracy": 0.03847560975609756, |
|
"eval_loss": 2.837890625, |
|
"eval_runtime": 2.0495, |
|
"eval_samples_per_second": 48.792, |
|
"eval_steps_per_second": 0.976, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2838, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.03851626016260162, |
|
"eval_loss": 2.8359375, |
|
"eval_runtime": 2.5995, |
|
"eval_samples_per_second": 38.469, |
|
"eval_steps_per_second": 0.769, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4038, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.038739837398373986, |
|
"eval_loss": 2.837890625, |
|
"eval_runtime": 2.6048, |
|
"eval_samples_per_second": 38.391, |
|
"eval_steps_per_second": 0.768, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 5e-05, |
|
"loss": 1.8481, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_accuracy": 0.03841463414634146, |
|
"eval_loss": 2.85546875, |
|
"eval_runtime": 2.6153, |
|
"eval_samples_per_second": 38.237, |
|
"eval_steps_per_second": 0.765, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 5e-05, |
|
"loss": 1.657, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_accuracy": 0.038211382113821135, |
|
"eval_loss": 2.896484375, |
|
"eval_runtime": 2.1039, |
|
"eval_samples_per_second": 47.53, |
|
"eval_steps_per_second": 0.951, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6996, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_accuracy": 0.03804878048780488, |
|
"eval_loss": 2.958984375, |
|
"eval_runtime": 2.051, |
|
"eval_samples_per_second": 48.757, |
|
"eval_steps_per_second": 0.975, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 5e-05, |
|
"loss": 1.6741, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.037865853658536584, |
|
"eval_loss": 3.03125, |
|
"eval_runtime": 2.6053, |
|
"eval_samples_per_second": 38.383, |
|
"eval_steps_per_second": 0.768, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 5e-05, |
|
"loss": 1.594, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.037967479674796745, |
|
"eval_loss": 3.041015625, |
|
"eval_runtime": 2.5975, |
|
"eval_samples_per_second": 38.498, |
|
"eval_steps_per_second": 0.77, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5201, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.038109756097560975, |
|
"eval_loss": 3.015625, |
|
"eval_runtime": 2.6045, |
|
"eval_samples_per_second": 38.396, |
|
"eval_steps_per_second": 0.768, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5149, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"eval_accuracy": 0.03798780487804878, |
|
"eval_loss": 3.013671875, |
|
"eval_runtime": 2.3613, |
|
"eval_samples_per_second": 42.349, |
|
"eval_steps_per_second": 0.847, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5521, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.03788617886178862, |
|
"eval_loss": 3.017578125, |
|
"eval_runtime": 2.601, |
|
"eval_samples_per_second": 38.446, |
|
"eval_steps_per_second": 0.769, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5364, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_accuracy": 0.03780487804878049, |
|
"eval_loss": 3.02734375, |
|
"eval_runtime": 2.6044, |
|
"eval_samples_per_second": 38.397, |
|
"eval_steps_per_second": 0.768, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5385, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.037967479674796745, |
|
"eval_loss": 3.0390625, |
|
"eval_runtime": 2.5064, |
|
"eval_samples_per_second": 39.899, |
|
"eval_steps_per_second": 0.798, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4794, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_accuracy": 0.038008130081300814, |
|
"eval_loss": 3.048828125, |
|
"eval_runtime": 2.1015, |
|
"eval_samples_per_second": 47.584, |
|
"eval_steps_per_second": 0.952, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4313, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.03782520325203252, |
|
"eval_loss": 3.052734375, |
|
"eval_runtime": 2.4538, |
|
"eval_samples_per_second": 40.753, |
|
"eval_steps_per_second": 0.815, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5e-05, |
|
"loss": 1.5071, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"eval_accuracy": 0.03784552845528455, |
|
"eval_loss": 3.046875, |
|
"eval_runtime": 2.1003, |
|
"eval_samples_per_second": 47.611, |
|
"eval_steps_per_second": 0.952, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4799, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_accuracy": 0.037764227642276424, |
|
"eval_loss": 3.044921875, |
|
"eval_runtime": 2.6039, |
|
"eval_samples_per_second": 38.404, |
|
"eval_steps_per_second": 0.768, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 5e-05, |
|
"loss": 1.521, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_accuracy": 0.037967479674796745, |
|
"eval_loss": 3.037109375, |
|
"eval_runtime": 2.6013, |
|
"eval_samples_per_second": 38.443, |
|
"eval_steps_per_second": 0.769, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4603, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.03788617886178862, |
|
"eval_loss": 3.041015625, |
|
"eval_runtime": 2.0826, |
|
"eval_samples_per_second": 48.016, |
|
"eval_steps_per_second": 0.96, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 5e-05, |
|
"loss": 1.25, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"eval_accuracy": 0.03813008130081301, |
|
"eval_loss": 3.0859375, |
|
"eval_runtime": 2.5454, |
|
"eval_samples_per_second": 39.287, |
|
"eval_steps_per_second": 0.786, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0411, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.03754065040650407, |
|
"eval_loss": 3.1796875, |
|
"eval_runtime": 2.6016, |
|
"eval_samples_per_second": 38.438, |
|
"eval_steps_per_second": 0.769, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0385, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_accuracy": 0.037113821138211385, |
|
"eval_loss": 3.296875, |
|
"eval_runtime": 2.6072, |
|
"eval_samples_per_second": 38.355, |
|
"eval_steps_per_second": 0.767, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0254, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.03670731707317073, |
|
"eval_loss": 3.361328125, |
|
"eval_runtime": 2.2651, |
|
"eval_samples_per_second": 44.148, |
|
"eval_steps_per_second": 0.883, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9656, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"eval_accuracy": 0.036829268292682925, |
|
"eval_loss": 3.36328125, |
|
"eval_runtime": 2.6237, |
|
"eval_samples_per_second": 38.115, |
|
"eval_steps_per_second": 0.762, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 5e-05, |
|
"loss": 1.036, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.03664634146341463, |
|
"eval_loss": 3.3359375, |
|
"eval_runtime": 2.5895, |
|
"eval_samples_per_second": 38.618, |
|
"eval_steps_per_second": 0.772, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9366, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_accuracy": 0.036565040650406506, |
|
"eval_loss": 3.294921875, |
|
"eval_runtime": 2.2574, |
|
"eval_samples_per_second": 44.298, |
|
"eval_steps_per_second": 0.886, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9712, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_accuracy": 0.03666666666666667, |
|
"eval_loss": 3.26953125, |
|
"eval_runtime": 2.6146, |
|
"eval_samples_per_second": 38.247, |
|
"eval_steps_per_second": 0.765, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0066, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_accuracy": 0.036565040650406506, |
|
"eval_loss": 3.267578125, |
|
"eval_runtime": 2.6062, |
|
"eval_samples_per_second": 38.37, |
|
"eval_steps_per_second": 0.767, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9952, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_accuracy": 0.03676829268292683, |
|
"eval_loss": 3.27734375, |
|
"eval_runtime": 2.5965, |
|
"eval_samples_per_second": 38.513, |
|
"eval_steps_per_second": 0.77, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0352, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_accuracy": 0.0366869918699187, |
|
"eval_loss": 3.2890625, |
|
"eval_runtime": 2.3047, |
|
"eval_samples_per_second": 43.39, |
|
"eval_steps_per_second": 0.868, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0212, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"eval_accuracy": 0.036178861788617886, |
|
"eval_loss": 3.31640625, |
|
"eval_runtime": 2.6015, |
|
"eval_samples_per_second": 38.439, |
|
"eval_steps_per_second": 0.769, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9468, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_accuracy": 0.036036585365853656, |
|
"eval_loss": 3.3203125, |
|
"eval_runtime": 2.2607, |
|
"eval_samples_per_second": 44.233, |
|
"eval_steps_per_second": 0.885, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9155, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_accuracy": 0.03664634146341463, |
|
"eval_loss": 3.322265625, |
|
"eval_runtime": 2.6074, |
|
"eval_samples_per_second": 38.352, |
|
"eval_steps_per_second": 0.767, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8552, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_accuracy": 0.03701219512195122, |
|
"eval_loss": 3.326171875, |
|
"eval_runtime": 2.2425, |
|
"eval_samples_per_second": 44.594, |
|
"eval_steps_per_second": 0.892, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9575, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.03699186991869919, |
|
"eval_loss": 3.333984375, |
|
"eval_runtime": 2.0082, |
|
"eval_samples_per_second": 49.796, |
|
"eval_steps_per_second": 0.996, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6384, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_accuracy": 0.036971544715447155, |
|
"eval_loss": 3.375, |
|
"eval_runtime": 2.5098, |
|
"eval_samples_per_second": 39.844, |
|
"eval_steps_per_second": 0.797, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6436, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.12, |
|
"eval_accuracy": 0.03636178861788618, |
|
"eval_loss": 3.4453125, |
|
"eval_runtime": 2.6062, |
|
"eval_samples_per_second": 38.37, |
|
"eval_steps_per_second": 0.767, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5752, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 4.19, |
|
"eval_accuracy": 0.035792682926829265, |
|
"eval_loss": 3.5390625, |
|
"eval_runtime": 2.6001, |
|
"eval_samples_per_second": 38.46, |
|
"eval_steps_per_second": 0.769, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6542, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_accuracy": 0.03540650406504065, |
|
"eval_loss": 3.6015625, |
|
"eval_runtime": 2.4568, |
|
"eval_samples_per_second": 40.704, |
|
"eval_steps_per_second": 0.814, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6724, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_accuracy": 0.03540650406504065, |
|
"eval_loss": 3.6015625, |
|
"eval_runtime": 2.0755, |
|
"eval_samples_per_second": 48.182, |
|
"eval_steps_per_second": 0.964, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 5e-05, |
|
"loss": 0.591, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_accuracy": 0.03591463414634146, |
|
"eval_loss": 3.59375, |
|
"eval_runtime": 2.3021, |
|
"eval_samples_per_second": 43.439, |
|
"eval_steps_per_second": 0.869, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5346, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_accuracy": 0.03613821138211382, |
|
"eval_loss": 3.580078125, |
|
"eval_runtime": 2.0073, |
|
"eval_samples_per_second": 49.819, |
|
"eval_steps_per_second": 0.996, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5112, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"eval_accuracy": 0.036077235772357726, |
|
"eval_loss": 3.576171875, |
|
"eval_runtime": 2.6065, |
|
"eval_samples_per_second": 38.365, |
|
"eval_steps_per_second": 0.767, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5443, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_accuracy": 0.03619918699186992, |
|
"eval_loss": 3.583984375, |
|
"eval_runtime": 2.6395, |
|
"eval_samples_per_second": 37.886, |
|
"eval_steps_per_second": 0.758, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5689, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"eval_accuracy": 0.035833333333333335, |
|
"eval_loss": 3.615234375, |
|
"eval_runtime": 2.4972, |
|
"eval_samples_per_second": 40.046, |
|
"eval_steps_per_second": 0.801, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5667, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"eval_accuracy": 0.0357520325203252, |
|
"eval_loss": 3.6328125, |
|
"eval_runtime": 2.4508, |
|
"eval_samples_per_second": 40.803, |
|
"eval_steps_per_second": 0.816, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 5e-05, |
|
"loss": 0.554, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_accuracy": 0.03573170731707317, |
|
"eval_loss": 3.634765625, |
|
"eval_runtime": 2.0574, |
|
"eval_samples_per_second": 48.604, |
|
"eval_steps_per_second": 0.972, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6087, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 4.81, |
|
"eval_accuracy": 0.03552845528455285, |
|
"eval_loss": 3.625, |
|
"eval_runtime": 2.6082, |
|
"eval_samples_per_second": 38.34, |
|
"eval_steps_per_second": 0.767, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5236, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_accuracy": 0.03552845528455285, |
|
"eval_loss": 3.615234375, |
|
"eval_runtime": 1.8631, |
|
"eval_samples_per_second": 53.674, |
|
"eval_steps_per_second": 1.073, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5458, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_accuracy": 0.03550813008130081, |
|
"eval_loss": 3.578125, |
|
"eval_runtime": 2.6009, |
|
"eval_samples_per_second": 38.449, |
|
"eval_steps_per_second": 0.769, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5702, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.03550813008130081, |
|
"eval_loss": 3.548828125, |
|
"eval_runtime": 2.5127, |
|
"eval_samples_per_second": 39.798, |
|
"eval_steps_per_second": 0.796, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 80, |
|
"total_flos": 9903514583040.0, |
|
"train_loss": 1.6890087127685547, |
|
"train_runtime": 406.6939, |
|
"train_samples_per_second": 12.294, |
|
"train_steps_per_second": 0.197 |
|
} |
|
], |
|
"max_steps": 80, |
|
"num_train_epochs": 5, |
|
"total_flos": 9903514583040.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|