{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "learning_rate": 0, "loss": 3.2024, "step": 1 }, { "epoch": 0.06, "eval_accuracy": 0.038495934959349595, "eval_loss": 2.912109375, "eval_runtime": 2.0553, "eval_samples_per_second": 48.656, "eval_steps_per_second": 0.973, "step": 1 }, { "epoch": 0.12, "learning_rate": 0, "loss": 3.1226, "step": 2 }, { "epoch": 0.12, "eval_accuracy": 0.038495934959349595, "eval_loss": 2.912109375, "eval_runtime": 2.5287, "eval_samples_per_second": 39.546, "eval_steps_per_second": 0.791, "step": 2 }, { "epoch": 0.19, "learning_rate": 0.0, "loss": 3.1321, "step": 3 }, { "epoch": 0.19, "eval_accuracy": 0.03936991869918699, "eval_loss": 2.84765625, "eval_runtime": 2.4969, "eval_samples_per_second": 40.05, "eval_steps_per_second": 0.801, "step": 3 }, { "epoch": 0.25, "learning_rate": 5e-05, "loss": 2.9875, "step": 4 }, { "epoch": 0.25, "eval_accuracy": 0.03936991869918699, "eval_loss": 2.84765625, "eval_runtime": 2.5942, "eval_samples_per_second": 38.548, "eval_steps_per_second": 0.771, "step": 4 }, { "epoch": 0.31, "learning_rate": 5e-05, "loss": 2.9717, "step": 5 }, { "epoch": 0.31, "eval_accuracy": 0.03910569105691057, "eval_loss": 2.85546875, "eval_runtime": 2.4958, "eval_samples_per_second": 40.068, "eval_steps_per_second": 0.801, "step": 5 }, { "epoch": 0.38, "learning_rate": 5e-05, "loss": 2.9341, "step": 6 }, { "epoch": 0.38, "eval_accuracy": 0.03920731707317073, "eval_loss": 2.84375, "eval_runtime": 2.6044, "eval_samples_per_second": 38.396, "eval_steps_per_second": 0.768, "step": 6 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 3.0376, "step": 7 }, { "epoch": 0.44, "eval_accuracy": 0.03957317073170732, "eval_loss": 2.818359375, "eval_runtime": 2.2607, "eval_samples_per_second": 44.235, "eval_steps_per_second": 0.885, "step": 7 }, { "epoch": 0.5, "learning_rate": 5e-05, "loss": 2.8164, "step": 8 }, { "epoch": 0.5, "eval_accuracy": 0.039532520325203255, "eval_loss": 2.798828125, "eval_runtime": 2.0579, "eval_samples_per_second": 48.594, "eval_steps_per_second": 0.972, "step": 8 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 3.0857, "step": 9 }, { "epoch": 0.56, "eval_accuracy": 0.03936991869918699, "eval_loss": 2.798828125, "eval_runtime": 2.0436, "eval_samples_per_second": 48.933, "eval_steps_per_second": 0.979, "step": 9 }, { "epoch": 0.62, "learning_rate": 5e-05, "loss": 2.9492, "step": 10 }, { "epoch": 0.62, "eval_accuracy": 0.03947154471544716, "eval_loss": 2.796875, "eval_runtime": 2.6006, "eval_samples_per_second": 38.452, "eval_steps_per_second": 0.769, "step": 10 }, { "epoch": 0.69, "learning_rate": 5e-05, "loss": 2.8633, "step": 11 }, { "epoch": 0.69, "eval_accuracy": 0.03947154471544716, "eval_loss": 2.796875, "eval_runtime": 2.0146, "eval_samples_per_second": 49.639, "eval_steps_per_second": 0.993, "step": 11 }, { "epoch": 0.75, "learning_rate": 5e-05, "loss": 2.8994, "step": 12 }, { "epoch": 0.75, "eval_accuracy": 0.03979674796747967, "eval_loss": 2.791015625, "eval_runtime": 1.8459, "eval_samples_per_second": 54.174, "eval_steps_per_second": 1.083, "step": 12 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 3.0024, "step": 13 }, { "epoch": 0.81, "eval_accuracy": 0.040121951219512196, "eval_loss": 2.78125, "eval_runtime": 2.5067, "eval_samples_per_second": 39.892, "eval_steps_per_second": 0.798, "step": 13 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.937, "step": 14 }, { "epoch": 0.88, "eval_accuracy": 0.03991869918699187, "eval_loss": 2.78125, "eval_runtime": 2.3271, "eval_samples_per_second": 42.972, "eval_steps_per_second": 0.859, "step": 14 }, { "epoch": 0.94, "learning_rate": 5e-05, "loss": 2.9963, "step": 15 }, { "epoch": 0.94, "eval_accuracy": 0.039878048780487806, "eval_loss": 2.78125, "eval_runtime": 1.8105, "eval_samples_per_second": 55.232, "eval_steps_per_second": 1.105, "step": 15 }, { "epoch": 1.0, "learning_rate": 5e-05, "loss": 3.0168, "step": 16 }, { "epoch": 1.0, "eval_accuracy": 0.04, "eval_loss": 2.775390625, "eval_runtime": 2.6001, "eval_samples_per_second": 38.46, "eval_steps_per_second": 0.769, "step": 16 }, { "epoch": 1.06, "learning_rate": 5e-05, "loss": 2.2589, "step": 17 }, { "epoch": 1.06, "eval_accuracy": 0.03971544715447155, "eval_loss": 2.771484375, "eval_runtime": 2.0378, "eval_samples_per_second": 49.072, "eval_steps_per_second": 0.981, "step": 17 }, { "epoch": 1.12, "learning_rate": 5e-05, "loss": 2.2568, "step": 18 }, { "epoch": 1.12, "eval_accuracy": 0.03951219512195122, "eval_loss": 2.779296875, "eval_runtime": 2.5986, "eval_samples_per_second": 38.482, "eval_steps_per_second": 0.77, "step": 18 }, { "epoch": 1.19, "learning_rate": 5e-05, "loss": 2.3138, "step": 19 }, { "epoch": 1.19, "eval_accuracy": 0.03930894308943089, "eval_loss": 2.802734375, "eval_runtime": 2.2441, "eval_samples_per_second": 44.561, "eval_steps_per_second": 0.891, "step": 19 }, { "epoch": 1.25, "learning_rate": 5e-05, "loss": 2.2759, "step": 20 }, { "epoch": 1.25, "eval_accuracy": 0.039288617886178864, "eval_loss": 2.818359375, "eval_runtime": 2.3006, "eval_samples_per_second": 43.467, "eval_steps_per_second": 0.869, "step": 20 }, { "epoch": 1.31, "learning_rate": 5e-05, "loss": 2.5137, "step": 21 }, { "epoch": 1.31, "eval_accuracy": 0.039044715447154474, "eval_loss": 2.826171875, "eval_runtime": 2.6018, "eval_samples_per_second": 38.434, "eval_steps_per_second": 0.769, "step": 21 }, { "epoch": 1.38, "learning_rate": 5e-05, "loss": 2.2997, "step": 22 }, { "epoch": 1.38, "eval_accuracy": 0.03878048780487805, "eval_loss": 2.83203125, "eval_runtime": 2.6159, "eval_samples_per_second": 38.228, "eval_steps_per_second": 0.765, "step": 22 }, { "epoch": 1.44, "learning_rate": 5e-05, "loss": 2.2693, "step": 23 }, { "epoch": 1.44, "eval_accuracy": 0.03916666666666667, "eval_loss": 2.8359375, "eval_runtime": 2.0432, "eval_samples_per_second": 48.943, "eval_steps_per_second": 0.979, "step": 23 }, { "epoch": 1.5, "learning_rate": 5e-05, "loss": 2.204, "step": 24 }, { "epoch": 1.5, "eval_accuracy": 0.038739837398373986, "eval_loss": 2.837890625, "eval_runtime": 2.2494, "eval_samples_per_second": 44.456, "eval_steps_per_second": 0.889, "step": 24 }, { "epoch": 1.56, "learning_rate": 5e-05, "loss": 2.3713, "step": 25 }, { "epoch": 1.56, "eval_accuracy": 0.039146341463414634, "eval_loss": 2.8359375, "eval_runtime": 2.0551, "eval_samples_per_second": 48.659, "eval_steps_per_second": 0.973, "step": 25 }, { "epoch": 1.62, "learning_rate": 5e-05, "loss": 2.3448, "step": 26 }, { "epoch": 1.62, "eval_accuracy": 0.03910569105691057, "eval_loss": 2.833984375, "eval_runtime": 2.6076, "eval_samples_per_second": 38.349, "eval_steps_per_second": 0.767, "step": 26 }, { "epoch": 1.69, "learning_rate": 5e-05, "loss": 2.217, "step": 27 }, { "epoch": 1.69, "eval_accuracy": 0.03910569105691057, "eval_loss": 2.8359375, "eval_runtime": 1.8526, "eval_samples_per_second": 53.979, "eval_steps_per_second": 1.08, "step": 27 }, { "epoch": 1.75, "learning_rate": 5e-05, "loss": 2.3082, "step": 28 }, { "epoch": 1.75, "eval_accuracy": 0.038455284552845526, "eval_loss": 2.837890625, "eval_runtime": 2.2452, "eval_samples_per_second": 44.54, "eval_steps_per_second": 0.891, "step": 28 }, { "epoch": 1.81, "learning_rate": 5e-05, "loss": 2.2878, "step": 29 }, { "epoch": 1.81, "eval_accuracy": 0.03855691056910569, "eval_loss": 2.837890625, "eval_runtime": 2.2035, "eval_samples_per_second": 45.381, "eval_steps_per_second": 0.908, "step": 29 }, { "epoch": 1.88, "learning_rate": 5e-05, "loss": 2.2429, "step": 30 }, { "epoch": 1.88, "eval_accuracy": 0.03847560975609756, "eval_loss": 2.837890625, "eval_runtime": 2.0495, "eval_samples_per_second": 48.792, "eval_steps_per_second": 0.976, "step": 30 }, { "epoch": 1.94, "learning_rate": 5e-05, "loss": 2.2838, "step": 31 }, { "epoch": 1.94, "eval_accuracy": 0.03851626016260162, "eval_loss": 2.8359375, "eval_runtime": 2.5995, "eval_samples_per_second": 38.469, "eval_steps_per_second": 0.769, "step": 31 }, { "epoch": 2.0, "learning_rate": 5e-05, "loss": 2.4038, "step": 32 }, { "epoch": 2.0, "eval_accuracy": 0.038739837398373986, "eval_loss": 2.837890625, "eval_runtime": 2.6048, "eval_samples_per_second": 38.391, "eval_steps_per_second": 0.768, "step": 32 }, { "epoch": 2.06, "learning_rate": 5e-05, "loss": 1.8481, "step": 33 }, { "epoch": 2.06, "eval_accuracy": 0.03841463414634146, "eval_loss": 2.85546875, "eval_runtime": 2.6153, "eval_samples_per_second": 38.237, "eval_steps_per_second": 0.765, "step": 33 }, { "epoch": 2.12, "learning_rate": 5e-05, "loss": 1.657, "step": 34 }, { "epoch": 2.12, "eval_accuracy": 0.038211382113821135, "eval_loss": 2.896484375, "eval_runtime": 2.1039, "eval_samples_per_second": 47.53, "eval_steps_per_second": 0.951, "step": 34 }, { "epoch": 2.19, "learning_rate": 5e-05, "loss": 1.6996, "step": 35 }, { "epoch": 2.19, "eval_accuracy": 0.03804878048780488, "eval_loss": 2.958984375, "eval_runtime": 2.051, "eval_samples_per_second": 48.757, "eval_steps_per_second": 0.975, "step": 35 }, { "epoch": 2.25, "learning_rate": 5e-05, "loss": 1.6741, "step": 36 }, { "epoch": 2.25, "eval_accuracy": 0.037865853658536584, "eval_loss": 3.03125, "eval_runtime": 2.6053, "eval_samples_per_second": 38.383, "eval_steps_per_second": 0.768, "step": 36 }, { "epoch": 2.31, "learning_rate": 5e-05, "loss": 1.594, "step": 37 }, { "epoch": 2.31, "eval_accuracy": 0.037967479674796745, "eval_loss": 3.041015625, "eval_runtime": 2.5975, "eval_samples_per_second": 38.498, "eval_steps_per_second": 0.77, "step": 37 }, { "epoch": 2.38, "learning_rate": 5e-05, "loss": 1.5201, "step": 38 }, { "epoch": 2.38, "eval_accuracy": 0.038109756097560975, "eval_loss": 3.015625, "eval_runtime": 2.6045, "eval_samples_per_second": 38.396, "eval_steps_per_second": 0.768, "step": 38 }, { "epoch": 2.44, "learning_rate": 5e-05, "loss": 1.5149, "step": 39 }, { "epoch": 2.44, "eval_accuracy": 0.03798780487804878, "eval_loss": 3.013671875, "eval_runtime": 2.3613, "eval_samples_per_second": 42.349, "eval_steps_per_second": 0.847, "step": 39 }, { "epoch": 2.5, "learning_rate": 5e-05, "loss": 1.5521, "step": 40 }, { "epoch": 2.5, "eval_accuracy": 0.03788617886178862, "eval_loss": 3.017578125, "eval_runtime": 2.601, "eval_samples_per_second": 38.446, "eval_steps_per_second": 0.769, "step": 40 }, { "epoch": 2.56, "learning_rate": 5e-05, "loss": 1.5364, "step": 41 }, { "epoch": 2.56, "eval_accuracy": 0.03780487804878049, "eval_loss": 3.02734375, "eval_runtime": 2.6044, "eval_samples_per_second": 38.397, "eval_steps_per_second": 0.768, "step": 41 }, { "epoch": 2.62, "learning_rate": 5e-05, "loss": 1.5385, "step": 42 }, { "epoch": 2.62, "eval_accuracy": 0.037967479674796745, "eval_loss": 3.0390625, "eval_runtime": 2.5064, "eval_samples_per_second": 39.899, "eval_steps_per_second": 0.798, "step": 42 }, { "epoch": 2.69, "learning_rate": 5e-05, "loss": 1.4794, "step": 43 }, { "epoch": 2.69, "eval_accuracy": 0.038008130081300814, "eval_loss": 3.048828125, "eval_runtime": 2.1015, "eval_samples_per_second": 47.584, "eval_steps_per_second": 0.952, "step": 43 }, { "epoch": 2.75, "learning_rate": 5e-05, "loss": 1.4313, "step": 44 }, { "epoch": 2.75, "eval_accuracy": 0.03782520325203252, "eval_loss": 3.052734375, "eval_runtime": 2.4538, "eval_samples_per_second": 40.753, "eval_steps_per_second": 0.815, "step": 44 }, { "epoch": 2.81, "learning_rate": 5e-05, "loss": 1.5071, "step": 45 }, { "epoch": 2.81, "eval_accuracy": 0.03784552845528455, "eval_loss": 3.046875, "eval_runtime": 2.1003, "eval_samples_per_second": 47.611, "eval_steps_per_second": 0.952, "step": 45 }, { "epoch": 2.88, "learning_rate": 5e-05, "loss": 1.4799, "step": 46 }, { "epoch": 2.88, "eval_accuracy": 0.037764227642276424, "eval_loss": 3.044921875, "eval_runtime": 2.6039, "eval_samples_per_second": 38.404, "eval_steps_per_second": 0.768, "step": 46 }, { "epoch": 2.94, "learning_rate": 5e-05, "loss": 1.521, "step": 47 }, { "epoch": 2.94, "eval_accuracy": 0.037967479674796745, "eval_loss": 3.037109375, "eval_runtime": 2.6013, "eval_samples_per_second": 38.443, "eval_steps_per_second": 0.769, "step": 47 }, { "epoch": 3.0, "learning_rate": 5e-05, "loss": 1.4603, "step": 48 }, { "epoch": 3.0, "eval_accuracy": 0.03788617886178862, "eval_loss": 3.041015625, "eval_runtime": 2.0826, "eval_samples_per_second": 48.016, "eval_steps_per_second": 0.96, "step": 48 }, { "epoch": 3.06, "learning_rate": 5e-05, "loss": 1.25, "step": 49 }, { "epoch": 3.06, "eval_accuracy": 0.03813008130081301, "eval_loss": 3.0859375, "eval_runtime": 2.5454, "eval_samples_per_second": 39.287, "eval_steps_per_second": 0.786, "step": 49 }, { "epoch": 3.12, "learning_rate": 5e-05, "loss": 1.0411, "step": 50 }, { "epoch": 3.12, "eval_accuracy": 0.03754065040650407, "eval_loss": 3.1796875, "eval_runtime": 2.6016, "eval_samples_per_second": 38.438, "eval_steps_per_second": 0.769, "step": 50 }, { "epoch": 3.19, "learning_rate": 5e-05, "loss": 1.0385, "step": 51 }, { "epoch": 3.19, "eval_accuracy": 0.037113821138211385, "eval_loss": 3.296875, "eval_runtime": 2.6072, "eval_samples_per_second": 38.355, "eval_steps_per_second": 0.767, "step": 51 }, { "epoch": 3.25, "learning_rate": 5e-05, "loss": 1.0254, "step": 52 }, { "epoch": 3.25, "eval_accuracy": 0.03670731707317073, "eval_loss": 3.361328125, "eval_runtime": 2.2651, "eval_samples_per_second": 44.148, "eval_steps_per_second": 0.883, "step": 52 }, { "epoch": 3.31, "learning_rate": 5e-05, "loss": 0.9656, "step": 53 }, { "epoch": 3.31, "eval_accuracy": 0.036829268292682925, "eval_loss": 3.36328125, "eval_runtime": 2.6237, "eval_samples_per_second": 38.115, "eval_steps_per_second": 0.762, "step": 53 }, { "epoch": 3.38, "learning_rate": 5e-05, "loss": 1.036, "step": 54 }, { "epoch": 3.38, "eval_accuracy": 0.03664634146341463, "eval_loss": 3.3359375, "eval_runtime": 2.5895, "eval_samples_per_second": 38.618, "eval_steps_per_second": 0.772, "step": 54 }, { "epoch": 3.44, "learning_rate": 5e-05, "loss": 0.9366, "step": 55 }, { "epoch": 3.44, "eval_accuracy": 0.036565040650406506, "eval_loss": 3.294921875, "eval_runtime": 2.2574, "eval_samples_per_second": 44.298, "eval_steps_per_second": 0.886, "step": 55 }, { "epoch": 3.5, "learning_rate": 5e-05, "loss": 0.9712, "step": 56 }, { "epoch": 3.5, "eval_accuracy": 0.03666666666666667, "eval_loss": 3.26953125, "eval_runtime": 2.6146, "eval_samples_per_second": 38.247, "eval_steps_per_second": 0.765, "step": 56 }, { "epoch": 3.56, "learning_rate": 5e-05, "loss": 1.0066, "step": 57 }, { "epoch": 3.56, "eval_accuracy": 0.036565040650406506, "eval_loss": 3.267578125, "eval_runtime": 2.6062, "eval_samples_per_second": 38.37, "eval_steps_per_second": 0.767, "step": 57 }, { "epoch": 3.62, "learning_rate": 5e-05, "loss": 0.9952, "step": 58 }, { "epoch": 3.62, "eval_accuracy": 0.03676829268292683, "eval_loss": 3.27734375, "eval_runtime": 2.5965, "eval_samples_per_second": 38.513, "eval_steps_per_second": 0.77, "step": 58 }, { "epoch": 3.69, "learning_rate": 5e-05, "loss": 1.0352, "step": 59 }, { "epoch": 3.69, "eval_accuracy": 0.0366869918699187, "eval_loss": 3.2890625, "eval_runtime": 2.3047, "eval_samples_per_second": 43.39, "eval_steps_per_second": 0.868, "step": 59 }, { "epoch": 3.75, "learning_rate": 5e-05, "loss": 1.0212, "step": 60 }, { "epoch": 3.75, "eval_accuracy": 0.036178861788617886, "eval_loss": 3.31640625, "eval_runtime": 2.6015, "eval_samples_per_second": 38.439, "eval_steps_per_second": 0.769, "step": 60 }, { "epoch": 3.81, "learning_rate": 5e-05, "loss": 0.9468, "step": 61 }, { "epoch": 3.81, "eval_accuracy": 0.036036585365853656, "eval_loss": 3.3203125, "eval_runtime": 2.2607, "eval_samples_per_second": 44.233, "eval_steps_per_second": 0.885, "step": 61 }, { "epoch": 3.88, "learning_rate": 5e-05, "loss": 0.9155, "step": 62 }, { "epoch": 3.88, "eval_accuracy": 0.03664634146341463, "eval_loss": 3.322265625, "eval_runtime": 2.6074, "eval_samples_per_second": 38.352, "eval_steps_per_second": 0.767, "step": 62 }, { "epoch": 3.94, "learning_rate": 5e-05, "loss": 0.8552, "step": 63 }, { "epoch": 3.94, "eval_accuracy": 0.03701219512195122, "eval_loss": 3.326171875, "eval_runtime": 2.2425, "eval_samples_per_second": 44.594, "eval_steps_per_second": 0.892, "step": 63 }, { "epoch": 4.0, "learning_rate": 5e-05, "loss": 0.9575, "step": 64 }, { "epoch": 4.0, "eval_accuracy": 0.03699186991869919, "eval_loss": 3.333984375, "eval_runtime": 2.0082, "eval_samples_per_second": 49.796, "eval_steps_per_second": 0.996, "step": 64 }, { "epoch": 4.06, "learning_rate": 5e-05, "loss": 0.6384, "step": 65 }, { "epoch": 4.06, "eval_accuracy": 0.036971544715447155, "eval_loss": 3.375, "eval_runtime": 2.5098, "eval_samples_per_second": 39.844, "eval_steps_per_second": 0.797, "step": 65 }, { "epoch": 4.12, "learning_rate": 5e-05, "loss": 0.6436, "step": 66 }, { "epoch": 4.12, "eval_accuracy": 0.03636178861788618, "eval_loss": 3.4453125, "eval_runtime": 2.6062, "eval_samples_per_second": 38.37, "eval_steps_per_second": 0.767, "step": 66 }, { "epoch": 4.19, "learning_rate": 5e-05, "loss": 0.5752, "step": 67 }, { "epoch": 4.19, "eval_accuracy": 0.035792682926829265, "eval_loss": 3.5390625, "eval_runtime": 2.6001, "eval_samples_per_second": 38.46, "eval_steps_per_second": 0.769, "step": 67 }, { "epoch": 4.25, "learning_rate": 5e-05, "loss": 0.6542, "step": 68 }, { "epoch": 4.25, "eval_accuracy": 0.03540650406504065, "eval_loss": 3.6015625, "eval_runtime": 2.4568, "eval_samples_per_second": 40.704, "eval_steps_per_second": 0.814, "step": 68 }, { "epoch": 4.31, "learning_rate": 5e-05, "loss": 0.6724, "step": 69 }, { "epoch": 4.31, "eval_accuracy": 0.03540650406504065, "eval_loss": 3.6015625, "eval_runtime": 2.0755, "eval_samples_per_second": 48.182, "eval_steps_per_second": 0.964, "step": 69 }, { "epoch": 4.38, "learning_rate": 5e-05, "loss": 0.591, "step": 70 }, { "epoch": 4.38, "eval_accuracy": 0.03591463414634146, "eval_loss": 3.59375, "eval_runtime": 2.3021, "eval_samples_per_second": 43.439, "eval_steps_per_second": 0.869, "step": 70 }, { "epoch": 4.44, "learning_rate": 5e-05, "loss": 0.5346, "step": 71 }, { "epoch": 4.44, "eval_accuracy": 0.03613821138211382, "eval_loss": 3.580078125, "eval_runtime": 2.0073, "eval_samples_per_second": 49.819, "eval_steps_per_second": 0.996, "step": 71 }, { "epoch": 4.5, "learning_rate": 5e-05, "loss": 0.5112, "step": 72 }, { "epoch": 4.5, "eval_accuracy": 0.036077235772357726, "eval_loss": 3.576171875, "eval_runtime": 2.6065, "eval_samples_per_second": 38.365, "eval_steps_per_second": 0.767, "step": 72 }, { "epoch": 4.56, "learning_rate": 5e-05, "loss": 0.5443, "step": 73 }, { "epoch": 4.56, "eval_accuracy": 0.03619918699186992, "eval_loss": 3.583984375, "eval_runtime": 2.6395, "eval_samples_per_second": 37.886, "eval_steps_per_second": 0.758, "step": 73 }, { "epoch": 4.62, "learning_rate": 5e-05, "loss": 0.5689, "step": 74 }, { "epoch": 4.62, "eval_accuracy": 0.035833333333333335, "eval_loss": 3.615234375, "eval_runtime": 2.4972, "eval_samples_per_second": 40.046, "eval_steps_per_second": 0.801, "step": 74 }, { "epoch": 4.69, "learning_rate": 5e-05, "loss": 0.5667, "step": 75 }, { "epoch": 4.69, "eval_accuracy": 0.0357520325203252, "eval_loss": 3.6328125, "eval_runtime": 2.4508, "eval_samples_per_second": 40.803, "eval_steps_per_second": 0.816, "step": 75 }, { "epoch": 4.75, "learning_rate": 5e-05, "loss": 0.554, "step": 76 }, { "epoch": 4.75, "eval_accuracy": 0.03573170731707317, "eval_loss": 3.634765625, "eval_runtime": 2.0574, "eval_samples_per_second": 48.604, "eval_steps_per_second": 0.972, "step": 76 }, { "epoch": 4.81, "learning_rate": 5e-05, "loss": 0.6087, "step": 77 }, { "epoch": 4.81, "eval_accuracy": 0.03552845528455285, "eval_loss": 3.625, "eval_runtime": 2.6082, "eval_samples_per_second": 38.34, "eval_steps_per_second": 0.767, "step": 77 }, { "epoch": 4.88, "learning_rate": 5e-05, "loss": 0.5236, "step": 78 }, { "epoch": 4.88, "eval_accuracy": 0.03552845528455285, "eval_loss": 3.615234375, "eval_runtime": 1.8631, "eval_samples_per_second": 53.674, "eval_steps_per_second": 1.073, "step": 78 }, { "epoch": 4.94, "learning_rate": 5e-05, "loss": 0.5458, "step": 79 }, { "epoch": 4.94, "eval_accuracy": 0.03550813008130081, "eval_loss": 3.578125, "eval_runtime": 2.6009, "eval_samples_per_second": 38.449, "eval_steps_per_second": 0.769, "step": 79 }, { "epoch": 5.0, "learning_rate": 5e-05, "loss": 0.5702, "step": 80 }, { "epoch": 5.0, "eval_accuracy": 0.03550813008130081, "eval_loss": 3.548828125, "eval_runtime": 2.5127, "eval_samples_per_second": 39.798, "eval_steps_per_second": 0.796, "step": 80 }, { "epoch": 5.0, "step": 80, "total_flos": 9903514583040.0, "train_loss": 1.6890087127685547, "train_runtime": 406.6939, "train_samples_per_second": 12.294, "train_steps_per_second": 0.197 } ], "max_steps": 80, "num_train_epochs": 5, "total_flos": 9903514583040.0, "trial_name": null, "trial_params": null }