{ "best_metric": null, "best_model_checkpoint": null, "epoch": 60.0, "global_step": 35400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.85, "learning_rate": 0.005915423728813559, "loss": 0.9831, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.6217125382262997, "eval_loss": 0.8378309011459351, "eval_runtime": 43.2844, "eval_samples_per_second": 75.547, "eval_steps_per_second": 9.449, "step": 590 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 1.0, "step": 590 }, { "epoch": 1.69, "learning_rate": 0.005830847457627119, "loss": 0.965, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.3782874617737003, "eval_loss": 1.149783968925476, "eval_runtime": 43.2275, "eval_samples_per_second": 75.646, "eval_steps_per_second": 9.462, "step": 1180 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 2.0, "step": 1180 }, { "epoch": 2.54, "learning_rate": 0.005746101694915255, "loss": 0.8937, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.6217125382262997, "eval_loss": 1.2835785150527954, "eval_runtime": 43.2406, "eval_samples_per_second": 75.623, "eval_steps_per_second": 9.459, "step": 1770 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 3.0, "step": 1770 }, { "epoch": 3.39, "learning_rate": 0.00566135593220339, "loss": 0.9435, "step": 2000 }, { "epoch": 4.0, "eval_accuracy": 0.6217125382262997, "eval_loss": 0.8481320142745972, "eval_runtime": 43.2628, "eval_samples_per_second": 75.585, "eval_steps_per_second": 9.454, "step": 2360 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 4.0, "step": 2360 }, { "epoch": 4.24, "learning_rate": 0.005576779661016949, "loss": 0.8566, "step": 2500 }, { "epoch": 5.0, "eval_accuracy": 0.3834862385321101, "eval_loss": 0.9289329648017883, "eval_runtime": 43.2414, "eval_samples_per_second": 75.622, "eval_steps_per_second": 9.459, "step": 2950 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 5.0, "step": 2950 }, { "epoch": 5.08, "learning_rate": 0.005492033898305085, "loss": 0.8917, "step": 3000 }, { "epoch": 5.93, "learning_rate": 0.00540728813559322, "loss": 0.8868, "step": 3500 }, { "epoch": 6.0, "eval_accuracy": 0.591743119266055, "eval_loss": 0.651913583278656, "eval_runtime": 43.1168, "eval_samples_per_second": 75.84, "eval_steps_per_second": 9.486, "step": 3540 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 6.0, "step": 3540 }, { "epoch": 6.78, "learning_rate": 0.0053225423728813556, "loss": 0.8905, "step": 4000 }, { "epoch": 7.0, "eval_accuracy": 0.3785932721712538, "eval_loss": 1.771241307258606, "eval_runtime": 43.2692, "eval_samples_per_second": 75.573, "eval_steps_per_second": 9.452, "step": 4130 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 7.0, "step": 4130 }, { "epoch": 7.63, "learning_rate": 0.005237796610169492, "loss": 0.84, "step": 4500 }, { "epoch": 8.0, "eval_accuracy": 0.6217125382262997, "eval_loss": 0.9782317876815796, "eval_runtime": 43.2151, "eval_samples_per_second": 75.668, "eval_steps_per_second": 9.464, "step": 4720 }, { "best_epoch": 0, "best_eval_accuracy": 0.6217125382262997, "epoch": 8.0, "step": 4720 }, { "epoch": 8.47, "learning_rate": 0.005153220338983051, "loss": 0.7962, "step": 5000 }, { "epoch": 9.0, "eval_accuracy": 0.6568807339449542, "eval_loss": 0.6086090803146362, "eval_runtime": 43.139, "eval_samples_per_second": 75.801, "eval_steps_per_second": 9.481, "step": 5310 }, { "best_epoch": 8, "best_eval_accuracy": 0.6568807339449542, "epoch": 9.0, "step": 5310 }, { "epoch": 9.32, "learning_rate": 0.005068474576271187, "loss": 0.8003, "step": 5500 }, { "epoch": 10.0, "eval_accuracy": 0.6220183486238532, "eval_loss": 0.8011331558227539, "eval_runtime": 43.025, "eval_samples_per_second": 76.002, "eval_steps_per_second": 9.506, "step": 5900 }, { "best_epoch": 8, "best_eval_accuracy": 0.6568807339449542, "epoch": 10.0, "step": 5900 }, { "epoch": 10.17, "learning_rate": 0.004983728813559322, "loss": 0.793, "step": 6000 }, { "epoch": 11.0, "eval_accuracy": 0.6, "eval_loss": 0.6698857545852661, "eval_runtime": 43.1878, "eval_samples_per_second": 75.716, "eval_steps_per_second": 9.47, "step": 6490 }, { "best_epoch": 8, "best_eval_accuracy": 0.6568807339449542, "epoch": 11.0, "step": 6490 }, { "epoch": 11.02, "learning_rate": 0.004899152542372881, "loss": 0.7706, "step": 6500 }, { "epoch": 11.86, "learning_rate": 0.004814406779661017, "loss": 0.7558, "step": 7000 }, { "epoch": 12.0, "eval_accuracy": 0.6244648318042814, "eval_loss": 0.670046329498291, "eval_runtime": 43.1604, "eval_samples_per_second": 75.764, "eval_steps_per_second": 9.476, "step": 7080 }, { "best_epoch": 8, "best_eval_accuracy": 0.6568807339449542, "epoch": 12.0, "step": 7080 }, { "epoch": 12.71, "learning_rate": 0.004729661016949153, "loss": 0.7947, "step": 7500 }, { "epoch": 13.0, "eval_accuracy": 0.43149847094801225, "eval_loss": 1.0037223100662231, "eval_runtime": 43.2404, "eval_samples_per_second": 75.624, "eval_steps_per_second": 9.459, "step": 7670 }, { "best_epoch": 8, "best_eval_accuracy": 0.6568807339449542, "epoch": 13.0, "step": 7670 }, { "epoch": 13.56, "learning_rate": 0.004644915254237288, "loss": 0.7465, "step": 8000 }, { "epoch": 14.0, "eval_accuracy": 0.6902140672782875, "eval_loss": 0.6232324242591858, "eval_runtime": 43.2231, "eval_samples_per_second": 75.654, "eval_steps_per_second": 9.463, "step": 8260 }, { "best_epoch": 13, "best_eval_accuracy": 0.6902140672782875, "epoch": 14.0, "step": 8260 }, { "epoch": 14.41, "learning_rate": 0.004560169491525424, "loss": 0.6835, "step": 8500 }, { "epoch": 15.0, "eval_accuracy": 0.6889908256880733, "eval_loss": 0.6589847207069397, "eval_runtime": 43.0731, "eval_samples_per_second": 75.917, "eval_steps_per_second": 9.495, "step": 8850 }, { "best_epoch": 13, "best_eval_accuracy": 0.6902140672782875, "epoch": 15.0, "step": 8850 }, { "epoch": 15.25, "learning_rate": 0.004475593220338983, "loss": 0.7494, "step": 9000 }, { "epoch": 16.0, "eval_accuracy": 0.6862385321100918, "eval_loss": 0.7069215178489685, "eval_runtime": 43.2435, "eval_samples_per_second": 75.618, "eval_steps_per_second": 9.458, "step": 9440 }, { "best_epoch": 13, "best_eval_accuracy": 0.6902140672782875, "epoch": 16.0, "step": 9440 }, { "epoch": 16.1, "learning_rate": 0.004390847457627119, "loss": 0.7499, "step": 9500 }, { "epoch": 16.95, "learning_rate": 0.004306101694915254, "loss": 0.6775, "step": 10000 }, { "epoch": 17.0, "eval_accuracy": 0.4856269113149847, "eval_loss": 0.9627411365509033, "eval_runtime": 43.2672, "eval_samples_per_second": 75.577, "eval_steps_per_second": 9.453, "step": 10030 }, { "best_epoch": 13, "best_eval_accuracy": 0.6902140672782875, "epoch": 17.0, "step": 10030 }, { "epoch": 17.8, "learning_rate": 0.0042213559322033896, "loss": 0.6928, "step": 10500 }, { "epoch": 18.0, "eval_accuracy": 0.5663608562691131, "eval_loss": 1.088120460510254, "eval_runtime": 43.2117, "eval_samples_per_second": 75.674, "eval_steps_per_second": 9.465, "step": 10620 }, { "best_epoch": 13, "best_eval_accuracy": 0.6902140672782875, "epoch": 18.0, "step": 10620 }, { "epoch": 18.64, "learning_rate": 0.004136610169491526, "loss": 0.6991, "step": 11000 }, { "epoch": 19.0, "eval_accuracy": 0.7024464831804281, "eval_loss": 0.5778092741966248, "eval_runtime": 43.3502, "eval_samples_per_second": 75.432, "eval_steps_per_second": 9.435, "step": 11210 }, { "best_epoch": 18, "best_eval_accuracy": 0.7024464831804281, "epoch": 19.0, "step": 11210 }, { "epoch": 19.49, "learning_rate": 0.004051864406779661, "loss": 0.6594, "step": 11500 }, { "epoch": 20.0, "eval_accuracy": 0.6051987767584098, "eval_loss": 0.7909632325172424, "eval_runtime": 43.1541, "eval_samples_per_second": 75.775, "eval_steps_per_second": 9.478, "step": 11800 }, { "best_epoch": 18, "best_eval_accuracy": 0.7024464831804281, "epoch": 20.0, "step": 11800 }, { "epoch": 20.34, "learning_rate": 0.003967118644067796, "loss": 0.6327, "step": 12000 }, { "epoch": 21.0, "eval_accuracy": 0.6966360856269113, "eval_loss": 0.6203939914703369, "eval_runtime": 43.0351, "eval_samples_per_second": 75.984, "eval_steps_per_second": 9.504, "step": 12390 }, { "best_epoch": 18, "best_eval_accuracy": 0.7024464831804281, "epoch": 21.0, "step": 12390 }, { "epoch": 21.19, "learning_rate": 0.0038823728813559325, "loss": 0.6201, "step": 12500 }, { "epoch": 22.0, "eval_accuracy": 0.6792048929663609, "eval_loss": 0.5992993712425232, "eval_runtime": 43.2326, "eval_samples_per_second": 75.637, "eval_steps_per_second": 9.46, "step": 12980 }, { "best_epoch": 18, "best_eval_accuracy": 0.7024464831804281, "epoch": 22.0, "step": 12980 }, { "epoch": 22.03, "learning_rate": 0.0037977966101694917, "loss": 0.629, "step": 13000 }, { "epoch": 22.88, "learning_rate": 0.0037130508474576274, "loss": 0.6026, "step": 13500 }, { "epoch": 23.0, "eval_accuracy": 0.6633027522935779, "eval_loss": 0.6735050082206726, "eval_runtime": 43.216, "eval_samples_per_second": 75.666, "eval_steps_per_second": 9.464, "step": 13570 }, { "best_epoch": 18, "best_eval_accuracy": 0.7024464831804281, "epoch": 23.0, "step": 13570 }, { "epoch": 23.73, "learning_rate": 0.0036283050847457626, "loss": 0.5826, "step": 14000 }, { "epoch": 24.0, "eval_accuracy": 0.6605504587155964, "eval_loss": 0.6619319319725037, "eval_runtime": 43.1433, "eval_samples_per_second": 75.794, "eval_steps_per_second": 9.48, "step": 14160 }, { "best_epoch": 18, "best_eval_accuracy": 0.7024464831804281, "epoch": 24.0, "step": 14160 }, { "epoch": 24.58, "learning_rate": 0.0035435593220338986, "loss": 0.5831, "step": 14500 }, { "epoch": 25.0, "eval_accuracy": 0.7073394495412844, "eval_loss": 0.7766701579093933, "eval_runtime": 43.2655, "eval_samples_per_second": 75.58, "eval_steps_per_second": 9.453, "step": 14750 }, { "best_epoch": 24, "best_eval_accuracy": 0.7073394495412844, "epoch": 25.0, "step": 14750 }, { "epoch": 25.42, "learning_rate": 0.003458813559322034, "loss": 0.5809, "step": 15000 }, { "epoch": 26.0, "eval_accuracy": 0.5425076452599389, "eval_loss": 1.2840725183486938, "eval_runtime": 43.2156, "eval_samples_per_second": 75.667, "eval_steps_per_second": 9.464, "step": 15340 }, { "best_epoch": 24, "best_eval_accuracy": 0.7073394495412844, "epoch": 26.0, "step": 15340 }, { "epoch": 26.27, "learning_rate": 0.0033740677966101694, "loss": 0.6095, "step": 15500 }, { "epoch": 27.0, "eval_accuracy": 0.6400611620795107, "eval_loss": 0.8816479444503784, "eval_runtime": 43.1544, "eval_samples_per_second": 75.774, "eval_steps_per_second": 9.478, "step": 15930 }, { "best_epoch": 24, "best_eval_accuracy": 0.7073394495412844, "epoch": 27.0, "step": 15930 }, { "epoch": 27.12, "learning_rate": 0.0032893220338983055, "loss": 0.5729, "step": 16000 }, { "epoch": 27.97, "learning_rate": 0.0032045762711864407, "loss": 0.5478, "step": 16500 }, { "epoch": 28.0, "eval_accuracy": 0.7189602446483181, "eval_loss": 0.6825653910636902, "eval_runtime": 43.238, "eval_samples_per_second": 75.628, "eval_steps_per_second": 9.459, "step": 16520 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 28.0, "step": 16520 }, { "epoch": 28.81, "learning_rate": 0.0031198305084745763, "loss": 0.5516, "step": 17000 }, { "epoch": 29.0, "eval_accuracy": 0.7168195718654434, "eval_loss": 0.6076229214668274, "eval_runtime": 43.2007, "eval_samples_per_second": 75.693, "eval_steps_per_second": 9.467, "step": 17110 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 29.0, "step": 17110 }, { "epoch": 29.66, "learning_rate": 0.0030352542372881356, "loss": 0.5538, "step": 17500 }, { "epoch": 30.0, "eval_accuracy": 0.6859327217125383, "eval_loss": 0.9477331042289734, "eval_runtime": 43.2891, "eval_samples_per_second": 75.539, "eval_steps_per_second": 9.448, "step": 17700 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 30.0, "step": 17700 }, { "epoch": 30.51, "learning_rate": 0.002950508474576271, "loss": 0.5516, "step": 18000 }, { "epoch": 31.0, "eval_accuracy": 0.7137614678899082, "eval_loss": 0.6786766052246094, "eval_runtime": 43.2599, "eval_samples_per_second": 75.59, "eval_steps_per_second": 9.454, "step": 18290 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 31.0, "step": 18290 }, { "epoch": 31.36, "learning_rate": 0.002865762711864407, "loss": 0.5296, "step": 18500 }, { "epoch": 32.0, "eval_accuracy": 0.7006116207951071, "eval_loss": 0.8120760917663574, "eval_runtime": 43.2461, "eval_samples_per_second": 75.614, "eval_steps_per_second": 9.458, "step": 18880 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 32.0, "step": 18880 }, { "epoch": 32.2, "learning_rate": 0.002781186440677966, "loss": 0.5209, "step": 19000 }, { "epoch": 33.0, "eval_accuracy": 0.7018348623853211, "eval_loss": 0.8754389882087708, "eval_runtime": 43.2296, "eval_samples_per_second": 75.643, "eval_steps_per_second": 9.461, "step": 19470 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 33.0, "step": 19470 }, { "epoch": 33.05, "learning_rate": 0.0026964406779661017, "loss": 0.5172, "step": 19500 }, { "epoch": 33.9, "learning_rate": 0.0026116949152542373, "loss": 0.4932, "step": 20000 }, { "epoch": 34.0, "eval_accuracy": 0.7097859327217125, "eval_loss": 0.6252529621124268, "eval_runtime": 43.3313, "eval_samples_per_second": 75.465, "eval_steps_per_second": 9.439, "step": 20060 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 34.0, "step": 20060 }, { "epoch": 34.75, "learning_rate": 0.002527118644067797, "loss": 0.4914, "step": 20500 }, { "epoch": 35.0, "eval_accuracy": 0.7039755351681957, "eval_loss": 0.6481243968009949, "eval_runtime": 43.2111, "eval_samples_per_second": 75.675, "eval_steps_per_second": 9.465, "step": 20650 }, { "best_epoch": 27, "best_eval_accuracy": 0.7189602446483181, "epoch": 35.0, "step": 20650 }, { "epoch": 35.59, "learning_rate": 0.002442372881355932, "loss": 0.4845, "step": 21000 }, { "epoch": 36.0, "eval_accuracy": 0.7207951070336391, "eval_loss": 0.6696820855140686, "eval_runtime": 43.2359, "eval_samples_per_second": 75.632, "eval_steps_per_second": 9.46, "step": 21240 }, { "best_epoch": 35, "best_eval_accuracy": 0.7207951070336391, "epoch": 36.0, "step": 21240 }, { "epoch": 36.44, "learning_rate": 0.002357627118644068, "loss": 0.4836, "step": 21500 }, { "epoch": 37.0, "eval_accuracy": 0.7363914373088685, "eval_loss": 0.6275990605354309, "eval_runtime": 43.254, "eval_samples_per_second": 75.6, "eval_steps_per_second": 9.456, "step": 21830 }, { "best_epoch": 36, "best_eval_accuracy": 0.7363914373088685, "epoch": 37.0, "step": 21830 }, { "epoch": 37.29, "learning_rate": 0.0022728813559322034, "loss": 0.4592, "step": 22000 }, { "epoch": 38.0, "eval_accuracy": 0.7342507645259939, "eval_loss": 0.5963826775550842, "eval_runtime": 43.2046, "eval_samples_per_second": 75.686, "eval_steps_per_second": 9.467, "step": 22420 }, { "best_epoch": 36, "best_eval_accuracy": 0.7363914373088685, "epoch": 38.0, "step": 22420 }, { "epoch": 38.14, "learning_rate": 0.002188135593220339, "loss": 0.4692, "step": 22500 }, { "epoch": 38.98, "learning_rate": 0.0021033898305084747, "loss": 0.4642, "step": 23000 }, { "epoch": 39.0, "eval_accuracy": 0.7363914373088685, "eval_loss": 0.5508460402488708, "eval_runtime": 43.299, "eval_samples_per_second": 75.521, "eval_steps_per_second": 9.446, "step": 23010 }, { "best_epoch": 36, "best_eval_accuracy": 0.7363914373088685, "epoch": 39.0, "step": 23010 }, { "epoch": 39.83, "learning_rate": 0.002018813559322034, "loss": 0.4704, "step": 23500 }, { "epoch": 40.0, "eval_accuracy": 0.708256880733945, "eval_loss": 0.8355740308761597, "eval_runtime": 43.2934, "eval_samples_per_second": 75.531, "eval_steps_per_second": 9.447, "step": 23600 }, { "best_epoch": 36, "best_eval_accuracy": 0.7363914373088685, "epoch": 40.0, "step": 23600 }, { "epoch": 40.68, "learning_rate": 0.0019340677966101694, "loss": 0.4556, "step": 24000 }, { "epoch": 41.0, "eval_accuracy": 0.7339449541284404, "eval_loss": 0.6307940483093262, "eval_runtime": 43.1769, "eval_samples_per_second": 75.735, "eval_steps_per_second": 9.473, "step": 24190 }, { "best_epoch": 36, "best_eval_accuracy": 0.7363914373088685, "epoch": 41.0, "step": 24190 }, { "epoch": 41.53, "learning_rate": 0.0018494915254237288, "loss": 0.4583, "step": 24500 }, { "epoch": 42.0, "eval_accuracy": 0.7373088685015291, "eval_loss": 0.5991156697273254, "eval_runtime": 43.2189, "eval_samples_per_second": 75.661, "eval_steps_per_second": 9.463, "step": 24780 }, { "best_epoch": 41, "best_eval_accuracy": 0.7373088685015291, "epoch": 42.0, "step": 24780 }, { "epoch": 42.37, "learning_rate": 0.0017647457627118644, "loss": 0.4445, "step": 25000 }, { "epoch": 43.0, "eval_accuracy": 0.7247706422018348, "eval_loss": 0.6277905106544495, "eval_runtime": 43.2757, "eval_samples_per_second": 75.562, "eval_steps_per_second": 9.451, "step": 25370 }, { "best_epoch": 41, "best_eval_accuracy": 0.7373088685015291, "epoch": 43.0, "step": 25370 }, { "epoch": 43.22, "learning_rate": 0.0016800000000000003, "loss": 0.4298, "step": 25500 }, { "epoch": 44.0, "eval_accuracy": 0.6880733944954128, "eval_loss": 0.7619650363922119, "eval_runtime": 43.2274, "eval_samples_per_second": 75.646, "eval_steps_per_second": 9.462, "step": 25960 }, { "best_epoch": 41, "best_eval_accuracy": 0.7373088685015291, "epoch": 44.0, "step": 25960 }, { "epoch": 44.07, "learning_rate": 0.0015952542372881355, "loss": 0.4346, "step": 26000 }, { "epoch": 44.92, "learning_rate": 0.0015105084745762713, "loss": 0.4319, "step": 26500 }, { "epoch": 45.0, "eval_accuracy": 0.7311926605504587, "eval_loss": 0.6154680848121643, "eval_runtime": 43.3006, "eval_samples_per_second": 75.519, "eval_steps_per_second": 9.446, "step": 26550 }, { "best_epoch": 41, "best_eval_accuracy": 0.7373088685015291, "epoch": 45.0, "step": 26550 }, { "epoch": 45.76, "learning_rate": 0.0014257627118644067, "loss": 0.4178, "step": 27000 }, { "epoch": 46.0, "eval_accuracy": 0.736085626911315, "eval_loss": 0.6141914129257202, "eval_runtime": 43.3021, "eval_samples_per_second": 75.516, "eval_steps_per_second": 9.445, "step": 27140 }, { "best_epoch": 41, "best_eval_accuracy": 0.7373088685015291, "epoch": 46.0, "step": 27140 }, { "epoch": 46.61, "learning_rate": 0.0013410169491525424, "loss": 0.4204, "step": 27500 }, { "epoch": 47.0, "eval_accuracy": 0.7321100917431193, "eval_loss": 0.6599806547164917, "eval_runtime": 43.3421, "eval_samples_per_second": 75.446, "eval_steps_per_second": 9.437, "step": 27730 }, { "best_epoch": 41, "best_eval_accuracy": 0.7373088685015291, "epoch": 47.0, "step": 27730 }, { "epoch": 47.46, "learning_rate": 0.001256271186440678, "loss": 0.4204, "step": 28000 }, { "epoch": 48.0, "eval_accuracy": 0.7403669724770642, "eval_loss": 0.601150631904602, "eval_runtime": 43.197, "eval_samples_per_second": 75.7, "eval_steps_per_second": 9.468, "step": 28320 }, { "best_epoch": 47, "best_eval_accuracy": 0.7403669724770642, "epoch": 48.0, "step": 28320 }, { "epoch": 48.31, "learning_rate": 0.0011715254237288136, "loss": 0.4038, "step": 28500 }, { "epoch": 49.0, "eval_accuracy": 0.7391437308868501, "eval_loss": 0.6091906428337097, "eval_runtime": 43.2721, "eval_samples_per_second": 75.568, "eval_steps_per_second": 9.452, "step": 28910 }, { "best_epoch": 47, "best_eval_accuracy": 0.7403669724770642, "epoch": 49.0, "step": 28910 }, { "epoch": 49.15, "learning_rate": 0.001086779661016949, "loss": 0.4017, "step": 29000 }, { "epoch": 50.0, "learning_rate": 0.0010022033898305085, "loss": 0.4103, "step": 29500 }, { "epoch": 50.0, "eval_accuracy": 0.7333333333333333, "eval_loss": 0.6660399436950684, "eval_runtime": 43.2592, "eval_samples_per_second": 75.591, "eval_steps_per_second": 9.455, "step": 29500 }, { "best_epoch": 47, "best_eval_accuracy": 0.7403669724770642, "epoch": 50.0, "step": 29500 }, { "epoch": 50.85, "learning_rate": 0.000917627118644068, "loss": 0.3979, "step": 30000 }, { "epoch": 51.0, "eval_accuracy": 0.7363914373088685, "eval_loss": 0.6605736613273621, "eval_runtime": 43.1587, "eval_samples_per_second": 75.767, "eval_steps_per_second": 9.477, "step": 30090 }, { "best_epoch": 47, "best_eval_accuracy": 0.7403669724770642, "epoch": 51.0, "step": 30090 }, { "epoch": 51.69, "learning_rate": 0.0008328813559322035, "loss": 0.3946, "step": 30500 }, { "epoch": 52.0, "eval_accuracy": 0.7223241590214067, "eval_loss": 0.7039574384689331, "eval_runtime": 43.2757, "eval_samples_per_second": 75.562, "eval_steps_per_second": 9.451, "step": 30680 }, { "best_epoch": 47, "best_eval_accuracy": 0.7403669724770642, "epoch": 52.0, "step": 30680 }, { "epoch": 52.54, "learning_rate": 0.000748135593220339, "loss": 0.3857, "step": 31000 }, { "epoch": 53.0, "eval_accuracy": 0.7412844036697248, "eval_loss": 0.6302646398544312, "eval_runtime": 43.1428, "eval_samples_per_second": 75.795, "eval_steps_per_second": 9.48, "step": 31270 }, { "best_epoch": 52, "best_eval_accuracy": 0.7412844036697248, "epoch": 53.0, "step": 31270 }, { "epoch": 53.39, "learning_rate": 0.0006633898305084746, "loss": 0.3837, "step": 31500 }, { "epoch": 54.0, "eval_accuracy": 0.7269113149847095, "eval_loss": 0.6580860614776611, "eval_runtime": 43.2755, "eval_samples_per_second": 75.562, "eval_steps_per_second": 9.451, "step": 31860 }, { "best_epoch": 52, "best_eval_accuracy": 0.7412844036697248, "epoch": 54.0, "step": 31860 }, { "epoch": 54.24, "learning_rate": 0.0005786440677966102, "loss": 0.3803, "step": 32000 }, { "epoch": 55.0, "eval_accuracy": 0.7281345565749235, "eval_loss": 0.6364992260932922, "eval_runtime": 43.2674, "eval_samples_per_second": 75.577, "eval_steps_per_second": 9.453, "step": 32450 }, { "best_epoch": 52, "best_eval_accuracy": 0.7412844036697248, "epoch": 55.0, "step": 32450 }, { "epoch": 55.08, "learning_rate": 0.0004938983050847458, "loss": 0.3847, "step": 32500 }, { "epoch": 55.93, "learning_rate": 0.0004091525423728814, "loss": 0.3792, "step": 33000 }, { "epoch": 56.0, "eval_accuracy": 0.7302752293577982, "eval_loss": 0.6349842548370361, "eval_runtime": 43.2291, "eval_samples_per_second": 75.643, "eval_steps_per_second": 9.461, "step": 33040 }, { "best_epoch": 52, "best_eval_accuracy": 0.7412844036697248, "epoch": 56.0, "step": 33040 }, { "epoch": 56.78, "learning_rate": 0.00032440677966101696, "loss": 0.3826, "step": 33500 }, { "epoch": 57.0, "eval_accuracy": 0.7415902140672783, "eval_loss": 0.6233869791030884, "eval_runtime": 43.2253, "eval_samples_per_second": 75.65, "eval_steps_per_second": 9.462, "step": 33630 }, { "best_epoch": 56, "best_eval_accuracy": 0.7415902140672783, "epoch": 57.0, "step": 33630 }, { "epoch": 57.63, "learning_rate": 0.00023966101694915254, "loss": 0.3784, "step": 34000 }, { "epoch": 58.0, "eval_accuracy": 0.735474006116208, "eval_loss": 0.6312357187271118, "eval_runtime": 43.257, "eval_samples_per_second": 75.595, "eval_steps_per_second": 9.455, "step": 34220 }, { "best_epoch": 56, "best_eval_accuracy": 0.7415902140672783, "epoch": 58.0, "step": 34220 }, { "epoch": 58.47, "learning_rate": 0.00015491525423728814, "loss": 0.373, "step": 34500 }, { "epoch": 59.0, "eval_accuracy": 0.7403669724770642, "eval_loss": 0.6151607632637024, "eval_runtime": 43.2076, "eval_samples_per_second": 75.681, "eval_steps_per_second": 9.466, "step": 34810 }, { "best_epoch": 56, "best_eval_accuracy": 0.7415902140672783, "epoch": 59.0, "step": 34810 }, { "epoch": 59.32, "learning_rate": 7.016949152542373e-05, "loss": 0.3713, "step": 35000 }, { "epoch": 60.0, "eval_accuracy": 0.735474006116208, "eval_loss": 0.6204875707626343, "eval_runtime": 20.923, "eval_samples_per_second": 156.287, "eval_steps_per_second": 19.548, "step": 35400 }, { "best_epoch": 56, "best_eval_accuracy": 0.7415902140672783, "epoch": 60.0, "step": 35400 }, { "epoch": 60.0, "step": 35400, "total_flos": 2.6355950886279168e+17, "train_loss": 0.581377860031559, "train_runtime": 12400.4956, "train_samples_per_second": 45.613, "train_steps_per_second": 2.855 } ], "max_steps": 35400, "num_train_epochs": 60, "total_flos": 2.6355950886279168e+17, "trial_name": null, "trial_params": null }