{ "best_metric": 0.7648497198166072, "best_model_checkpoint": "/home/user/emrecan/models/bert-base-multilingual-cased_allnli_tr/checkpoint-80000", "epoch": 3.0, "global_step": 88320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.9773550724637682e-05, "loss": 0.8623, "step": 1000 }, { "epoch": 0.03, "eval_accuracy": 0.5917473255221599, "eval_loss": 0.9075943827629089, "eval_runtime": 49.3629, "eval_samples_per_second": 198.833, "eval_steps_per_second": 6.219, "step": 1000 }, { "epoch": 0.07, "learning_rate": 1.9547101449275363e-05, "loss": 0.7528, "step": 2000 }, { "epoch": 0.07, "eval_accuracy": 0.6119205298013245, "eval_loss": 0.8586783409118652, "eval_runtime": 49.2395, "eval_samples_per_second": 199.332, "eval_steps_per_second": 6.235, "step": 2000 }, { "epoch": 0.1, "learning_rate": 1.9320652173913047e-05, "loss": 0.7074, "step": 3000 }, { "epoch": 0.1, "eval_accuracy": 0.664696892511462, "eval_loss": 0.7866753935813904, "eval_runtime": 49.3359, "eval_samples_per_second": 198.942, "eval_steps_per_second": 6.223, "step": 3000 }, { "epoch": 0.14, "learning_rate": 1.9094202898550727e-05, "loss": 0.6949, "step": 4000 }, { "epoch": 0.14, "eval_accuracy": 0.6772287315333673, "eval_loss": 0.7473878264427185, "eval_runtime": 49.2961, "eval_samples_per_second": 199.103, "eval_steps_per_second": 6.228, "step": 4000 }, { "epoch": 0.17, "learning_rate": 1.8867753623188408e-05, "loss": 0.6681, "step": 5000 }, { "epoch": 0.17, "eval_accuracy": 0.6814060112073357, "eval_loss": 0.7661426067352295, "eval_runtime": 49.2761, "eval_samples_per_second": 199.184, "eval_steps_per_second": 6.23, "step": 5000 }, { "epoch": 0.2, "learning_rate": 1.864130434782609e-05, "loss": 0.6597, "step": 6000 }, { "epoch": 0.2, "eval_accuracy": 0.6943453897096281, "eval_loss": 0.7264170050621033, "eval_runtime": 49.2718, "eval_samples_per_second": 199.201, "eval_steps_per_second": 6.231, "step": 6000 }, { "epoch": 0.24, "learning_rate": 1.841485507246377e-05, "loss": 0.6495, "step": 7000 }, { "epoch": 0.24, "eval_accuracy": 0.6781456953642384, "eval_loss": 0.7841250896453857, "eval_runtime": 49.2905, "eval_samples_per_second": 199.126, "eval_steps_per_second": 6.228, "step": 7000 }, { "epoch": 0.27, "learning_rate": 1.818840579710145e-05, "loss": 0.6323, "step": 8000 }, { "epoch": 0.27, "eval_accuracy": 0.6951604686704025, "eval_loss": 0.7256377339363098, "eval_runtime": 49.2057, "eval_samples_per_second": 199.469, "eval_steps_per_second": 6.239, "step": 8000 }, { "epoch": 0.31, "learning_rate": 1.7961956521739134e-05, "loss": 0.6308, "step": 9000 }, { "epoch": 0.31, "eval_accuracy": 0.6957717778909832, "eval_loss": 0.7319093346595764, "eval_runtime": 49.2073, "eval_samples_per_second": 199.462, "eval_steps_per_second": 6.239, "step": 9000 }, { "epoch": 0.34, "learning_rate": 1.7735507246376815e-05, "loss": 0.6254, "step": 10000 }, { "epoch": 0.34, "eval_accuracy": 0.7003565970453388, "eval_loss": 0.7053707838058472, "eval_runtime": 49.2797, "eval_samples_per_second": 199.169, "eval_steps_per_second": 6.23, "step": 10000 }, { "epoch": 0.37, "learning_rate": 1.7509057971014495e-05, "loss": 0.6233, "step": 11000 }, { "epoch": 0.37, "eval_accuracy": 0.708507386653082, "eval_loss": 0.7068630456924438, "eval_runtime": 55.8024, "eval_samples_per_second": 175.889, "eval_steps_per_second": 5.502, "step": 11000 }, { "epoch": 0.41, "learning_rate": 1.7282608695652176e-05, "loss": 0.6165, "step": 12000 }, { "epoch": 0.41, "eval_accuracy": 0.7180845644421804, "eval_loss": 0.687969982624054, "eval_runtime": 50.9115, "eval_samples_per_second": 192.785, "eval_steps_per_second": 6.03, "step": 12000 }, { "epoch": 0.44, "learning_rate": 1.7056159420289856e-05, "loss": 0.6033, "step": 13000 }, { "epoch": 0.44, "eval_accuracy": 0.719714722363729, "eval_loss": 0.6844114065170288, "eval_runtime": 49.3739, "eval_samples_per_second": 198.789, "eval_steps_per_second": 6.218, "step": 13000 }, { "epoch": 0.48, "learning_rate": 1.6829710144927537e-05, "loss": 0.6014, "step": 14000 }, { "epoch": 0.48, "eval_accuracy": 0.7128884360672441, "eval_loss": 0.6752753853797913, "eval_runtime": 49.3697, "eval_samples_per_second": 198.806, "eval_steps_per_second": 6.218, "step": 14000 }, { "epoch": 0.51, "learning_rate": 1.6603260869565218e-05, "loss": 0.5947, "step": 15000 }, { "epoch": 0.51, "eval_accuracy": 0.7039225674987264, "eval_loss": 0.7000291347503662, "eval_runtime": 49.3889, "eval_samples_per_second": 198.729, "eval_steps_per_second": 6.216, "step": 15000 }, { "epoch": 0.54, "learning_rate": 1.6376811594202898e-05, "loss": 0.5965, "step": 16000 }, { "epoch": 0.54, "eval_accuracy": 0.7263372389200203, "eval_loss": 0.670754075050354, "eval_runtime": 49.311, "eval_samples_per_second": 199.043, "eval_steps_per_second": 6.226, "step": 16000 }, { "epoch": 0.58, "learning_rate": 1.615036231884058e-05, "loss": 0.5979, "step": 17000 }, { "epoch": 0.58, "eval_accuracy": 0.7284768211920529, "eval_loss": 0.656209409236908, "eval_runtime": 49.4043, "eval_samples_per_second": 198.667, "eval_steps_per_second": 6.214, "step": 17000 }, { "epoch": 0.61, "learning_rate": 1.5923913043478263e-05, "loss": 0.5787, "step": 18000 }, { "epoch": 0.61, "eval_accuracy": 0.7296994396332145, "eval_loss": 0.6554355621337891, "eval_runtime": 50.2795, "eval_samples_per_second": 195.209, "eval_steps_per_second": 6.106, "step": 18000 }, { "epoch": 0.65, "learning_rate": 1.5697463768115943e-05, "loss": 0.58, "step": 19000 }, { "epoch": 0.65, "eval_accuracy": 0.7315333672949567, "eval_loss": 0.6544056534767151, "eval_runtime": 49.3813, "eval_samples_per_second": 198.76, "eval_steps_per_second": 6.217, "step": 19000 }, { "epoch": 0.68, "learning_rate": 1.5471014492753624e-05, "loss": 0.574, "step": 20000 }, { "epoch": 0.68, "eval_accuracy": 0.7338767193071829, "eval_loss": 0.6549123525619507, "eval_runtime": 49.3189, "eval_samples_per_second": 199.011, "eval_steps_per_second": 6.225, "step": 20000 }, { "epoch": 0.71, "learning_rate": 1.5244565217391305e-05, "loss": 0.5751, "step": 21000 }, { "epoch": 0.71, "eval_accuracy": 0.7288843606724401, "eval_loss": 0.6545295715332031, "eval_runtime": 52.3727, "eval_samples_per_second": 187.407, "eval_steps_per_second": 5.862, "step": 21000 }, { "epoch": 0.75, "learning_rate": 1.5018115942028985e-05, "loss": 0.5659, "step": 22000 }, { "epoch": 0.75, "eval_accuracy": 0.7371370351502802, "eval_loss": 0.6466848850250244, "eval_runtime": 51.7409, "eval_samples_per_second": 189.695, "eval_steps_per_second": 5.933, "step": 22000 }, { "epoch": 0.78, "learning_rate": 1.479166666666667e-05, "loss": 0.5732, "step": 23000 }, { "epoch": 0.78, "eval_accuracy": 0.736220071319409, "eval_loss": 0.6447662711143494, "eval_runtime": 51.1755, "eval_samples_per_second": 191.791, "eval_steps_per_second": 5.999, "step": 23000 }, { "epoch": 0.82, "learning_rate": 1.456521739130435e-05, "loss": 0.5637, "step": 24000 }, { "epoch": 0.82, "eval_accuracy": 0.7355068772287315, "eval_loss": 0.6520141959190369, "eval_runtime": 51.0797, "eval_samples_per_second": 192.151, "eval_steps_per_second": 6.01, "step": 24000 }, { "epoch": 0.85, "learning_rate": 1.433876811594203e-05, "loss": 0.5648, "step": 25000 }, { "epoch": 0.85, "eval_accuracy": 0.7344880285277636, "eval_loss": 0.6411919593811035, "eval_runtime": 51.1242, "eval_samples_per_second": 191.983, "eval_steps_per_second": 6.005, "step": 25000 }, { "epoch": 0.88, "learning_rate": 1.4112318840579711e-05, "loss": 0.5622, "step": 26000 }, { "epoch": 0.88, "eval_accuracy": 0.735812531839022, "eval_loss": 0.6350020170211792, "eval_runtime": 51.0256, "eval_samples_per_second": 192.355, "eval_steps_per_second": 6.017, "step": 26000 }, { "epoch": 0.92, "learning_rate": 1.3885869565217392e-05, "loss": 0.5579, "step": 27000 }, { "epoch": 0.92, "eval_accuracy": 0.7392766174223128, "eval_loss": 0.6346594095230103, "eval_runtime": 51.1443, "eval_samples_per_second": 191.908, "eval_steps_per_second": 6.003, "step": 27000 }, { "epoch": 0.95, "learning_rate": 1.3659420289855074e-05, "loss": 0.5518, "step": 28000 }, { "epoch": 0.95, "eval_accuracy": 0.739174732552216, "eval_loss": 0.6417071223258972, "eval_runtime": 52.294, "eval_samples_per_second": 187.689, "eval_steps_per_second": 5.871, "step": 28000 }, { "epoch": 0.99, "learning_rate": 1.3432971014492755e-05, "loss": 0.5547, "step": 29000 }, { "epoch": 0.99, "eval_accuracy": 0.7436576668364748, "eval_loss": 0.6321312785148621, "eval_runtime": 51.1442, "eval_samples_per_second": 191.908, "eval_steps_per_second": 6.003, "step": 29000 }, { "epoch": 1.02, "learning_rate": 1.3206521739130435e-05, "loss": 0.524, "step": 30000 }, { "epoch": 1.02, "eval_accuracy": 0.7412124299541518, "eval_loss": 0.6430493593215942, "eval_runtime": 51.2108, "eval_samples_per_second": 191.659, "eval_steps_per_second": 5.995, "step": 30000 }, { "epoch": 1.05, "learning_rate": 1.2980072463768116e-05, "loss": 0.4982, "step": 31000 }, { "epoch": 1.05, "eval_accuracy": 0.7457972491085074, "eval_loss": 0.6252649426460266, "eval_runtime": 57.7511, "eval_samples_per_second": 169.954, "eval_steps_per_second": 5.316, "step": 31000 }, { "epoch": 1.09, "learning_rate": 1.2753623188405797e-05, "loss": 0.5002, "step": 32000 }, { "epoch": 1.09, "eval_accuracy": 0.7418237391747325, "eval_loss": 0.6316350698471069, "eval_runtime": 52.7966, "eval_samples_per_second": 185.902, "eval_steps_per_second": 5.815, "step": 32000 }, { "epoch": 1.12, "learning_rate": 1.252717391304348e-05, "loss": 0.4993, "step": 33000 }, { "epoch": 1.12, "eval_accuracy": 0.7486500254712175, "eval_loss": 0.6196975111961365, "eval_runtime": 51.1876, "eval_samples_per_second": 191.746, "eval_steps_per_second": 5.998, "step": 33000 }, { "epoch": 1.15, "learning_rate": 1.2300724637681161e-05, "loss": 0.4963, "step": 34000 }, { "epoch": 1.15, "eval_accuracy": 0.7462047885888945, "eval_loss": 0.6307246685028076, "eval_runtime": 51.0522, "eval_samples_per_second": 192.254, "eval_steps_per_second": 6.013, "step": 34000 }, { "epoch": 1.19, "learning_rate": 1.2074275362318842e-05, "loss": 0.504, "step": 35000 }, { "epoch": 1.19, "eval_accuracy": 0.7480387162506368, "eval_loss": 0.627221405506134, "eval_runtime": 51.0856, "eval_samples_per_second": 192.129, "eval_steps_per_second": 6.01, "step": 35000 }, { "epoch": 1.22, "learning_rate": 1.1847826086956522e-05, "loss": 0.4922, "step": 36000 }, { "epoch": 1.22, "eval_accuracy": 0.7432501273560876, "eval_loss": 0.6410390138626099, "eval_runtime": 51.1349, "eval_samples_per_second": 191.943, "eval_steps_per_second": 6.004, "step": 36000 }, { "epoch": 1.26, "learning_rate": 1.1621376811594205e-05, "loss": 0.5016, "step": 37000 }, { "epoch": 1.26, "eval_accuracy": 0.7461029037187977, "eval_loss": 0.6295490860939026, "eval_runtime": 51.0732, "eval_samples_per_second": 192.175, "eval_steps_per_second": 6.011, "step": 37000 }, { "epoch": 1.29, "learning_rate": 1.1394927536231885e-05, "loss": 0.4957, "step": 38000 }, { "epoch": 1.29, "eval_accuracy": 0.7505858380030566, "eval_loss": 0.6182788014411926, "eval_runtime": 51.1969, "eval_samples_per_second": 191.711, "eval_steps_per_second": 5.996, "step": 38000 }, { "epoch": 1.32, "learning_rate": 1.1168478260869566e-05, "loss": 0.4883, "step": 39000 }, { "epoch": 1.32, "eval_accuracy": 0.7501782985226694, "eval_loss": 0.6260754466056824, "eval_runtime": 51.1903, "eval_samples_per_second": 191.735, "eval_steps_per_second": 5.997, "step": 39000 }, { "epoch": 1.36, "learning_rate": 1.0942028985507247e-05, "loss": 0.4985, "step": 40000 }, { "epoch": 1.36, "eval_accuracy": 0.7495669893020886, "eval_loss": 0.6315430402755737, "eval_runtime": 51.1506, "eval_samples_per_second": 191.884, "eval_steps_per_second": 6.002, "step": 40000 }, { "epoch": 1.39, "learning_rate": 1.0715579710144927e-05, "loss": 0.4885, "step": 41000 }, { "epoch": 1.39, "eval_accuracy": 0.7529291900152827, "eval_loss": 0.6188690066337585, "eval_runtime": 52.5846, "eval_samples_per_second": 186.651, "eval_steps_per_second": 5.838, "step": 41000 }, { "epoch": 1.43, "learning_rate": 1.0489130434782611e-05, "loss": 0.4909, "step": 42000 }, { "epoch": 1.43, "eval_accuracy": 0.7473255221599593, "eval_loss": 0.6188654899597168, "eval_runtime": 51.6594, "eval_samples_per_second": 189.994, "eval_steps_per_second": 5.943, "step": 42000 }, { "epoch": 1.46, "learning_rate": 1.0262681159420292e-05, "loss": 0.4894, "step": 43000 }, { "epoch": 1.46, "eval_accuracy": 0.7432501273560876, "eval_loss": 0.631429135799408, "eval_runtime": 51.1492, "eval_samples_per_second": 191.889, "eval_steps_per_second": 6.002, "step": 43000 }, { "epoch": 1.49, "learning_rate": 1.0036231884057972e-05, "loss": 0.4912, "step": 44000 }, { "epoch": 1.49, "eval_accuracy": 0.7445746306673459, "eval_loss": 0.6183902025222778, "eval_runtime": 51.0914, "eval_samples_per_second": 192.107, "eval_steps_per_second": 6.009, "step": 44000 }, { "epoch": 1.53, "learning_rate": 9.809782608695653e-06, "loss": 0.4851, "step": 45000 }, { "epoch": 1.53, "eval_accuracy": 0.7461029037187977, "eval_loss": 0.6257576942443848, "eval_runtime": 50.9837, "eval_samples_per_second": 192.513, "eval_steps_per_second": 6.022, "step": 45000 }, { "epoch": 1.56, "learning_rate": 9.583333333333335e-06, "loss": 0.4879, "step": 46000 }, { "epoch": 1.56, "eval_accuracy": 0.7480387162506368, "eval_loss": 0.6286013126373291, "eval_runtime": 51.1172, "eval_samples_per_second": 192.01, "eval_steps_per_second": 6.006, "step": 46000 }, { "epoch": 1.6, "learning_rate": 9.356884057971016e-06, "loss": 0.4907, "step": 47000 }, { "epoch": 1.6, "eval_accuracy": 0.7511971472236373, "eval_loss": 0.6196326613426208, "eval_runtime": 51.072, "eval_samples_per_second": 192.18, "eval_steps_per_second": 6.011, "step": 47000 }, { "epoch": 1.63, "learning_rate": 9.130434782608697e-06, "loss": 0.4884, "step": 48000 }, { "epoch": 1.63, "eval_accuracy": 0.7526235354049924, "eval_loss": 0.6156549453735352, "eval_runtime": 51.1757, "eval_samples_per_second": 191.79, "eval_steps_per_second": 5.999, "step": 48000 }, { "epoch": 1.66, "learning_rate": 8.903985507246377e-06, "loss": 0.4755, "step": 49000 }, { "epoch": 1.66, "eval_accuracy": 0.7591441670911869, "eval_loss": 0.6055976152420044, "eval_runtime": 50.9202, "eval_samples_per_second": 192.752, "eval_steps_per_second": 6.029, "step": 49000 }, { "epoch": 1.7, "learning_rate": 8.677536231884058e-06, "loss": 0.4811, "step": 50000 }, { "epoch": 1.7, "eval_accuracy": 0.7582272032603159, "eval_loss": 0.5976621508598328, "eval_runtime": 51.1974, "eval_samples_per_second": 191.709, "eval_steps_per_second": 5.996, "step": 50000 }, { "epoch": 1.73, "learning_rate": 8.45108695652174e-06, "loss": 0.4787, "step": 51000 }, { "epoch": 1.73, "eval_accuracy": 0.7620988283239939, "eval_loss": 0.5914710164070129, "eval_runtime": 52.4733, "eval_samples_per_second": 187.047, "eval_steps_per_second": 5.851, "step": 51000 }, { "epoch": 1.77, "learning_rate": 8.22463768115942e-06, "loss": 0.4779, "step": 52000 }, { "epoch": 1.77, "eval_accuracy": 0.7583290881304127, "eval_loss": 0.6014041304588318, "eval_runtime": 51.6886, "eval_samples_per_second": 189.887, "eval_steps_per_second": 5.939, "step": 52000 }, { "epoch": 1.8, "learning_rate": 7.998188405797103e-06, "loss": 0.4767, "step": 53000 }, { "epoch": 1.8, "eval_accuracy": 0.7623025980641874, "eval_loss": 0.6041266918182373, "eval_runtime": 51.0745, "eval_samples_per_second": 192.17, "eval_steps_per_second": 6.011, "step": 53000 }, { "epoch": 1.83, "learning_rate": 7.771739130434784e-06, "loss": 0.4737, "step": 54000 }, { "epoch": 1.83, "eval_accuracy": 0.7562913907284768, "eval_loss": 0.6093412637710571, "eval_runtime": 51.0594, "eval_samples_per_second": 192.227, "eval_steps_per_second": 6.013, "step": 54000 }, { "epoch": 1.87, "learning_rate": 7.545289855072464e-06, "loss": 0.4836, "step": 55000 }, { "epoch": 1.87, "eval_accuracy": 0.7568008150789608, "eval_loss": 0.6001136302947998, "eval_runtime": 51.1468, "eval_samples_per_second": 191.899, "eval_steps_per_second": 6.002, "step": 55000 }, { "epoch": 1.9, "learning_rate": 7.318840579710146e-06, "loss": 0.4765, "step": 56000 }, { "epoch": 1.9, "eval_accuracy": 0.7600611309220581, "eval_loss": 0.6109462380409241, "eval_runtime": 51.0882, "eval_samples_per_second": 192.119, "eval_steps_per_second": 6.009, "step": 56000 }, { "epoch": 1.94, "learning_rate": 7.092391304347826e-06, "loss": 0.4776, "step": 57000 }, { "epoch": 1.94, "eval_accuracy": 0.7598573611818645, "eval_loss": 0.6045997142791748, "eval_runtime": 51.0831, "eval_samples_per_second": 192.138, "eval_steps_per_second": 6.01, "step": 57000 }, { "epoch": 1.97, "learning_rate": 6.865942028985509e-06, "loss": 0.4769, "step": 58000 }, { "epoch": 1.97, "eval_accuracy": 0.7568008150789608, "eval_loss": 0.5969610214233398, "eval_runtime": 51.092, "eval_samples_per_second": 192.105, "eval_steps_per_second": 6.009, "step": 58000 }, { "epoch": 2.0, "learning_rate": 6.639492753623189e-06, "loss": 0.4654, "step": 59000 }, { "epoch": 2.0, "eval_accuracy": 0.7613856342333164, "eval_loss": 0.6146702170372009, "eval_runtime": 51.0794, "eval_samples_per_second": 192.152, "eval_steps_per_second": 6.01, "step": 59000 }, { "epoch": 2.04, "learning_rate": 6.41304347826087e-06, "loss": 0.4144, "step": 60000 }, { "epoch": 2.04, "eval_accuracy": 0.7565970453387671, "eval_loss": 0.6438983678817749, "eval_runtime": 51.1073, "eval_samples_per_second": 192.047, "eval_steps_per_second": 6.007, "step": 60000 }, { "epoch": 2.07, "learning_rate": 6.186594202898551e-06, "loss": 0.4101, "step": 61000 }, { "epoch": 2.07, "eval_accuracy": 0.7527254202750892, "eval_loss": 0.637277364730835, "eval_runtime": 52.7247, "eval_samples_per_second": 186.156, "eval_steps_per_second": 5.823, "step": 61000 }, { "epoch": 2.11, "learning_rate": 5.960144927536232e-06, "loss": 0.4192, "step": 62000 }, { "epoch": 2.11, "eval_accuracy": 0.7575140091696383, "eval_loss": 0.6135603189468384, "eval_runtime": 52.6526, "eval_samples_per_second": 186.411, "eval_steps_per_second": 5.831, "step": 62000 }, { "epoch": 2.14, "learning_rate": 5.733695652173914e-06, "loss": 0.4128, "step": 63000 }, { "epoch": 2.14, "eval_accuracy": 0.7559857361181864, "eval_loss": 0.6282700896263123, "eval_runtime": 51.1146, "eval_samples_per_second": 192.02, "eval_steps_per_second": 6.006, "step": 63000 }, { "epoch": 2.17, "learning_rate": 5.507246376811595e-06, "loss": 0.4204, "step": 64000 }, { "epoch": 2.17, "eval_accuracy": 0.7625063678043811, "eval_loss": 0.618690013885498, "eval_runtime": 51.1974, "eval_samples_per_second": 191.709, "eval_steps_per_second": 5.996, "step": 64000 }, { "epoch": 2.21, "learning_rate": 5.2807971014492755e-06, "loss": 0.4114, "step": 65000 }, { "epoch": 2.21, "eval_accuracy": 0.7620988283239939, "eval_loss": 0.6127009987831116, "eval_runtime": 51.1304, "eval_samples_per_second": 191.96, "eval_steps_per_second": 6.004, "step": 65000 }, { "epoch": 2.24, "learning_rate": 5.054347826086957e-06, "loss": 0.4097, "step": 66000 }, { "epoch": 2.24, "eval_accuracy": 0.7626082526744778, "eval_loss": 0.618818998336792, "eval_runtime": 51.1752, "eval_samples_per_second": 191.792, "eval_steps_per_second": 5.999, "step": 66000 }, { "epoch": 2.28, "learning_rate": 4.8278985507246375e-06, "loss": 0.4129, "step": 67000 }, { "epoch": 2.28, "eval_accuracy": 0.7639327559857361, "eval_loss": 0.6156466603279114, "eval_runtime": 51.1634, "eval_samples_per_second": 191.836, "eval_steps_per_second": 6.0, "step": 67000 }, { "epoch": 2.31, "learning_rate": 4.601449275362319e-06, "loss": 0.4085, "step": 68000 }, { "epoch": 2.31, "eval_accuracy": 0.7615894039735099, "eval_loss": 0.6232104301452637, "eval_runtime": 51.1826, "eval_samples_per_second": 191.764, "eval_steps_per_second": 5.998, "step": 68000 }, { "epoch": 2.34, "learning_rate": 4.3750000000000005e-06, "loss": 0.4074, "step": 69000 }, { "epoch": 2.34, "eval_accuracy": 0.7604686704024453, "eval_loss": 0.6239916682243347, "eval_runtime": 51.0688, "eval_samples_per_second": 192.192, "eval_steps_per_second": 6.011, "step": 69000 }, { "epoch": 2.38, "learning_rate": 4.148550724637682e-06, "loss": 0.409, "step": 70000 }, { "epoch": 2.38, "eval_accuracy": 0.7591441670911869, "eval_loss": 0.6152721643447876, "eval_runtime": 51.1662, "eval_samples_per_second": 191.826, "eval_steps_per_second": 6.0, "step": 70000 }, { "epoch": 2.41, "learning_rate": 3.9221014492753625e-06, "loss": 0.4046, "step": 71000 }, { "epoch": 2.41, "eval_accuracy": 0.7587366276107997, "eval_loss": 0.6375284194946289, "eval_runtime": 52.7067, "eval_samples_per_second": 186.219, "eval_steps_per_second": 5.825, "step": 71000 }, { "epoch": 2.45, "learning_rate": 3.6956521739130436e-06, "loss": 0.4117, "step": 72000 }, { "epoch": 2.45, "eval_accuracy": 0.7629139072847683, "eval_loss": 0.6144647598266602, "eval_runtime": 52.5798, "eval_samples_per_second": 186.669, "eval_steps_per_second": 5.839, "step": 72000 }, { "epoch": 2.48, "learning_rate": 3.4692028985507246e-06, "loss": 0.4002, "step": 73000 }, { "epoch": 2.48, "eval_accuracy": 0.7609780947529292, "eval_loss": 0.6278636455535889, "eval_runtime": 51.1134, "eval_samples_per_second": 192.024, "eval_steps_per_second": 6.006, "step": 73000 }, { "epoch": 2.51, "learning_rate": 3.242753623188406e-06, "loss": 0.4042, "step": 74000 }, { "epoch": 2.51, "eval_accuracy": 0.7646459500764137, "eval_loss": 0.6176004409790039, "eval_runtime": 51.1309, "eval_samples_per_second": 191.958, "eval_steps_per_second": 6.004, "step": 74000 }, { "epoch": 2.55, "learning_rate": 3.016304347826087e-06, "loss": 0.4055, "step": 75000 }, { "epoch": 2.55, "eval_accuracy": 0.7643402954661233, "eval_loss": 0.627702534198761, "eval_runtime": 51.1017, "eval_samples_per_second": 192.068, "eval_steps_per_second": 6.008, "step": 75000 }, { "epoch": 2.58, "learning_rate": 2.7898550724637686e-06, "loss": 0.4021, "step": 76000 }, { "epoch": 2.58, "eval_accuracy": 0.7642384105960265, "eval_loss": 0.619607150554657, "eval_runtime": 51.1091, "eval_samples_per_second": 192.04, "eval_steps_per_second": 6.007, "step": 76000 }, { "epoch": 2.62, "learning_rate": 2.563405797101449e-06, "loss": 0.4081, "step": 77000 }, { "epoch": 2.62, "eval_accuracy": 0.7658685685175751, "eval_loss": 0.6127172708511353, "eval_runtime": 51.0673, "eval_samples_per_second": 192.197, "eval_steps_per_second": 6.012, "step": 77000 }, { "epoch": 2.65, "learning_rate": 2.3369565217391307e-06, "loss": 0.408, "step": 78000 }, { "epoch": 2.65, "eval_accuracy": 0.7638308711156393, "eval_loss": 0.6236501336097717, "eval_runtime": 51.0905, "eval_samples_per_second": 192.11, "eval_steps_per_second": 6.009, "step": 78000 }, { "epoch": 2.68, "learning_rate": 2.1105072463768117e-06, "loss": 0.3997, "step": 79000 }, { "epoch": 2.68, "eval_accuracy": 0.7636271013754458, "eval_loss": 0.6190339922904968, "eval_runtime": 51.097, "eval_samples_per_second": 192.086, "eval_steps_per_second": 6.008, "step": 79000 }, { "epoch": 2.72, "learning_rate": 1.884057971014493e-06, "loss": 0.4093, "step": 80000 }, { "epoch": 2.72, "eval_accuracy": 0.7648497198166072, "eval_loss": 0.615200936794281, "eval_runtime": 51.0806, "eval_samples_per_second": 192.147, "eval_steps_per_second": 6.01, "step": 80000 }, { "epoch": 2.75, "learning_rate": 1.657608695652174e-06, "loss": 0.4095, "step": 81000 }, { "epoch": 2.75, "eval_accuracy": 0.7627101375445746, "eval_loss": 0.6154515743255615, "eval_runtime": 52.4793, "eval_samples_per_second": 187.026, "eval_steps_per_second": 5.85, "step": 81000 }, { "epoch": 2.79, "learning_rate": 1.4311594202898552e-06, "loss": 0.4088, "step": 82000 }, { "epoch": 2.79, "eval_accuracy": 0.7641365257259297, "eval_loss": 0.6130374073982239, "eval_runtime": 52.6234, "eval_samples_per_second": 186.514, "eval_steps_per_second": 5.834, "step": 82000 }, { "epoch": 2.82, "learning_rate": 1.2047101449275363e-06, "loss": 0.4063, "step": 83000 }, { "epoch": 2.82, "eval_accuracy": 0.7646459500764137, "eval_loss": 0.6072085499763489, "eval_runtime": 51.0506, "eval_samples_per_second": 192.26, "eval_steps_per_second": 6.014, "step": 83000 }, { "epoch": 2.85, "learning_rate": 9.782608695652175e-07, "loss": 0.3978, "step": 84000 }, { "epoch": 2.85, "eval_accuracy": 0.7661742231278655, "eval_loss": 0.6128284931182861, "eval_runtime": 51.2162, "eval_samples_per_second": 191.639, "eval_steps_per_second": 5.994, "step": 84000 }, { "epoch": 2.89, "learning_rate": 7.518115942028987e-07, "loss": 0.4034, "step": 85000 }, { "epoch": 2.89, "eval_accuracy": 0.7627101375445746, "eval_loss": 0.6156770586967468, "eval_runtime": 51.1115, "eval_samples_per_second": 192.031, "eval_steps_per_second": 6.006, "step": 85000 }, { "epoch": 2.92, "learning_rate": 5.253623188405797e-07, "loss": 0.4044, "step": 86000 }, { "epoch": 2.92, "eval_accuracy": 0.7660723382577688, "eval_loss": 0.6127380728721619, "eval_runtime": 51.0045, "eval_samples_per_second": 192.434, "eval_steps_per_second": 6.019, "step": 86000 }, { "epoch": 2.96, "learning_rate": 2.989130434782609e-07, "loss": 0.403, "step": 87000 }, { "epoch": 2.96, "eval_accuracy": 0.7663779928680591, "eval_loss": 0.612598717212677, "eval_runtime": 51.1269, "eval_samples_per_second": 191.973, "eval_steps_per_second": 6.005, "step": 87000 }, { "epoch": 2.99, "learning_rate": 7.246376811594204e-08, "loss": 0.4033, "step": 88000 }, { "epoch": 2.99, "eval_accuracy": 0.7661742231278655, "eval_loss": 0.6143723726272583, "eval_runtime": 50.9677, "eval_samples_per_second": 192.573, "eval_steps_per_second": 6.023, "step": 88000 }, { "epoch": 3.0, "step": 88320, "total_flos": 1.4721628163172653e+17, "train_loss": 0.5041872973027437, "train_runtime": 53589.3429, "train_samples_per_second": 52.738, "train_steps_per_second": 1.648 } ], "max_steps": 88320, "num_train_epochs": 3, "total_flos": 1.4721628163172653e+17, "trial_name": null, "trial_params": null }