{ |
|
"best_metric": 0.7648497198166072, |
|
"best_model_checkpoint": "/home/user/emrecan/models/bert-base-multilingual-cased_allnli_tr/checkpoint-80000", |
|
"epoch": 3.0, |
|
"global_step": 88320, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9773550724637682e-05, |
|
"loss": 0.8623, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_accuracy": 0.5917473255221599, |
|
"eval_loss": 0.9075943827629089, |
|
"eval_runtime": 49.3629, |
|
"eval_samples_per_second": 198.833, |
|
"eval_steps_per_second": 6.219, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9547101449275363e-05, |
|
"loss": 0.7528, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.6119205298013245, |
|
"eval_loss": 0.8586783409118652, |
|
"eval_runtime": 49.2395, |
|
"eval_samples_per_second": 199.332, |
|
"eval_steps_per_second": 6.235, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9320652173913047e-05, |
|
"loss": 0.7074, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.664696892511462, |
|
"eval_loss": 0.7866753935813904, |
|
"eval_runtime": 49.3359, |
|
"eval_samples_per_second": 198.942, |
|
"eval_steps_per_second": 6.223, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9094202898550727e-05, |
|
"loss": 0.6949, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_accuracy": 0.6772287315333673, |
|
"eval_loss": 0.7473878264427185, |
|
"eval_runtime": 49.2961, |
|
"eval_samples_per_second": 199.103, |
|
"eval_steps_per_second": 6.228, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8867753623188408e-05, |
|
"loss": 0.6681, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.6814060112073357, |
|
"eval_loss": 0.7661426067352295, |
|
"eval_runtime": 49.2761, |
|
"eval_samples_per_second": 199.184, |
|
"eval_steps_per_second": 6.23, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.864130434782609e-05, |
|
"loss": 0.6597, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.6943453897096281, |
|
"eval_loss": 0.7264170050621033, |
|
"eval_runtime": 49.2718, |
|
"eval_samples_per_second": 199.201, |
|
"eval_steps_per_second": 6.231, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.841485507246377e-05, |
|
"loss": 0.6495, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.6781456953642384, |
|
"eval_loss": 0.7841250896453857, |
|
"eval_runtime": 49.2905, |
|
"eval_samples_per_second": 199.126, |
|
"eval_steps_per_second": 6.228, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.818840579710145e-05, |
|
"loss": 0.6323, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.6951604686704025, |
|
"eval_loss": 0.7256377339363098, |
|
"eval_runtime": 49.2057, |
|
"eval_samples_per_second": 199.469, |
|
"eval_steps_per_second": 6.239, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.7961956521739134e-05, |
|
"loss": 0.6308, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.6957717778909832, |
|
"eval_loss": 0.7319093346595764, |
|
"eval_runtime": 49.2073, |
|
"eval_samples_per_second": 199.462, |
|
"eval_steps_per_second": 6.239, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.7735507246376815e-05, |
|
"loss": 0.6254, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.7003565970453388, |
|
"eval_loss": 0.7053707838058472, |
|
"eval_runtime": 49.2797, |
|
"eval_samples_per_second": 199.169, |
|
"eval_steps_per_second": 6.23, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.7509057971014495e-05, |
|
"loss": 0.6233, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.708507386653082, |
|
"eval_loss": 0.7068630456924438, |
|
"eval_runtime": 55.8024, |
|
"eval_samples_per_second": 175.889, |
|
"eval_steps_per_second": 5.502, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.7282608695652176e-05, |
|
"loss": 0.6165, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_accuracy": 0.7180845644421804, |
|
"eval_loss": 0.687969982624054, |
|
"eval_runtime": 50.9115, |
|
"eval_samples_per_second": 192.785, |
|
"eval_steps_per_second": 6.03, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.7056159420289856e-05, |
|
"loss": 0.6033, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.719714722363729, |
|
"eval_loss": 0.6844114065170288, |
|
"eval_runtime": 49.3739, |
|
"eval_samples_per_second": 198.789, |
|
"eval_steps_per_second": 6.218, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.6829710144927537e-05, |
|
"loss": 0.6014, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.7128884360672441, |
|
"eval_loss": 0.6752753853797913, |
|
"eval_runtime": 49.3697, |
|
"eval_samples_per_second": 198.806, |
|
"eval_steps_per_second": 6.218, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.6603260869565218e-05, |
|
"loss": 0.5947, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.7039225674987264, |
|
"eval_loss": 0.7000291347503662, |
|
"eval_runtime": 49.3889, |
|
"eval_samples_per_second": 198.729, |
|
"eval_steps_per_second": 6.216, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.6376811594202898e-05, |
|
"loss": 0.5965, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.7263372389200203, |
|
"eval_loss": 0.670754075050354, |
|
"eval_runtime": 49.311, |
|
"eval_samples_per_second": 199.043, |
|
"eval_steps_per_second": 6.226, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 1.615036231884058e-05, |
|
"loss": 0.5979, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_accuracy": 0.7284768211920529, |
|
"eval_loss": 0.656209409236908, |
|
"eval_runtime": 49.4043, |
|
"eval_samples_per_second": 198.667, |
|
"eval_steps_per_second": 6.214, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.5923913043478263e-05, |
|
"loss": 0.5787, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.7296994396332145, |
|
"eval_loss": 0.6554355621337891, |
|
"eval_runtime": 50.2795, |
|
"eval_samples_per_second": 195.209, |
|
"eval_steps_per_second": 6.106, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.5697463768115943e-05, |
|
"loss": 0.58, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.7315333672949567, |
|
"eval_loss": 0.6544056534767151, |
|
"eval_runtime": 49.3813, |
|
"eval_samples_per_second": 198.76, |
|
"eval_steps_per_second": 6.217, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.5471014492753624e-05, |
|
"loss": 0.574, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.7338767193071829, |
|
"eval_loss": 0.6549123525619507, |
|
"eval_runtime": 49.3189, |
|
"eval_samples_per_second": 199.011, |
|
"eval_steps_per_second": 6.225, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.5244565217391305e-05, |
|
"loss": 0.5751, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.7288843606724401, |
|
"eval_loss": 0.6545295715332031, |
|
"eval_runtime": 52.3727, |
|
"eval_samples_per_second": 187.407, |
|
"eval_steps_per_second": 5.862, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 1.5018115942028985e-05, |
|
"loss": 0.5659, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.7371370351502802, |
|
"eval_loss": 0.6466848850250244, |
|
"eval_runtime": 51.7409, |
|
"eval_samples_per_second": 189.695, |
|
"eval_steps_per_second": 5.933, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 1.479166666666667e-05, |
|
"loss": 0.5732, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.736220071319409, |
|
"eval_loss": 0.6447662711143494, |
|
"eval_runtime": 51.1755, |
|
"eval_samples_per_second": 191.791, |
|
"eval_steps_per_second": 5.999, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.456521739130435e-05, |
|
"loss": 0.5637, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.7355068772287315, |
|
"eval_loss": 0.6520141959190369, |
|
"eval_runtime": 51.0797, |
|
"eval_samples_per_second": 192.151, |
|
"eval_steps_per_second": 6.01, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.433876811594203e-05, |
|
"loss": 0.5648, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.7344880285277636, |
|
"eval_loss": 0.6411919593811035, |
|
"eval_runtime": 51.1242, |
|
"eval_samples_per_second": 191.983, |
|
"eval_steps_per_second": 6.005, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 1.4112318840579711e-05, |
|
"loss": 0.5622, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.735812531839022, |
|
"eval_loss": 0.6350020170211792, |
|
"eval_runtime": 51.0256, |
|
"eval_samples_per_second": 192.355, |
|
"eval_steps_per_second": 6.017, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.3885869565217392e-05, |
|
"loss": 0.5579, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.7392766174223128, |
|
"eval_loss": 0.6346594095230103, |
|
"eval_runtime": 51.1443, |
|
"eval_samples_per_second": 191.908, |
|
"eval_steps_per_second": 6.003, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.3659420289855074e-05, |
|
"loss": 0.5518, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.739174732552216, |
|
"eval_loss": 0.6417071223258972, |
|
"eval_runtime": 52.294, |
|
"eval_samples_per_second": 187.689, |
|
"eval_steps_per_second": 5.871, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.3432971014492755e-05, |
|
"loss": 0.5547, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.7436576668364748, |
|
"eval_loss": 0.6321312785148621, |
|
"eval_runtime": 51.1442, |
|
"eval_samples_per_second": 191.908, |
|
"eval_steps_per_second": 6.003, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 1.3206521739130435e-05, |
|
"loss": 0.524, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.7412124299541518, |
|
"eval_loss": 0.6430493593215942, |
|
"eval_runtime": 51.2108, |
|
"eval_samples_per_second": 191.659, |
|
"eval_steps_per_second": 5.995, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 1.2980072463768116e-05, |
|
"loss": 0.4982, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_accuracy": 0.7457972491085074, |
|
"eval_loss": 0.6252649426460266, |
|
"eval_runtime": 57.7511, |
|
"eval_samples_per_second": 169.954, |
|
"eval_steps_per_second": 5.316, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 1.2753623188405797e-05, |
|
"loss": 0.5002, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_accuracy": 0.7418237391747325, |
|
"eval_loss": 0.6316350698471069, |
|
"eval_runtime": 52.7966, |
|
"eval_samples_per_second": 185.902, |
|
"eval_steps_per_second": 5.815, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 1.252717391304348e-05, |
|
"loss": 0.4993, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_accuracy": 0.7486500254712175, |
|
"eval_loss": 0.6196975111961365, |
|
"eval_runtime": 51.1876, |
|
"eval_samples_per_second": 191.746, |
|
"eval_steps_per_second": 5.998, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 1.2300724637681161e-05, |
|
"loss": 0.4963, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.7462047885888945, |
|
"eval_loss": 0.6307246685028076, |
|
"eval_runtime": 51.0522, |
|
"eval_samples_per_second": 192.254, |
|
"eval_steps_per_second": 6.013, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 1.2074275362318842e-05, |
|
"loss": 0.504, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_accuracy": 0.7480387162506368, |
|
"eval_loss": 0.627221405506134, |
|
"eval_runtime": 51.0856, |
|
"eval_samples_per_second": 192.129, |
|
"eval_steps_per_second": 6.01, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 1.1847826086956522e-05, |
|
"loss": 0.4922, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_accuracy": 0.7432501273560876, |
|
"eval_loss": 0.6410390138626099, |
|
"eval_runtime": 51.1349, |
|
"eval_samples_per_second": 191.943, |
|
"eval_steps_per_second": 6.004, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 1.1621376811594205e-05, |
|
"loss": 0.5016, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"eval_accuracy": 0.7461029037187977, |
|
"eval_loss": 0.6295490860939026, |
|
"eval_runtime": 51.0732, |
|
"eval_samples_per_second": 192.175, |
|
"eval_steps_per_second": 6.011, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 1.1394927536231885e-05, |
|
"loss": 0.4957, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_accuracy": 0.7505858380030566, |
|
"eval_loss": 0.6182788014411926, |
|
"eval_runtime": 51.1969, |
|
"eval_samples_per_second": 191.711, |
|
"eval_steps_per_second": 5.996, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 1.1168478260869566e-05, |
|
"loss": 0.4883, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.7501782985226694, |
|
"eval_loss": 0.6260754466056824, |
|
"eval_runtime": 51.1903, |
|
"eval_samples_per_second": 191.735, |
|
"eval_steps_per_second": 5.997, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 1.0942028985507247e-05, |
|
"loss": 0.4985, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_accuracy": 0.7495669893020886, |
|
"eval_loss": 0.6315430402755737, |
|
"eval_runtime": 51.1506, |
|
"eval_samples_per_second": 191.884, |
|
"eval_steps_per_second": 6.002, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.0715579710144927e-05, |
|
"loss": 0.4885, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_accuracy": 0.7529291900152827, |
|
"eval_loss": 0.6188690066337585, |
|
"eval_runtime": 52.5846, |
|
"eval_samples_per_second": 186.651, |
|
"eval_steps_per_second": 5.838, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.0489130434782611e-05, |
|
"loss": 0.4909, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_accuracy": 0.7473255221599593, |
|
"eval_loss": 0.6188654899597168, |
|
"eval_runtime": 51.6594, |
|
"eval_samples_per_second": 189.994, |
|
"eval_steps_per_second": 5.943, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.0262681159420292e-05, |
|
"loss": 0.4894, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_accuracy": 0.7432501273560876, |
|
"eval_loss": 0.631429135799408, |
|
"eval_runtime": 51.1492, |
|
"eval_samples_per_second": 191.889, |
|
"eval_steps_per_second": 6.002, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 1.0036231884057972e-05, |
|
"loss": 0.4912, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"eval_accuracy": 0.7445746306673459, |
|
"eval_loss": 0.6183902025222778, |
|
"eval_runtime": 51.0914, |
|
"eval_samples_per_second": 192.107, |
|
"eval_steps_per_second": 6.009, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 9.809782608695653e-06, |
|
"loss": 0.4851, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.7461029037187977, |
|
"eval_loss": 0.6257576942443848, |
|
"eval_runtime": 50.9837, |
|
"eval_samples_per_second": 192.513, |
|
"eval_steps_per_second": 6.022, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 9.583333333333335e-06, |
|
"loss": 0.4879, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_accuracy": 0.7480387162506368, |
|
"eval_loss": 0.6286013126373291, |
|
"eval_runtime": 51.1172, |
|
"eval_samples_per_second": 192.01, |
|
"eval_steps_per_second": 6.006, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 9.356884057971016e-06, |
|
"loss": 0.4907, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.7511971472236373, |
|
"eval_loss": 0.6196326613426208, |
|
"eval_runtime": 51.072, |
|
"eval_samples_per_second": 192.18, |
|
"eval_steps_per_second": 6.011, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 9.130434782608697e-06, |
|
"loss": 0.4884, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"eval_accuracy": 0.7526235354049924, |
|
"eval_loss": 0.6156549453735352, |
|
"eval_runtime": 51.1757, |
|
"eval_samples_per_second": 191.79, |
|
"eval_steps_per_second": 5.999, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 8.903985507246377e-06, |
|
"loss": 0.4755, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_accuracy": 0.7591441670911869, |
|
"eval_loss": 0.6055976152420044, |
|
"eval_runtime": 50.9202, |
|
"eval_samples_per_second": 192.752, |
|
"eval_steps_per_second": 6.029, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.677536231884058e-06, |
|
"loss": 0.4811, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_accuracy": 0.7582272032603159, |
|
"eval_loss": 0.5976621508598328, |
|
"eval_runtime": 51.1974, |
|
"eval_samples_per_second": 191.709, |
|
"eval_steps_per_second": 5.996, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 8.45108695652174e-06, |
|
"loss": 0.4787, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.7620988283239939, |
|
"eval_loss": 0.5914710164070129, |
|
"eval_runtime": 52.4733, |
|
"eval_samples_per_second": 187.047, |
|
"eval_steps_per_second": 5.851, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 8.22463768115942e-06, |
|
"loss": 0.4779, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_accuracy": 0.7583290881304127, |
|
"eval_loss": 0.6014041304588318, |
|
"eval_runtime": 51.6886, |
|
"eval_samples_per_second": 189.887, |
|
"eval_steps_per_second": 5.939, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 7.998188405797103e-06, |
|
"loss": 0.4767, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.7623025980641874, |
|
"eval_loss": 0.6041266918182373, |
|
"eval_runtime": 51.0745, |
|
"eval_samples_per_second": 192.17, |
|
"eval_steps_per_second": 6.011, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 7.771739130434784e-06, |
|
"loss": 0.4737, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"eval_accuracy": 0.7562913907284768, |
|
"eval_loss": 0.6093412637710571, |
|
"eval_runtime": 51.0594, |
|
"eval_samples_per_second": 192.227, |
|
"eval_steps_per_second": 6.013, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 7.545289855072464e-06, |
|
"loss": 0.4836, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.7568008150789608, |
|
"eval_loss": 0.6001136302947998, |
|
"eval_runtime": 51.1468, |
|
"eval_samples_per_second": 191.899, |
|
"eval_steps_per_second": 6.002, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 7.318840579710146e-06, |
|
"loss": 0.4765, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_accuracy": 0.7600611309220581, |
|
"eval_loss": 0.6109462380409241, |
|
"eval_runtime": 51.0882, |
|
"eval_samples_per_second": 192.119, |
|
"eval_steps_per_second": 6.009, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 7.092391304347826e-06, |
|
"loss": 0.4776, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_accuracy": 0.7598573611818645, |
|
"eval_loss": 0.6045997142791748, |
|
"eval_runtime": 51.0831, |
|
"eval_samples_per_second": 192.138, |
|
"eval_steps_per_second": 6.01, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 6.865942028985509e-06, |
|
"loss": 0.4769, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.7568008150789608, |
|
"eval_loss": 0.5969610214233398, |
|
"eval_runtime": 51.092, |
|
"eval_samples_per_second": 192.105, |
|
"eval_steps_per_second": 6.009, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 6.639492753623189e-06, |
|
"loss": 0.4654, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7613856342333164, |
|
"eval_loss": 0.6146702170372009, |
|
"eval_runtime": 51.0794, |
|
"eval_samples_per_second": 192.152, |
|
"eval_steps_per_second": 6.01, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 6.41304347826087e-06, |
|
"loss": 0.4144, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_accuracy": 0.7565970453387671, |
|
"eval_loss": 0.6438983678817749, |
|
"eval_runtime": 51.1073, |
|
"eval_samples_per_second": 192.047, |
|
"eval_steps_per_second": 6.007, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 6.186594202898551e-06, |
|
"loss": 0.4101, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_accuracy": 0.7527254202750892, |
|
"eval_loss": 0.637277364730835, |
|
"eval_runtime": 52.7247, |
|
"eval_samples_per_second": 186.156, |
|
"eval_steps_per_second": 5.823, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5.960144927536232e-06, |
|
"loss": 0.4192, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_accuracy": 0.7575140091696383, |
|
"eval_loss": 0.6135603189468384, |
|
"eval_runtime": 52.6526, |
|
"eval_samples_per_second": 186.411, |
|
"eval_steps_per_second": 5.831, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 5.733695652173914e-06, |
|
"loss": 0.4128, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.7559857361181864, |
|
"eval_loss": 0.6282700896263123, |
|
"eval_runtime": 51.1146, |
|
"eval_samples_per_second": 192.02, |
|
"eval_steps_per_second": 6.006, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 5.507246376811595e-06, |
|
"loss": 0.4204, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_accuracy": 0.7625063678043811, |
|
"eval_loss": 0.618690013885498, |
|
"eval_runtime": 51.1974, |
|
"eval_samples_per_second": 191.709, |
|
"eval_steps_per_second": 5.996, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 5.2807971014492755e-06, |
|
"loss": 0.4114, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"eval_accuracy": 0.7620988283239939, |
|
"eval_loss": 0.6127009987831116, |
|
"eval_runtime": 51.1304, |
|
"eval_samples_per_second": 191.96, |
|
"eval_steps_per_second": 6.004, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 5.054347826086957e-06, |
|
"loss": 0.4097, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_accuracy": 0.7626082526744778, |
|
"eval_loss": 0.618818998336792, |
|
"eval_runtime": 51.1752, |
|
"eval_samples_per_second": 191.792, |
|
"eval_steps_per_second": 5.999, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 4.8278985507246375e-06, |
|
"loss": 0.4129, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_accuracy": 0.7639327559857361, |
|
"eval_loss": 0.6156466603279114, |
|
"eval_runtime": 51.1634, |
|
"eval_samples_per_second": 191.836, |
|
"eval_steps_per_second": 6.0, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 4.601449275362319e-06, |
|
"loss": 0.4085, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_accuracy": 0.7615894039735099, |
|
"eval_loss": 0.6232104301452637, |
|
"eval_runtime": 51.1826, |
|
"eval_samples_per_second": 191.764, |
|
"eval_steps_per_second": 5.998, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.3750000000000005e-06, |
|
"loss": 0.4074, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_accuracy": 0.7604686704024453, |
|
"eval_loss": 0.6239916682243347, |
|
"eval_runtime": 51.0688, |
|
"eval_samples_per_second": 192.192, |
|
"eval_steps_per_second": 6.011, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 4.148550724637682e-06, |
|
"loss": 0.409, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_accuracy": 0.7591441670911869, |
|
"eval_loss": 0.6152721643447876, |
|
"eval_runtime": 51.1662, |
|
"eval_samples_per_second": 191.826, |
|
"eval_steps_per_second": 6.0, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 3.9221014492753625e-06, |
|
"loss": 0.4046, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"eval_accuracy": 0.7587366276107997, |
|
"eval_loss": 0.6375284194946289, |
|
"eval_runtime": 52.7067, |
|
"eval_samples_per_second": 186.219, |
|
"eval_steps_per_second": 5.825, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 3.6956521739130436e-06, |
|
"loss": 0.4117, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_accuracy": 0.7629139072847683, |
|
"eval_loss": 0.6144647598266602, |
|
"eval_runtime": 52.5798, |
|
"eval_samples_per_second": 186.669, |
|
"eval_steps_per_second": 5.839, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 3.4692028985507246e-06, |
|
"loss": 0.4002, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_accuracy": 0.7609780947529292, |
|
"eval_loss": 0.6278636455535889, |
|
"eval_runtime": 51.1134, |
|
"eval_samples_per_second": 192.024, |
|
"eval_steps_per_second": 6.006, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 3.242753623188406e-06, |
|
"loss": 0.4042, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"eval_accuracy": 0.7646459500764137, |
|
"eval_loss": 0.6176004409790039, |
|
"eval_runtime": 51.1309, |
|
"eval_samples_per_second": 191.958, |
|
"eval_steps_per_second": 6.004, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 3.016304347826087e-06, |
|
"loss": 0.4055, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_accuracy": 0.7643402954661233, |
|
"eval_loss": 0.627702534198761, |
|
"eval_runtime": 51.1017, |
|
"eval_samples_per_second": 192.068, |
|
"eval_steps_per_second": 6.008, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.7898550724637686e-06, |
|
"loss": 0.4021, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"eval_accuracy": 0.7642384105960265, |
|
"eval_loss": 0.619607150554657, |
|
"eval_runtime": 51.1091, |
|
"eval_samples_per_second": 192.04, |
|
"eval_steps_per_second": 6.007, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 2.563405797101449e-06, |
|
"loss": 0.4081, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_accuracy": 0.7658685685175751, |
|
"eval_loss": 0.6127172708511353, |
|
"eval_runtime": 51.0673, |
|
"eval_samples_per_second": 192.197, |
|
"eval_steps_per_second": 6.012, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 2.3369565217391307e-06, |
|
"loss": 0.408, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"eval_accuracy": 0.7638308711156393, |
|
"eval_loss": 0.6236501336097717, |
|
"eval_runtime": 51.0905, |
|
"eval_samples_per_second": 192.11, |
|
"eval_steps_per_second": 6.009, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 2.1105072463768117e-06, |
|
"loss": 0.3997, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.7636271013754458, |
|
"eval_loss": 0.6190339922904968, |
|
"eval_runtime": 51.097, |
|
"eval_samples_per_second": 192.086, |
|
"eval_steps_per_second": 6.008, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 1.884057971014493e-06, |
|
"loss": 0.4093, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_accuracy": 0.7648497198166072, |
|
"eval_loss": 0.615200936794281, |
|
"eval_runtime": 51.0806, |
|
"eval_samples_per_second": 192.147, |
|
"eval_steps_per_second": 6.01, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.657608695652174e-06, |
|
"loss": 0.4095, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_accuracy": 0.7627101375445746, |
|
"eval_loss": 0.6154515743255615, |
|
"eval_runtime": 52.4793, |
|
"eval_samples_per_second": 187.026, |
|
"eval_steps_per_second": 5.85, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 1.4311594202898552e-06, |
|
"loss": 0.4088, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"eval_accuracy": 0.7641365257259297, |
|
"eval_loss": 0.6130374073982239, |
|
"eval_runtime": 52.6234, |
|
"eval_samples_per_second": 186.514, |
|
"eval_steps_per_second": 5.834, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 1.2047101449275363e-06, |
|
"loss": 0.4063, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_accuracy": 0.7646459500764137, |
|
"eval_loss": 0.6072085499763489, |
|
"eval_runtime": 51.0506, |
|
"eval_samples_per_second": 192.26, |
|
"eval_steps_per_second": 6.014, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 9.782608695652175e-07, |
|
"loss": 0.3978, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_accuracy": 0.7661742231278655, |
|
"eval_loss": 0.6128284931182861, |
|
"eval_runtime": 51.2162, |
|
"eval_samples_per_second": 191.639, |
|
"eval_steps_per_second": 5.994, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 7.518115942028987e-07, |
|
"loss": 0.4034, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_accuracy": 0.7627101375445746, |
|
"eval_loss": 0.6156770586967468, |
|
"eval_runtime": 51.1115, |
|
"eval_samples_per_second": 192.031, |
|
"eval_steps_per_second": 6.006, |
|
"step": 85000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 5.253623188405797e-07, |
|
"loss": 0.4044, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"eval_accuracy": 0.7660723382577688, |
|
"eval_loss": 0.6127380728721619, |
|
"eval_runtime": 51.0045, |
|
"eval_samples_per_second": 192.434, |
|
"eval_steps_per_second": 6.019, |
|
"step": 86000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 2.989130434782609e-07, |
|
"loss": 0.403, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.7663779928680591, |
|
"eval_loss": 0.612598717212677, |
|
"eval_runtime": 51.1269, |
|
"eval_samples_per_second": 191.973, |
|
"eval_steps_per_second": 6.005, |
|
"step": 87000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 7.246376811594204e-08, |
|
"loss": 0.4033, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.7661742231278655, |
|
"eval_loss": 0.6143723726272583, |
|
"eval_runtime": 50.9677, |
|
"eval_samples_per_second": 192.573, |
|
"eval_steps_per_second": 6.023, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 88320, |
|
"total_flos": 1.4721628163172653e+17, |
|
"train_loss": 0.5041872973027437, |
|
"train_runtime": 53589.3429, |
|
"train_samples_per_second": 52.738, |
|
"train_steps_per_second": 1.648 |
|
} |
|
], |
|
"max_steps": 88320, |
|
"num_train_epochs": 3, |
|
"total_flos": 1.4721628163172653e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |