| { |
| "best_metric": 42.8213, |
| "best_model_checkpoint": "facebook/bart-base-finetuned/checkpoint-60000", |
| "epoch": 7.8237058286608425, |
| "eval_steps": 5000, |
| "global_step": 60000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.967401225713913e-05, |
| "loss": 1.5862, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.9348024514278265e-05, |
| "loss": 1.0258, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 3.90220367714174e-05, |
| "loss": 0.8928, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 3.869604902855653e-05, |
| "loss": 0.8012, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 3.837006128569566e-05, |
| "loss": 0.7591, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 3.804407354283479e-05, |
| "loss": 0.7188, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 3.7718085799973924e-05, |
| "loss": 0.6802, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 3.739209805711306e-05, |
| "loss": 0.6529, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 3.7066110314252186e-05, |
| "loss": 0.6373, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 3.674012257139132e-05, |
| "loss": 0.6121, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.65, |
| "eval_bleu": 37.8898, |
| "eval_gen_len": 18.0188, |
| "eval_loss": 0.5747564435005188, |
| "eval_runtime": 121.8837, |
| "eval_samples_per_second": 41.105, |
| "eval_steps_per_second": 0.648, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 3.641413482853045e-05, |
| "loss": 0.5929, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 3.608814708566958e-05, |
| "loss": 0.5763, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.576215934280872e-05, |
| "loss": 0.5636, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.5436171599947845e-05, |
| "loss": 0.5576, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.511018385708698e-05, |
| "loss": 0.5442, |
| "step": 7500 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.478419611422611e-05, |
| "loss": 0.5211, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.445820837136524e-05, |
| "loss": 0.508, |
| "step": 8500 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.413222062850437e-05, |
| "loss": 0.5007, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 3.38062328856435e-05, |
| "loss": 0.5062, |
| "step": 9500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 3.348024514278264e-05, |
| "loss": 0.4967, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.3, |
| "eval_bleu": 39.3429, |
| "eval_gen_len": 18.0347, |
| "eval_loss": 0.4752778112888336, |
| "eval_runtime": 118.4418, |
| "eval_samples_per_second": 42.299, |
| "eval_steps_per_second": 0.667, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 3.3154257399921765e-05, |
| "loss": 0.4876, |
| "step": 10500 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 3.282826965706089e-05, |
| "loss": 0.4762, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 3.250228191420003e-05, |
| "loss": 0.4716, |
| "step": 11500 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 3.217629417133916e-05, |
| "loss": 0.4632, |
| "step": 12000 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 3.185030642847829e-05, |
| "loss": 0.4588, |
| "step": 12500 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 3.1524318685617424e-05, |
| "loss": 0.4605, |
| "step": 13000 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 3.119833094275655e-05, |
| "loss": 0.457, |
| "step": 13500 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 3.0872343199895686e-05, |
| "loss": 0.4573, |
| "step": 14000 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 3.054635545703482e-05, |
| "loss": 0.4527, |
| "step": 14500 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 3.0220367714173952e-05, |
| "loss": 0.4437, |
| "step": 15000 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_bleu": 40.7435, |
| "eval_gen_len": 18.2399, |
| "eval_loss": 0.4312632977962494, |
| "eval_runtime": 118.0141, |
| "eval_samples_per_second": 42.453, |
| "eval_steps_per_second": 0.669, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 2.9894379971313083e-05, |
| "loss": 0.442, |
| "step": 15500 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 2.956839222845221e-05, |
| "loss": 0.4233, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 2.924240448559134e-05, |
| "loss": 0.4186, |
| "step": 16500 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 2.891641674273048e-05, |
| "loss": 0.4156, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 2.8590428999869607e-05, |
| "loss": 0.4211, |
| "step": 17500 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 2.8264441257008738e-05, |
| "loss": 0.4177, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 2.793845351414787e-05, |
| "loss": 0.4088, |
| "step": 18500 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 2.7612465771287e-05, |
| "loss": 0.4082, |
| "step": 19000 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 2.7286478028426135e-05, |
| "loss": 0.4094, |
| "step": 19500 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 2.6960490285565266e-05, |
| "loss": 0.4064, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.61, |
| "eval_bleu": 41.105, |
| "eval_gen_len": 18.2493, |
| "eval_loss": 0.4060722589492798, |
| "eval_runtime": 118.819, |
| "eval_samples_per_second": 42.165, |
| "eval_steps_per_second": 0.665, |
| "step": 20000 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 2.6634502542704397e-05, |
| "loss": 0.4069, |
| "step": 20500 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 2.6308514799843528e-05, |
| "loss": 0.4023, |
| "step": 21000 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 2.598252705698266e-05, |
| "loss": 0.3994, |
| "step": 21500 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.5656539314121793e-05, |
| "loss": 0.4005, |
| "step": 22000 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 2.5330551571260924e-05, |
| "loss": 0.4006, |
| "step": 22500 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 2.5004563828400055e-05, |
| "loss": 0.4013, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.06, |
| "learning_rate": 2.4678576085539186e-05, |
| "loss": 0.3765, |
| "step": 23500 |
| }, |
| { |
| "epoch": 3.13, |
| "learning_rate": 2.4352588342678317e-05, |
| "loss": 0.3796, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 2.402660059981745e-05, |
| "loss": 0.3726, |
| "step": 24500 |
| }, |
| { |
| "epoch": 3.26, |
| "learning_rate": 2.3700612856956583e-05, |
| "loss": 0.3795, |
| "step": 25000 |
| }, |
| { |
| "epoch": 3.26, |
| "eval_bleu": 42.0027, |
| "eval_gen_len": 18.2441, |
| "eval_loss": 0.3876380920410156, |
| "eval_runtime": 120.3412, |
| "eval_samples_per_second": 41.632, |
| "eval_steps_per_second": 0.656, |
| "step": 25000 |
| }, |
| { |
| "epoch": 3.33, |
| "learning_rate": 2.3374625114095714e-05, |
| "loss": 0.3766, |
| "step": 25500 |
| }, |
| { |
| "epoch": 3.39, |
| "learning_rate": 2.304863737123484e-05, |
| "loss": 0.38, |
| "step": 26000 |
| }, |
| { |
| "epoch": 3.46, |
| "learning_rate": 2.2722649628373973e-05, |
| "loss": 0.3732, |
| "step": 26500 |
| }, |
| { |
| "epoch": 3.52, |
| "learning_rate": 2.2396661885513104e-05, |
| "loss": 0.3719, |
| "step": 27000 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 2.2070674142652238e-05, |
| "loss": 0.3693, |
| "step": 27500 |
| }, |
| { |
| "epoch": 3.65, |
| "learning_rate": 2.174468639979137e-05, |
| "loss": 0.3758, |
| "step": 28000 |
| }, |
| { |
| "epoch": 3.72, |
| "learning_rate": 2.14186986569305e-05, |
| "loss": 0.3599, |
| "step": 28500 |
| }, |
| { |
| "epoch": 3.78, |
| "learning_rate": 2.109271091406963e-05, |
| "loss": 0.3673, |
| "step": 29000 |
| }, |
| { |
| "epoch": 3.85, |
| "learning_rate": 2.0766723171208762e-05, |
| "loss": 0.369, |
| "step": 29500 |
| }, |
| { |
| "epoch": 3.91, |
| "learning_rate": 2.0440735428347897e-05, |
| "loss": 0.3728, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.91, |
| "eval_bleu": 42.191, |
| "eval_gen_len": 18.2934, |
| "eval_loss": 0.3774366080760956, |
| "eval_runtime": 118.342, |
| "eval_samples_per_second": 42.335, |
| "eval_steps_per_second": 0.668, |
| "step": 30000 |
| }, |
| { |
| "epoch": 3.98, |
| "learning_rate": 2.0114747685487028e-05, |
| "loss": 0.3673, |
| "step": 30500 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 1.978875994262616e-05, |
| "loss": 0.3562, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.11, |
| "learning_rate": 1.946277219976529e-05, |
| "loss": 0.3518, |
| "step": 31500 |
| }, |
| { |
| "epoch": 4.17, |
| "learning_rate": 1.913678445690442e-05, |
| "loss": 0.3484, |
| "step": 32000 |
| }, |
| { |
| "epoch": 4.24, |
| "learning_rate": 1.8810796714043552e-05, |
| "loss": 0.3489, |
| "step": 32500 |
| }, |
| { |
| "epoch": 4.3, |
| "learning_rate": 1.8484808971182686e-05, |
| "loss": 0.3529, |
| "step": 33000 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 1.8158821228321817e-05, |
| "loss": 0.347, |
| "step": 33500 |
| }, |
| { |
| "epoch": 4.43, |
| "learning_rate": 1.783283348546095e-05, |
| "loss": 0.3442, |
| "step": 34000 |
| }, |
| { |
| "epoch": 4.5, |
| "learning_rate": 1.750684574260008e-05, |
| "loss": 0.3498, |
| "step": 34500 |
| }, |
| { |
| "epoch": 4.56, |
| "learning_rate": 1.718085799973921e-05, |
| "loss": 0.3464, |
| "step": 35000 |
| }, |
| { |
| "epoch": 4.56, |
| "eval_bleu": 42.1307, |
| "eval_gen_len": 18.2251, |
| "eval_loss": 0.3665723502635956, |
| "eval_runtime": 117.0033, |
| "eval_samples_per_second": 42.819, |
| "eval_steps_per_second": 0.675, |
| "step": 35000 |
| }, |
| { |
| "epoch": 4.63, |
| "learning_rate": 1.685487025687834e-05, |
| "loss": 0.3484, |
| "step": 35500 |
| }, |
| { |
| "epoch": 4.69, |
| "learning_rate": 1.6528882514017476e-05, |
| "loss": 0.3432, |
| "step": 36000 |
| }, |
| { |
| "epoch": 4.76, |
| "learning_rate": 1.6202894771156604e-05, |
| "loss": 0.3477, |
| "step": 36500 |
| }, |
| { |
| "epoch": 4.82, |
| "learning_rate": 1.5876907028295738e-05, |
| "loss": 0.3476, |
| "step": 37000 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 1.555091928543487e-05, |
| "loss": 0.3471, |
| "step": 37500 |
| }, |
| { |
| "epoch": 4.96, |
| "learning_rate": 1.5224931542574e-05, |
| "loss": 0.3462, |
| "step": 38000 |
| }, |
| { |
| "epoch": 5.02, |
| "learning_rate": 1.4898943799713133e-05, |
| "loss": 0.3407, |
| "step": 38500 |
| }, |
| { |
| "epoch": 5.09, |
| "learning_rate": 1.4572956056852262e-05, |
| "loss": 0.3312, |
| "step": 39000 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 1.4246968313991397e-05, |
| "loss": 0.3297, |
| "step": 39500 |
| }, |
| { |
| "epoch": 5.22, |
| "learning_rate": 1.3920980571130526e-05, |
| "loss": 0.3321, |
| "step": 40000 |
| }, |
| { |
| "epoch": 5.22, |
| "eval_bleu": 42.2362, |
| "eval_gen_len": 18.1764, |
| "eval_loss": 0.36170148849487305, |
| "eval_runtime": 117.3524, |
| "eval_samples_per_second": 42.692, |
| "eval_steps_per_second": 0.673, |
| "step": 40000 |
| }, |
| { |
| "epoch": 5.28, |
| "learning_rate": 1.3594992828269657e-05, |
| "loss": 0.33, |
| "step": 40500 |
| }, |
| { |
| "epoch": 5.35, |
| "learning_rate": 1.326900508540879e-05, |
| "loss": 0.3308, |
| "step": 41000 |
| }, |
| { |
| "epoch": 5.41, |
| "learning_rate": 1.2943017342547921e-05, |
| "loss": 0.3305, |
| "step": 41500 |
| }, |
| { |
| "epoch": 5.48, |
| "learning_rate": 1.2617029599687052e-05, |
| "loss": 0.3259, |
| "step": 42000 |
| }, |
| { |
| "epoch": 5.54, |
| "learning_rate": 1.2291041856826185e-05, |
| "loss": 0.3337, |
| "step": 42500 |
| }, |
| { |
| "epoch": 5.61, |
| "learning_rate": 1.1965054113965316e-05, |
| "loss": 0.3334, |
| "step": 43000 |
| }, |
| { |
| "epoch": 5.67, |
| "learning_rate": 1.1639066371104449e-05, |
| "loss": 0.329, |
| "step": 43500 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 1.131307862824358e-05, |
| "loss": 0.3306, |
| "step": 44000 |
| }, |
| { |
| "epoch": 5.8, |
| "learning_rate": 1.0987090885382709e-05, |
| "loss": 0.33, |
| "step": 44500 |
| }, |
| { |
| "epoch": 5.87, |
| "learning_rate": 1.0661103142521842e-05, |
| "loss": 0.3264, |
| "step": 45000 |
| }, |
| { |
| "epoch": 5.87, |
| "eval_bleu": 42.4323, |
| "eval_gen_len": 18.2745, |
| "eval_loss": 0.3556722104549408, |
| "eval_runtime": 116.9876, |
| "eval_samples_per_second": 42.825, |
| "eval_steps_per_second": 0.675, |
| "step": 45000 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 1.0335115399660973e-05, |
| "loss": 0.3275, |
| "step": 45500 |
| }, |
| { |
| "epoch": 6.0, |
| "learning_rate": 1.0009127656800106e-05, |
| "loss": 0.3225, |
| "step": 46000 |
| }, |
| { |
| "epoch": 6.06, |
| "learning_rate": 9.683139913939237e-06, |
| "loss": 0.3143, |
| "step": 46500 |
| }, |
| { |
| "epoch": 6.13, |
| "learning_rate": 9.35715217107837e-06, |
| "loss": 0.3169, |
| "step": 47000 |
| }, |
| { |
| "epoch": 6.19, |
| "learning_rate": 9.031164428217499e-06, |
| "loss": 0.3134, |
| "step": 47500 |
| }, |
| { |
| "epoch": 6.26, |
| "learning_rate": 8.705176685356631e-06, |
| "loss": 0.3218, |
| "step": 48000 |
| }, |
| { |
| "epoch": 6.32, |
| "learning_rate": 8.379188942495763e-06, |
| "loss": 0.3169, |
| "step": 48500 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 8.053201199634895e-06, |
| "loss": 0.3163, |
| "step": 49000 |
| }, |
| { |
| "epoch": 6.45, |
| "learning_rate": 7.727213456774026e-06, |
| "loss": 0.3199, |
| "step": 49500 |
| }, |
| { |
| "epoch": 6.52, |
| "learning_rate": 7.401225713913157e-06, |
| "loss": 0.321, |
| "step": 50000 |
| }, |
| { |
| "epoch": 6.52, |
| "eval_bleu": 42.797, |
| "eval_gen_len": 18.177, |
| "eval_loss": 0.3521122634410858, |
| "eval_runtime": 118.7785, |
| "eval_samples_per_second": 42.179, |
| "eval_steps_per_second": 0.665, |
| "step": 50000 |
| }, |
| { |
| "epoch": 6.58, |
| "learning_rate": 7.075237971052289e-06, |
| "loss": 0.3166, |
| "step": 50500 |
| }, |
| { |
| "epoch": 6.65, |
| "learning_rate": 6.74925022819142e-06, |
| "loss": 0.3137, |
| "step": 51000 |
| }, |
| { |
| "epoch": 6.72, |
| "learning_rate": 6.423262485330552e-06, |
| "loss": 0.314, |
| "step": 51500 |
| }, |
| { |
| "epoch": 6.78, |
| "learning_rate": 6.097274742469683e-06, |
| "loss": 0.3163, |
| "step": 52000 |
| }, |
| { |
| "epoch": 6.85, |
| "learning_rate": 5.771286999608815e-06, |
| "loss": 0.3177, |
| "step": 52500 |
| }, |
| { |
| "epoch": 6.91, |
| "learning_rate": 5.445299256747947e-06, |
| "loss": 0.3153, |
| "step": 53000 |
| }, |
| { |
| "epoch": 6.98, |
| "learning_rate": 5.119311513887079e-06, |
| "loss": 0.3153, |
| "step": 53500 |
| }, |
| { |
| "epoch": 7.04, |
| "learning_rate": 4.79332377102621e-06, |
| "loss": 0.3095, |
| "step": 54000 |
| }, |
| { |
| "epoch": 7.11, |
| "learning_rate": 4.467336028165342e-06, |
| "loss": 0.3073, |
| "step": 54500 |
| }, |
| { |
| "epoch": 7.17, |
| "learning_rate": 4.141348285304473e-06, |
| "loss": 0.3063, |
| "step": 55000 |
| }, |
| { |
| "epoch": 7.17, |
| "eval_bleu": 42.6295, |
| "eval_gen_len": 18.2265, |
| "eval_loss": 0.3495286703109741, |
| "eval_runtime": 120.504, |
| "eval_samples_per_second": 41.575, |
| "eval_steps_per_second": 0.656, |
| "step": 55000 |
| }, |
| { |
| "epoch": 7.24, |
| "learning_rate": 3.815360542443605e-06, |
| "loss": 0.3107, |
| "step": 55500 |
| }, |
| { |
| "epoch": 7.3, |
| "learning_rate": 3.489372799582736e-06, |
| "loss": 0.3138, |
| "step": 56000 |
| }, |
| { |
| "epoch": 7.37, |
| "learning_rate": 3.163385056721868e-06, |
| "loss": 0.3092, |
| "step": 56500 |
| }, |
| { |
| "epoch": 7.43, |
| "learning_rate": 2.837397313860999e-06, |
| "loss": 0.3097, |
| "step": 57000 |
| }, |
| { |
| "epoch": 7.5, |
| "learning_rate": 2.5114095710001308e-06, |
| "loss": 0.3057, |
| "step": 57500 |
| }, |
| { |
| "epoch": 7.56, |
| "learning_rate": 2.1854218281392622e-06, |
| "loss": 0.3045, |
| "step": 58000 |
| }, |
| { |
| "epoch": 7.63, |
| "learning_rate": 1.8594340852783937e-06, |
| "loss": 0.3088, |
| "step": 58500 |
| }, |
| { |
| "epoch": 7.69, |
| "learning_rate": 1.5334463424175252e-06, |
| "loss": 0.306, |
| "step": 59000 |
| }, |
| { |
| "epoch": 7.76, |
| "learning_rate": 1.2074585995566569e-06, |
| "loss": 0.3118, |
| "step": 59500 |
| }, |
| { |
| "epoch": 7.82, |
| "learning_rate": 8.814708566957883e-07, |
| "loss": 0.3043, |
| "step": 60000 |
| }, |
| { |
| "epoch": 7.82, |
| "eval_bleu": 42.8213, |
| "eval_gen_len": 18.2248, |
| "eval_loss": 0.34873583912849426, |
| "eval_runtime": 121.0894, |
| "eval_samples_per_second": 41.374, |
| "eval_steps_per_second": 0.652, |
| "step": 60000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 61352, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 5000, |
| "total_flos": 3.672168663840768e+16, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|