{ "best_metric": 0.15070733428001404, "best_model_checkpoint": "checkpoints_orfeo/checkpoint-65079", "epoch": 9.0, "global_step": 65079, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 1.996542663532015e-05, "loss": 1.5229, "step": 500 }, { "epoch": 0.14, "learning_rate": 1.99308532706403e-05, "loss": 0.7443, "step": 1000 }, { "epoch": 0.21, "learning_rate": 1.989627990596045e-05, "loss": 0.5936, "step": 1500 }, { "epoch": 0.28, "learning_rate": 1.986177568800996e-05, "loss": 0.508, "step": 2000 }, { "epoch": 0.35, "learning_rate": 1.982720232333011e-05, "loss": 0.4597, "step": 2500 }, { "epoch": 0.41, "learning_rate": 1.9792628958650257e-05, "loss": 0.4274, "step": 3000 }, { "epoch": 0.48, "learning_rate": 1.9758055593970406e-05, "loss": 0.3898, "step": 3500 }, { "epoch": 0.55, "learning_rate": 1.9723482229290554e-05, "loss": 0.3645, "step": 4000 }, { "epoch": 0.62, "learning_rate": 1.9688978011340064e-05, "loss": 0.3486, "step": 4500 }, { "epoch": 0.69, "learning_rate": 1.9654404646660216e-05, "loss": 0.337, "step": 5000 }, { "epoch": 0.76, "learning_rate": 1.9619831281980364e-05, "loss": 0.3175, "step": 5500 }, { "epoch": 0.83, "learning_rate": 1.9585257917300513e-05, "loss": 0.3015, "step": 6000 }, { "epoch": 0.9, "learning_rate": 1.955068455262066e-05, "loss": 0.2914, "step": 6500 }, { "epoch": 0.97, "learning_rate": 1.9516111187940813e-05, "loss": 0.2843, "step": 7000 }, { "epoch": 1.0, "eval_bleu": 83.675, "eval_gen_len": 14.2653, "eval_loss": 0.24304039776325226, "eval_runtime": 556.3105, "eval_samples_per_second": 51.762, "eval_steps_per_second": 1.618, "step": 7231 }, { "epoch": 1.04, "learning_rate": 1.948160696999032e-05, "loss": 0.2721, "step": 7500 }, { "epoch": 1.11, "learning_rate": 1.944703360531047e-05, "loss": 0.2533, "step": 8000 }, { "epoch": 1.18, "learning_rate": 1.941246024063062e-05, "loss": 0.2493, "step": 8500 }, { "epoch": 1.24, "learning_rate": 1.937788687595077e-05, "loss": 0.2444, "step": 9000 }, { "epoch": 1.31, "learning_rate": 1.9343313511270917e-05, "loss": 0.2432, "step": 9500 }, { "epoch": 1.38, "learning_rate": 1.930874014659107e-05, "loss": 0.2329, "step": 10000 }, { "epoch": 1.45, "learning_rate": 1.9274166781911217e-05, "loss": 0.2318, "step": 10500 }, { "epoch": 1.52, "learning_rate": 1.9239662563960727e-05, "loss": 0.2296, "step": 11000 }, { "epoch": 1.59, "learning_rate": 1.9205089199280876e-05, "loss": 0.2297, "step": 11500 }, { "epoch": 1.66, "learning_rate": 1.9170515834601024e-05, "loss": 0.2226, "step": 12000 }, { "epoch": 1.73, "learning_rate": 1.9135942469921173e-05, "loss": 0.2214, "step": 12500 }, { "epoch": 1.8, "learning_rate": 1.9101369105241325e-05, "loss": 0.2193, "step": 13000 }, { "epoch": 1.87, "learning_rate": 1.9066795740561473e-05, "loss": 0.2141, "step": 13500 }, { "epoch": 1.94, "learning_rate": 1.903222237588162e-05, "loss": 0.2121, "step": 14000 }, { "epoch": 2.0, "eval_bleu": 86.0981, "eval_gen_len": 14.1714, "eval_loss": 0.19072701036930084, "eval_runtime": 554.6671, "eval_samples_per_second": 51.916, "eval_steps_per_second": 1.623, "step": 14462 }, { "epoch": 2.01, "learning_rate": 1.899764901120177e-05, "loss": 0.207, "step": 14500 }, { "epoch": 2.07, "learning_rate": 1.896314479325128e-05, "loss": 0.1895, "step": 15000 }, { "epoch": 2.14, "learning_rate": 1.892857142857143e-05, "loss": 0.1903, "step": 15500 }, { "epoch": 2.21, "learning_rate": 1.889399806389158e-05, "loss": 0.1883, "step": 16000 }, { "epoch": 2.28, "learning_rate": 1.885942469921173e-05, "loss": 0.1911, "step": 16500 }, { "epoch": 2.35, "learning_rate": 1.8824851334531877e-05, "loss": 0.1912, "step": 17000 }, { "epoch": 2.42, "learning_rate": 1.8790277969852026e-05, "loss": 0.1856, "step": 17500 }, { "epoch": 2.49, "learning_rate": 1.8755773751901536e-05, "loss": 0.1836, "step": 18000 }, { "epoch": 2.56, "learning_rate": 1.8721200387221684e-05, "loss": 0.1842, "step": 18500 }, { "epoch": 2.63, "learning_rate": 1.8686627022541836e-05, "loss": 0.1789, "step": 19000 }, { "epoch": 2.7, "learning_rate": 1.8652053657861984e-05, "loss": 0.1826, "step": 19500 }, { "epoch": 2.77, "learning_rate": 1.8617480293182133e-05, "loss": 0.1793, "step": 20000 }, { "epoch": 2.84, "learning_rate": 1.8582976075231643e-05, "loss": 0.1747, "step": 20500 }, { "epoch": 2.9, "learning_rate": 1.854840271055179e-05, "loss": 0.1725, "step": 21000 }, { "epoch": 2.97, "learning_rate": 1.851382934587194e-05, "loss": 0.1768, "step": 21500 }, { "epoch": 3.0, "eval_bleu": 87.1351, "eval_gen_len": 14.2257, "eval_loss": 0.17269779741764069, "eval_runtime": 554.1036, "eval_samples_per_second": 51.969, "eval_steps_per_second": 1.624, "step": 21693 }, { "epoch": 3.04, "learning_rate": 1.847925598119209e-05, "loss": 0.1662, "step": 22000 }, { "epoch": 3.11, "learning_rate": 1.844468261651224e-05, "loss": 0.164, "step": 22500 }, { "epoch": 3.18, "learning_rate": 1.841010925183239e-05, "loss": 0.1614, "step": 23000 }, { "epoch": 3.25, "learning_rate": 1.837553588715254e-05, "loss": 0.1604, "step": 23500 }, { "epoch": 3.32, "learning_rate": 1.834096252247269e-05, "loss": 0.155, "step": 24000 }, { "epoch": 3.39, "learning_rate": 1.8306389157792837e-05, "loss": 0.1574, "step": 24500 }, { "epoch": 3.46, "learning_rate": 1.8271884939842347e-05, "loss": 0.1593, "step": 25000 }, { "epoch": 3.53, "learning_rate": 1.82373115751625e-05, "loss": 0.1573, "step": 25500 }, { "epoch": 3.6, "learning_rate": 1.8202738210482648e-05, "loss": 0.1561, "step": 26000 }, { "epoch": 3.66, "learning_rate": 1.8168164845802796e-05, "loss": 0.1544, "step": 26500 }, { "epoch": 3.73, "learning_rate": 1.8133591481122945e-05, "loss": 0.1562, "step": 27000 }, { "epoch": 3.8, "learning_rate": 1.809908726317245e-05, "loss": 0.1596, "step": 27500 }, { "epoch": 3.87, "learning_rate": 1.806458304522196e-05, "loss": 0.1513, "step": 28000 }, { "epoch": 3.94, "learning_rate": 1.8030009680542113e-05, "loss": 0.1576, "step": 28500 }, { "epoch": 4.0, "eval_bleu": 87.7394, "eval_gen_len": 14.2568, "eval_loss": 0.1625259518623352, "eval_runtime": 551.0375, "eval_samples_per_second": 52.258, "eval_steps_per_second": 1.633, "step": 28924 }, { "epoch": 4.01, "learning_rate": 1.799543631586226e-05, "loss": 0.1499, "step": 29000 }, { "epoch": 4.08, "learning_rate": 1.796086295118241e-05, "loss": 0.1397, "step": 29500 }, { "epoch": 4.15, "learning_rate": 1.792635873323192e-05, "loss": 0.1391, "step": 30000 }, { "epoch": 4.22, "learning_rate": 1.7891785368552068e-05, "loss": 0.1415, "step": 30500 }, { "epoch": 4.29, "learning_rate": 1.7857212003872216e-05, "loss": 0.1384, "step": 31000 }, { "epoch": 4.36, "learning_rate": 1.7822638639192368e-05, "loss": 0.1397, "step": 31500 }, { "epoch": 4.43, "learning_rate": 1.7788065274512517e-05, "loss": 0.1431, "step": 32000 }, { "epoch": 4.49, "learning_rate": 1.7753561056562027e-05, "loss": 0.1398, "step": 32500 }, { "epoch": 4.56, "learning_rate": 1.7718987691882175e-05, "loss": 0.1387, "step": 33000 }, { "epoch": 4.63, "learning_rate": 1.7684414327202324e-05, "loss": 0.142, "step": 33500 }, { "epoch": 4.7, "learning_rate": 1.7649840962522472e-05, "loss": 0.1393, "step": 34000 }, { "epoch": 4.77, "learning_rate": 1.7615267597842624e-05, "loss": 0.1381, "step": 34500 }, { "epoch": 4.84, "learning_rate": 1.7580694233162772e-05, "loss": 0.141, "step": 35000 }, { "epoch": 4.91, "learning_rate": 1.7546120868482924e-05, "loss": 0.1369, "step": 35500 }, { "epoch": 4.98, "learning_rate": 1.7511547503803073e-05, "loss": 0.1361, "step": 36000 }, { "epoch": 5.0, "eval_bleu": 88.1298, "eval_gen_len": 14.2962, "eval_loss": 0.1563238501548767, "eval_runtime": 552.3181, "eval_samples_per_second": 52.137, "eval_steps_per_second": 1.629, "step": 36155 }, { "epoch": 5.05, "learning_rate": 1.747697413912322e-05, "loss": 0.1286, "step": 36500 }, { "epoch": 5.12, "learning_rate": 1.7442469921172728e-05, "loss": 0.1253, "step": 37000 }, { "epoch": 5.19, "learning_rate": 1.740789655649288e-05, "loss": 0.1278, "step": 37500 }, { "epoch": 5.26, "learning_rate": 1.7373323191813028e-05, "loss": 0.1258, "step": 38000 }, { "epoch": 5.32, "learning_rate": 1.733874982713318e-05, "loss": 0.127, "step": 38500 }, { "epoch": 5.39, "learning_rate": 1.7304245609182686e-05, "loss": 0.1242, "step": 39000 }, { "epoch": 5.46, "learning_rate": 1.7269672244502838e-05, "loss": 0.1224, "step": 39500 }, { "epoch": 5.53, "learning_rate": 1.7235098879822987e-05, "loss": 0.1267, "step": 40000 }, { "epoch": 5.6, "learning_rate": 1.7200594661872493e-05, "loss": 0.1275, "step": 40500 }, { "epoch": 5.67, "learning_rate": 1.716602129719264e-05, "loss": 0.1277, "step": 41000 }, { "epoch": 5.74, "learning_rate": 1.7131447932512794e-05, "loss": 0.1285, "step": 41500 }, { "epoch": 5.81, "learning_rate": 1.7096874567832945e-05, "loss": 0.126, "step": 42000 }, { "epoch": 5.88, "learning_rate": 1.7062301203153094e-05, "loss": 0.1246, "step": 42500 }, { "epoch": 5.95, "learning_rate": 1.7027727838473242e-05, "loss": 0.1256, "step": 43000 }, { "epoch": 6.0, "eval_bleu": 88.3004, "eval_gen_len": 14.2442, "eval_loss": 0.15259724855422974, "eval_runtime": 550.8856, "eval_samples_per_second": 52.272, "eval_steps_per_second": 1.634, "step": 43386 }, { "epoch": 6.02, "learning_rate": 1.699315447379339e-05, "loss": 0.1258, "step": 43500 }, { "epoch": 6.08, "learning_rate": 1.695858110911354e-05, "loss": 0.112, "step": 44000 }, { "epoch": 6.15, "learning_rate": 1.692407689116305e-05, "loss": 0.1127, "step": 44500 }, { "epoch": 6.22, "learning_rate": 1.68895035264832e-05, "loss": 0.1139, "step": 45000 }, { "epoch": 6.29, "learning_rate": 1.685493016180335e-05, "loss": 0.1163, "step": 45500 }, { "epoch": 6.36, "learning_rate": 1.6820356797123498e-05, "loss": 0.1157, "step": 46000 }, { "epoch": 6.43, "learning_rate": 1.6785783432443647e-05, "loss": 0.1142, "step": 46500 }, { "epoch": 6.5, "learning_rate": 1.67512100677638e-05, "loss": 0.1123, "step": 47000 }, { "epoch": 6.57, "learning_rate": 1.6716636703083947e-05, "loss": 0.1135, "step": 47500 }, { "epoch": 6.64, "learning_rate": 1.6682063338404095e-05, "loss": 0.1146, "step": 48000 }, { "epoch": 6.71, "learning_rate": 1.6647489973724244e-05, "loss": 0.1159, "step": 48500 }, { "epoch": 6.78, "learning_rate": 1.6612916609044392e-05, "loss": 0.1157, "step": 49000 }, { "epoch": 6.85, "learning_rate": 1.6578343244364544e-05, "loss": 0.1148, "step": 49500 }, { "epoch": 6.91, "learning_rate": 1.6543769879684693e-05, "loss": 0.1139, "step": 50000 }, { "epoch": 6.98, "learning_rate": 1.6509265661734203e-05, "loss": 0.1164, "step": 50500 }, { "epoch": 7.0, "eval_bleu": 88.4796, "eval_gen_len": 14.2928, "eval_loss": 0.15147347748279572, "eval_runtime": 552.236, "eval_samples_per_second": 52.144, "eval_steps_per_second": 1.63, "step": 50617 }, { "epoch": 7.05, "learning_rate": 1.647469229705435e-05, "loss": 0.1055, "step": 51000 }, { "epoch": 7.12, "learning_rate": 1.64401189323745e-05, "loss": 0.1028, "step": 51500 }, { "epoch": 7.19, "learning_rate": 1.640561471442401e-05, "loss": 0.1055, "step": 52000 }, { "epoch": 7.26, "learning_rate": 1.6371041349744158e-05, "loss": 0.105, "step": 52500 }, { "epoch": 7.33, "learning_rate": 1.6336537131793668e-05, "loss": 0.1052, "step": 53000 }, { "epoch": 7.4, "learning_rate": 1.6301963767113816e-05, "loss": 0.1063, "step": 53500 }, { "epoch": 7.47, "learning_rate": 1.6267390402433968e-05, "loss": 0.1048, "step": 54000 }, { "epoch": 7.54, "learning_rate": 1.6232817037754117e-05, "loss": 0.1073, "step": 54500 }, { "epoch": 7.61, "learning_rate": 1.6198243673074265e-05, "loss": 0.106, "step": 55000 }, { "epoch": 7.68, "learning_rate": 1.6163670308394413e-05, "loss": 0.1036, "step": 55500 }, { "epoch": 7.74, "learning_rate": 1.6129096943714565e-05, "loss": 0.1057, "step": 56000 }, { "epoch": 7.81, "learning_rate": 1.6094592725764072e-05, "loss": 0.1051, "step": 56500 }, { "epoch": 7.88, "learning_rate": 1.6060019361084224e-05, "loss": 0.1057, "step": 57000 }, { "epoch": 7.95, "learning_rate": 1.6025445996404372e-05, "loss": 0.1051, "step": 57500 }, { "epoch": 8.0, "eval_bleu": 88.7364, "eval_gen_len": 14.3991, "eval_loss": 0.1514560878276825, "eval_runtime": 559.4528, "eval_samples_per_second": 51.472, "eval_steps_per_second": 1.609, "step": 57848 }, { "epoch": 8.02, "learning_rate": 1.599087263172452e-05, "loss": 0.103, "step": 58000 }, { "epoch": 8.09, "learning_rate": 1.595629926704467e-05, "loss": 0.0925, "step": 58500 }, { "epoch": 8.16, "learning_rate": 1.592172590236482e-05, "loss": 0.0942, "step": 59000 }, { "epoch": 8.23, "learning_rate": 1.588715253768497e-05, "loss": 0.0965, "step": 59500 }, { "epoch": 8.3, "learning_rate": 1.585264831973448e-05, "loss": 0.0952, "step": 60000 }, { "epoch": 8.37, "learning_rate": 1.5818074955054628e-05, "loss": 0.0967, "step": 60500 }, { "epoch": 8.44, "learning_rate": 1.5783501590374776e-05, "loss": 0.0957, "step": 61000 }, { "epoch": 8.51, "learning_rate": 1.5748928225694925e-05, "loss": 0.0938, "step": 61500 }, { "epoch": 8.57, "learning_rate": 1.5714354861015077e-05, "loss": 0.0981, "step": 62000 }, { "epoch": 8.64, "learning_rate": 1.5679781496335225e-05, "loss": 0.1008, "step": 62500 }, { "epoch": 8.71, "learning_rate": 1.5645208131655374e-05, "loss": 0.0987, "step": 63000 }, { "epoch": 8.78, "learning_rate": 1.5610703913704883e-05, "loss": 0.0985, "step": 63500 }, { "epoch": 8.85, "learning_rate": 1.5576130549025032e-05, "loss": 0.097, "step": 64000 }, { "epoch": 8.92, "learning_rate": 1.554155718434518e-05, "loss": 0.0985, "step": 64500 }, { "epoch": 8.99, "learning_rate": 1.5506983819665332e-05, "loss": 0.0966, "step": 65000 }, { "epoch": 9.0, "eval_bleu": 88.826, "eval_gen_len": 14.3624, "eval_loss": 0.15070733428001404, "eval_runtime": 553.1909, "eval_samples_per_second": 52.054, "eval_steps_per_second": 1.627, "step": 65079 } ], "max_steps": 289240, "num_train_epochs": 40, "total_flos": 2.004229422148485e+17, "trial_name": null, "trial_params": null }