{
  "best_metric": 0.15070733428001404,
  "best_model_checkpoint": "checkpoints_orfeo/checkpoint-65079",
  "epoch": 9.0,
  "global_step": 65079,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 1.996542663532015e-05,
      "loss": 1.5229,
      "step": 500
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.99308532706403e-05,
      "loss": 0.7443,
      "step": 1000
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.989627990596045e-05,
      "loss": 0.5936,
      "step": 1500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.986177568800996e-05,
      "loss": 0.508,
      "step": 2000
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.982720232333011e-05,
      "loss": 0.4597,
      "step": 2500
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.9792628958650257e-05,
      "loss": 0.4274,
      "step": 3000
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.9758055593970406e-05,
      "loss": 0.3898,
      "step": 3500
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.9723482229290554e-05,
      "loss": 0.3645,
      "step": 4000
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.9688978011340064e-05,
      "loss": 0.3486,
      "step": 4500
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.9654404646660216e-05,
      "loss": 0.337,
      "step": 5000
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.9619831281980364e-05,
      "loss": 0.3175,
      "step": 5500
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.9585257917300513e-05,
      "loss": 0.3015,
      "step": 6000
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.955068455262066e-05,
      "loss": 0.2914,
      "step": 6500
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.9516111187940813e-05,
      "loss": 0.2843,
      "step": 7000
    },
    {
      "epoch": 1.0,
      "eval_bleu": 83.675,
      "eval_gen_len": 14.2653,
      "eval_loss": 0.24304039776325226,
      "eval_runtime": 556.3105,
      "eval_samples_per_second": 51.762,
      "eval_steps_per_second": 1.618,
      "step": 7231
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.948160696999032e-05,
      "loss": 0.2721,
      "step": 7500
    },
    {
      "epoch": 1.11,
      "learning_rate": 1.944703360531047e-05,
      "loss": 0.2533,
      "step": 8000
    },
    {
      "epoch": 1.18,
      "learning_rate": 1.941246024063062e-05,
      "loss": 0.2493,
      "step": 8500
    },
    {
      "epoch": 1.24,
      "learning_rate": 1.937788687595077e-05,
      "loss": 0.2444,
      "step": 9000
    },
    {
      "epoch": 1.31,
      "learning_rate": 1.9343313511270917e-05,
      "loss": 0.2432,
      "step": 9500
    },
    {
      "epoch": 1.38,
      "learning_rate": 1.930874014659107e-05,
      "loss": 0.2329,
      "step": 10000
    },
    {
      "epoch": 1.45,
      "learning_rate": 1.9274166781911217e-05,
      "loss": 0.2318,
      "step": 10500
    },
    {
      "epoch": 1.52,
      "learning_rate": 1.9239662563960727e-05,
      "loss": 0.2296,
      "step": 11000
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.9205089199280876e-05,
      "loss": 0.2297,
      "step": 11500
    },
    {
      "epoch": 1.66,
      "learning_rate": 1.9170515834601024e-05,
      "loss": 0.2226,
      "step": 12000
    },
    {
      "epoch": 1.73,
      "learning_rate": 1.9135942469921173e-05,
      "loss": 0.2214,
      "step": 12500
    },
    {
      "epoch": 1.8,
      "learning_rate": 1.9101369105241325e-05,
      "loss": 0.2193,
      "step": 13000
    },
    {
      "epoch": 1.87,
      "learning_rate": 1.9066795740561473e-05,
      "loss": 0.2141,
      "step": 13500
    },
    {
      "epoch": 1.94,
      "learning_rate": 1.903222237588162e-05,
      "loss": 0.2121,
      "step": 14000
    },
    {
      "epoch": 2.0,
      "eval_bleu": 86.0981,
      "eval_gen_len": 14.1714,
      "eval_loss": 0.19072701036930084,
      "eval_runtime": 554.6671,
      "eval_samples_per_second": 51.916,
      "eval_steps_per_second": 1.623,
      "step": 14462
    },
    {
      "epoch": 2.01,
      "learning_rate": 1.899764901120177e-05,
      "loss": 0.207,
      "step": 14500
    },
    {
      "epoch": 2.07,
      "learning_rate": 1.896314479325128e-05,
      "loss": 0.1895,
      "step": 15000
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.892857142857143e-05,
      "loss": 0.1903,
      "step": 15500
    },
    {
      "epoch": 2.21,
      "learning_rate": 1.889399806389158e-05,
      "loss": 0.1883,
      "step": 16000
    },
    {
      "epoch": 2.28,
      "learning_rate": 1.885942469921173e-05,
      "loss": 0.1911,
      "step": 16500
    },
    {
      "epoch": 2.35,
      "learning_rate": 1.8824851334531877e-05,
      "loss": 0.1912,
      "step": 17000
    },
    {
      "epoch": 2.42,
      "learning_rate": 1.8790277969852026e-05,
      "loss": 0.1856,
      "step": 17500
    },
    {
      "epoch": 2.49,
      "learning_rate": 1.8755773751901536e-05,
      "loss": 0.1836,
      "step": 18000
    },
    {
      "epoch": 2.56,
      "learning_rate": 1.8721200387221684e-05,
      "loss": 0.1842,
      "step": 18500
    },
    {
      "epoch": 2.63,
      "learning_rate": 1.8686627022541836e-05,
      "loss": 0.1789,
      "step": 19000
    },
    {
      "epoch": 2.7,
      "learning_rate": 1.8652053657861984e-05,
      "loss": 0.1826,
      "step": 19500
    },
    {
      "epoch": 2.77,
      "learning_rate": 1.8617480293182133e-05,
      "loss": 0.1793,
      "step": 20000
    },
    {
      "epoch": 2.84,
      "learning_rate": 1.8582976075231643e-05,
      "loss": 0.1747,
      "step": 20500
    },
    {
      "epoch": 2.9,
      "learning_rate": 1.854840271055179e-05,
      "loss": 0.1725,
      "step": 21000
    },
    {
      "epoch": 2.97,
      "learning_rate": 1.851382934587194e-05,
      "loss": 0.1768,
      "step": 21500
    },
    {
      "epoch": 3.0,
      "eval_bleu": 87.1351,
      "eval_gen_len": 14.2257,
      "eval_loss": 0.17269779741764069,
      "eval_runtime": 554.1036,
      "eval_samples_per_second": 51.969,
      "eval_steps_per_second": 1.624,
      "step": 21693
    },
    {
      "epoch": 3.04,
      "learning_rate": 1.847925598119209e-05,
      "loss": 0.1662,
      "step": 22000
    },
    {
      "epoch": 3.11,
      "learning_rate": 1.844468261651224e-05,
      "loss": 0.164,
      "step": 22500
    },
    {
      "epoch": 3.18,
      "learning_rate": 1.841010925183239e-05,
      "loss": 0.1614,
      "step": 23000
    },
    {
      "epoch": 3.25,
      "learning_rate": 1.837553588715254e-05,
      "loss": 0.1604,
      "step": 23500
    },
    {
      "epoch": 3.32,
      "learning_rate": 1.834096252247269e-05,
      "loss": 0.155,
      "step": 24000
    },
    {
      "epoch": 3.39,
      "learning_rate": 1.8306389157792837e-05,
      "loss": 0.1574,
      "step": 24500
    },
    {
      "epoch": 3.46,
      "learning_rate": 1.8271884939842347e-05,
      "loss": 0.1593,
      "step": 25000
    },
    {
      "epoch": 3.53,
      "learning_rate": 1.82373115751625e-05,
      "loss": 0.1573,
      "step": 25500
    },
    {
      "epoch": 3.6,
      "learning_rate": 1.8202738210482648e-05,
      "loss": 0.1561,
      "step": 26000
    },
    {
      "epoch": 3.66,
      "learning_rate": 1.8168164845802796e-05,
      "loss": 0.1544,
      "step": 26500
    },
    {
      "epoch": 3.73,
      "learning_rate": 1.8133591481122945e-05,
      "loss": 0.1562,
      "step": 27000
    },
    {
      "epoch": 3.8,
      "learning_rate": 1.809908726317245e-05,
      "loss": 0.1596,
      "step": 27500
    },
    {
      "epoch": 3.87,
      "learning_rate": 1.806458304522196e-05,
      "loss": 0.1513,
      "step": 28000
    },
    {
      "epoch": 3.94,
      "learning_rate": 1.8030009680542113e-05,
      "loss": 0.1576,
      "step": 28500
    },
    {
      "epoch": 4.0,
      "eval_bleu": 87.7394,
      "eval_gen_len": 14.2568,
      "eval_loss": 0.1625259518623352,
      "eval_runtime": 551.0375,
      "eval_samples_per_second": 52.258,
      "eval_steps_per_second": 1.633,
      "step": 28924
    },
    {
      "epoch": 4.01,
      "learning_rate": 1.799543631586226e-05,
      "loss": 0.1499,
      "step": 29000
    },
    {
      "epoch": 4.08,
      "learning_rate": 1.796086295118241e-05,
      "loss": 0.1397,
      "step": 29500
    },
    {
      "epoch": 4.15,
      "learning_rate": 1.792635873323192e-05,
      "loss": 0.1391,
      "step": 30000
    },
    {
      "epoch": 4.22,
      "learning_rate": 1.7891785368552068e-05,
      "loss": 0.1415,
      "step": 30500
    },
    {
      "epoch": 4.29,
      "learning_rate": 1.7857212003872216e-05,
      "loss": 0.1384,
      "step": 31000
    },
    {
      "epoch": 4.36,
      "learning_rate": 1.7822638639192368e-05,
      "loss": 0.1397,
      "step": 31500
    },
    {
      "epoch": 4.43,
      "learning_rate": 1.7788065274512517e-05,
      "loss": 0.1431,
      "step": 32000
    },
    {
      "epoch": 4.49,
      "learning_rate": 1.7753561056562027e-05,
      "loss": 0.1398,
      "step": 32500
    },
    {
      "epoch": 4.56,
      "learning_rate": 1.7718987691882175e-05,
      "loss": 0.1387,
      "step": 33000
    },
    {
      "epoch": 4.63,
      "learning_rate": 1.7684414327202324e-05,
      "loss": 0.142,
      "step": 33500
    },
    {
      "epoch": 4.7,
      "learning_rate": 1.7649840962522472e-05,
      "loss": 0.1393,
      "step": 34000
    },
    {
      "epoch": 4.77,
      "learning_rate": 1.7615267597842624e-05,
      "loss": 0.1381,
      "step": 34500
    },
    {
      "epoch": 4.84,
      "learning_rate": 1.7580694233162772e-05,
      "loss": 0.141,
      "step": 35000
    },
    {
      "epoch": 4.91,
      "learning_rate": 1.7546120868482924e-05,
      "loss": 0.1369,
      "step": 35500
    },
    {
      "epoch": 4.98,
      "learning_rate": 1.7511547503803073e-05,
      "loss": 0.1361,
      "step": 36000
    },
    {
      "epoch": 5.0,
      "eval_bleu": 88.1298,
      "eval_gen_len": 14.2962,
      "eval_loss": 0.1563238501548767,
      "eval_runtime": 552.3181,
      "eval_samples_per_second": 52.137,
      "eval_steps_per_second": 1.629,
      "step": 36155
    },
    {
      "epoch": 5.05,
      "learning_rate": 1.747697413912322e-05,
      "loss": 0.1286,
      "step": 36500
    },
    {
      "epoch": 5.12,
      "learning_rate": 1.7442469921172728e-05,
      "loss": 0.1253,
      "step": 37000
    },
    {
      "epoch": 5.19,
      "learning_rate": 1.740789655649288e-05,
      "loss": 0.1278,
      "step": 37500
    },
    {
      "epoch": 5.26,
      "learning_rate": 1.7373323191813028e-05,
      "loss": 0.1258,
      "step": 38000
    },
    {
      "epoch": 5.32,
      "learning_rate": 1.733874982713318e-05,
      "loss": 0.127,
      "step": 38500
    },
    {
      "epoch": 5.39,
      "learning_rate": 1.7304245609182686e-05,
      "loss": 0.1242,
      "step": 39000
    },
    {
      "epoch": 5.46,
      "learning_rate": 1.7269672244502838e-05,
      "loss": 0.1224,
      "step": 39500
    },
    {
      "epoch": 5.53,
      "learning_rate": 1.7235098879822987e-05,
      "loss": 0.1267,
      "step": 40000
    },
    {
      "epoch": 5.6,
      "learning_rate": 1.7200594661872493e-05,
      "loss": 0.1275,
      "step": 40500
    },
    {
      "epoch": 5.67,
      "learning_rate": 1.716602129719264e-05,
      "loss": 0.1277,
      "step": 41000
    },
    {
      "epoch": 5.74,
      "learning_rate": 1.7131447932512794e-05,
      "loss": 0.1285,
      "step": 41500
    },
    {
      "epoch": 5.81,
      "learning_rate": 1.7096874567832945e-05,
      "loss": 0.126,
      "step": 42000
    },
    {
      "epoch": 5.88,
      "learning_rate": 1.7062301203153094e-05,
      "loss": 0.1246,
      "step": 42500
    },
    {
      "epoch": 5.95,
      "learning_rate": 1.7027727838473242e-05,
      "loss": 0.1256,
      "step": 43000
    },
    {
      "epoch": 6.0,
      "eval_bleu": 88.3004,
      "eval_gen_len": 14.2442,
      "eval_loss": 0.15259724855422974,
      "eval_runtime": 550.8856,
      "eval_samples_per_second": 52.272,
      "eval_steps_per_second": 1.634,
      "step": 43386
    },
    {
      "epoch": 6.02,
      "learning_rate": 1.699315447379339e-05,
      "loss": 0.1258,
      "step": 43500
    },
    {
      "epoch": 6.08,
      "learning_rate": 1.695858110911354e-05,
      "loss": 0.112,
      "step": 44000
    },
    {
      "epoch": 6.15,
      "learning_rate": 1.692407689116305e-05,
      "loss": 0.1127,
      "step": 44500
    },
    {
      "epoch": 6.22,
      "learning_rate": 1.68895035264832e-05,
      "loss": 0.1139,
      "step": 45000
    },
    {
      "epoch": 6.29,
      "learning_rate": 1.685493016180335e-05,
      "loss": 0.1163,
      "step": 45500
    },
    {
      "epoch": 6.36,
      "learning_rate": 1.6820356797123498e-05,
      "loss": 0.1157,
      "step": 46000
    },
    {
      "epoch": 6.43,
      "learning_rate": 1.6785783432443647e-05,
      "loss": 0.1142,
      "step": 46500
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.67512100677638e-05,
      "loss": 0.1123,
      "step": 47000
    },
    {
      "epoch": 6.57,
      "learning_rate": 1.6716636703083947e-05,
      "loss": 0.1135,
      "step": 47500
    },
    {
      "epoch": 6.64,
      "learning_rate": 1.6682063338404095e-05,
      "loss": 0.1146,
      "step": 48000
    },
    {
      "epoch": 6.71,
      "learning_rate": 1.6647489973724244e-05,
      "loss": 0.1159,
      "step": 48500
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.6612916609044392e-05,
      "loss": 0.1157,
      "step": 49000
    },
    {
      "epoch": 6.85,
      "learning_rate": 1.6578343244364544e-05,
      "loss": 0.1148,
      "step": 49500
    },
    {
      "epoch": 6.91,
      "learning_rate": 1.6543769879684693e-05,
      "loss": 0.1139,
      "step": 50000
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.6509265661734203e-05,
      "loss": 0.1164,
      "step": 50500
    },
    {
      "epoch": 7.0,
      "eval_bleu": 88.4796,
      "eval_gen_len": 14.2928,
      "eval_loss": 0.15147347748279572,
      "eval_runtime": 552.236,
      "eval_samples_per_second": 52.144,
      "eval_steps_per_second": 1.63,
      "step": 50617
    },
    {
      "epoch": 7.05,
      "learning_rate": 1.647469229705435e-05,
      "loss": 0.1055,
      "step": 51000
    },
    {
      "epoch": 7.12,
      "learning_rate": 1.64401189323745e-05,
      "loss": 0.1028,
      "step": 51500
    },
    {
      "epoch": 7.19,
      "learning_rate": 1.640561471442401e-05,
      "loss": 0.1055,
      "step": 52000
    },
    {
      "epoch": 7.26,
      "learning_rate": 1.6371041349744158e-05,
      "loss": 0.105,
      "step": 52500
    },
    {
      "epoch": 7.33,
      "learning_rate": 1.6336537131793668e-05,
      "loss": 0.1052,
      "step": 53000
    },
    {
      "epoch": 7.4,
      "learning_rate": 1.6301963767113816e-05,
      "loss": 0.1063,
      "step": 53500
    },
    {
      "epoch": 7.47,
      "learning_rate": 1.6267390402433968e-05,
      "loss": 0.1048,
      "step": 54000
    },
    {
      "epoch": 7.54,
      "learning_rate": 1.6232817037754117e-05,
      "loss": 0.1073,
      "step": 54500
    },
    {
      "epoch": 7.61,
      "learning_rate": 1.6198243673074265e-05,
      "loss": 0.106,
      "step": 55000
    },
    {
      "epoch": 7.68,
      "learning_rate": 1.6163670308394413e-05,
      "loss": 0.1036,
      "step": 55500
    },
    {
      "epoch": 7.74,
      "learning_rate": 1.6129096943714565e-05,
      "loss": 0.1057,
      "step": 56000
    },
    {
      "epoch": 7.81,
      "learning_rate": 1.6094592725764072e-05,
      "loss": 0.1051,
      "step": 56500
    },
    {
      "epoch": 7.88,
      "learning_rate": 1.6060019361084224e-05,
      "loss": 0.1057,
      "step": 57000
    },
    {
      "epoch": 7.95,
      "learning_rate": 1.6025445996404372e-05,
      "loss": 0.1051,
      "step": 57500
    },
    {
      "epoch": 8.0,
      "eval_bleu": 88.7364,
      "eval_gen_len": 14.3991,
      "eval_loss": 0.1514560878276825,
      "eval_runtime": 559.4528,
      "eval_samples_per_second": 51.472,
      "eval_steps_per_second": 1.609,
      "step": 57848
    },
    {
      "epoch": 8.02,
      "learning_rate": 1.599087263172452e-05,
      "loss": 0.103,
      "step": 58000
    },
    {
      "epoch": 8.09,
      "learning_rate": 1.595629926704467e-05,
      "loss": 0.0925,
      "step": 58500
    },
    {
      "epoch": 8.16,
      "learning_rate": 1.592172590236482e-05,
      "loss": 0.0942,
      "step": 59000
    },
    {
      "epoch": 8.23,
      "learning_rate": 1.588715253768497e-05,
      "loss": 0.0965,
      "step": 59500
    },
    {
      "epoch": 8.3,
      "learning_rate": 1.585264831973448e-05,
      "loss": 0.0952,
      "step": 60000
    },
    {
      "epoch": 8.37,
      "learning_rate": 1.5818074955054628e-05,
      "loss": 0.0967,
      "step": 60500
    },
    {
      "epoch": 8.44,
      "learning_rate": 1.5783501590374776e-05,
      "loss": 0.0957,
      "step": 61000
    },
    {
      "epoch": 8.51,
      "learning_rate": 1.5748928225694925e-05,
      "loss": 0.0938,
      "step": 61500
    },
    {
      "epoch": 8.57,
      "learning_rate": 1.5714354861015077e-05,
      "loss": 0.0981,
      "step": 62000
    },
    {
      "epoch": 8.64,
      "learning_rate": 1.5679781496335225e-05,
      "loss": 0.1008,
      "step": 62500
    },
    {
      "epoch": 8.71,
      "learning_rate": 1.5645208131655374e-05,
      "loss": 0.0987,
      "step": 63000
    },
    {
      "epoch": 8.78,
      "learning_rate": 1.5610703913704883e-05,
      "loss": 0.0985,
      "step": 63500
    },
    {
      "epoch": 8.85,
      "learning_rate": 1.5576130549025032e-05,
      "loss": 0.097,
      "step": 64000
    },
    {
      "epoch": 8.92,
      "learning_rate": 1.554155718434518e-05,
      "loss": 0.0985,
      "step": 64500
    },
    {
      "epoch": 8.99,
      "learning_rate": 1.5506983819665332e-05,
      "loss": 0.0966,
      "step": 65000
    },
    {
      "epoch": 9.0,
      "eval_bleu": 88.826,
      "eval_gen_len": 14.3624,
      "eval_loss": 0.15070733428001404,
      "eval_runtime": 553.1909,
      "eval_samples_per_second": 52.054,
      "eval_steps_per_second": 1.627,
      "step": 65079
    }
  ],
  "max_steps": 289240,
  "num_train_epochs": 40,
  "total_flos": 2.004229422148485e+17,
  "trial_name": null,
  "trial_params": null
}