{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.096121848201715, "eval_steps": 500, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.0004957330602491893, "loss": 2.0441, "step": 100 }, { "epoch": 0.05, "learning_rate": 0.0004914661204983786, "loss": 1.713, "step": 200 }, { "epoch": 0.08, "learning_rate": 0.00048719918074756785, "loss": 1.6007, "step": 300 }, { "epoch": 0.1, "learning_rate": 0.00048293224099675715, "loss": 1.5181, "step": 400 }, { "epoch": 0.13, "learning_rate": 0.00047866530124594645, "loss": 1.4493, "step": 500 }, { "epoch": 0.13, "eval_bleu": 3.9997520454971136, "eval_loss": 1.3540712594985962, "eval_runtime": 10.3322, "eval_samples_per_second": 193.569, "eval_steps_per_second": 1.549, "step": 500 }, { "epoch": 0.15, "learning_rate": 0.0004743983614951357, "loss": 1.4096, "step": 600 }, { "epoch": 0.18, "learning_rate": 0.000470131421744325, "loss": 1.3729, "step": 700 }, { "epoch": 0.2, "learning_rate": 0.0004658644819935143, "loss": 1.3389, "step": 800 }, { "epoch": 0.23, "learning_rate": 0.00046159754224270354, "loss": 1.3166, "step": 900 }, { "epoch": 0.26, "learning_rate": 0.00045733060249189284, "loss": 1.2915, "step": 1000 }, { "epoch": 0.26, "eval_bleu": 5.393565315342523, "eval_loss": 1.2113043069839478, "eval_runtime": 8.5631, "eval_samples_per_second": 233.562, "eval_steps_per_second": 1.868, "step": 1000 }, { "epoch": 0.28, "learning_rate": 0.00045306366274108214, "loss": 1.2669, "step": 1100 }, { "epoch": 0.31, "learning_rate": 0.0004487967229902714, "loss": 1.2492, "step": 1200 }, { "epoch": 0.33, "learning_rate": 0.0004445297832394607, "loss": 1.2371, "step": 1300 }, { "epoch": 0.36, "learning_rate": 0.00044026284348864993, "loss": 1.2231, "step": 1400 }, { "epoch": 0.38, "learning_rate": 0.00043599590373783923, "loss": 1.2059, "step": 1500 }, { "epoch": 0.38, "eval_bleu": 5.838067918496751, "eval_loss": 1.1367273330688477, "eval_runtime": 9.2404, "eval_samples_per_second": 216.442, "eval_steps_per_second": 1.732, "step": 1500 }, { "epoch": 0.41, "learning_rate": 0.0004317289639870285, "loss": 1.1931, "step": 1600 }, { "epoch": 0.44, "learning_rate": 0.0004274620242362178, "loss": 1.1761, "step": 1700 }, { "epoch": 0.46, "learning_rate": 0.0004231950844854071, "loss": 1.1792, "step": 1800 }, { "epoch": 0.49, "learning_rate": 0.0004189281447345963, "loss": 1.1614, "step": 1900 }, { "epoch": 0.51, "learning_rate": 0.0004146612049837856, "loss": 1.1573, "step": 2000 }, { "epoch": 0.51, "eval_bleu": 6.242227124133382, "eval_loss": 1.0900604724884033, "eval_runtime": 8.3953, "eval_samples_per_second": 238.228, "eval_steps_per_second": 1.906, "step": 2000 }, { "epoch": 0.54, "learning_rate": 0.0004103942652329749, "loss": 1.1449, "step": 2100 }, { "epoch": 0.56, "learning_rate": 0.00040612732548216417, "loss": 1.1379, "step": 2200 }, { "epoch": 0.59, "learning_rate": 0.00040186038573135347, "loss": 1.1281, "step": 2300 }, { "epoch": 0.61, "learning_rate": 0.00039759344598054277, "loss": 1.1226, "step": 2400 }, { "epoch": 0.64, "learning_rate": 0.00039332650622973206, "loss": 1.1121, "step": 2500 }, { "epoch": 0.64, "eval_bleu": 6.627148578471305, "eval_loss": 1.0542418956756592, "eval_runtime": 9.2835, "eval_samples_per_second": 215.436, "eval_steps_per_second": 1.723, "step": 2500 }, { "epoch": 0.67, "learning_rate": 0.0003890595664789213, "loss": 1.1076, "step": 2600 }, { "epoch": 0.69, "learning_rate": 0.0003847926267281106, "loss": 1.1001, "step": 2700 }, { "epoch": 0.72, "learning_rate": 0.0003805256869772999, "loss": 1.0996, "step": 2800 }, { "epoch": 0.74, "learning_rate": 0.00037625874722648915, "loss": 1.088, "step": 2900 }, { "epoch": 0.77, "learning_rate": 0.00037199180747567845, "loss": 1.0867, "step": 3000 }, { "epoch": 0.77, "eval_bleu": 6.879580310573366, "eval_loss": 1.025155782699585, "eval_runtime": 8.5706, "eval_samples_per_second": 233.355, "eval_steps_per_second": 1.867, "step": 3000 }, { "epoch": 0.79, "learning_rate": 0.00036772486772486775, "loss": 1.0784, "step": 3100 }, { "epoch": 0.82, "learning_rate": 0.000363457927974057, "loss": 1.0794, "step": 3200 }, { "epoch": 0.84, "learning_rate": 0.0003591909882232463, "loss": 1.0736, "step": 3300 }, { "epoch": 0.87, "learning_rate": 0.0003549240484724356, "loss": 1.0684, "step": 3400 }, { "epoch": 0.9, "learning_rate": 0.0003506571087216249, "loss": 1.0623, "step": 3500 }, { "epoch": 0.9, "eval_bleu": 7.039305128716119, "eval_loss": 1.0067821741104126, "eval_runtime": 9.3223, "eval_samples_per_second": 214.539, "eval_steps_per_second": 1.716, "step": 3500 }, { "epoch": 0.92, "learning_rate": 0.00034639016897081414, "loss": 1.0609, "step": 3600 }, { "epoch": 0.95, "learning_rate": 0.00034212322922000344, "loss": 1.0579, "step": 3700 }, { "epoch": 0.97, "learning_rate": 0.00033785628946919274, "loss": 1.0517, "step": 3800 }, { "epoch": 1.0, "learning_rate": 0.000333589349718382, "loss": 1.054, "step": 3900 }, { "epoch": 1.02, "learning_rate": 0.0003293224099675713, "loss": 1.0408, "step": 4000 }, { "epoch": 1.02, "eval_bleu": 7.2660174031875915, "eval_loss": 0.9882246255874634, "eval_runtime": 8.4135, "eval_samples_per_second": 237.713, "eval_steps_per_second": 1.902, "step": 4000 }, { "epoch": 1.05, "learning_rate": 0.0003250554702167606, "loss": 1.0317, "step": 4100 }, { "epoch": 1.08, "learning_rate": 0.0003207885304659498, "loss": 1.0344, "step": 4200 }, { "epoch": 1.1, "learning_rate": 0.0003165215907151391, "loss": 1.0274, "step": 4300 }, { "epoch": 1.13, "learning_rate": 0.0003122546509643284, "loss": 1.0216, "step": 4400 }, { "epoch": 1.15, "learning_rate": 0.0003079877112135177, "loss": 1.0203, "step": 4500 }, { "epoch": 1.15, "eval_bleu": 7.055297387993883, "eval_loss": 0.9723305702209473, "eval_runtime": 9.3412, "eval_samples_per_second": 214.105, "eval_steps_per_second": 1.713, "step": 4500 }, { "epoch": 1.18, "learning_rate": 0.0003037207714627069, "loss": 1.0217, "step": 4600 }, { "epoch": 1.2, "learning_rate": 0.0002994538317118962, "loss": 1.0184, "step": 4700 }, { "epoch": 1.23, "learning_rate": 0.0002951868919610855, "loss": 1.015, "step": 4800 }, { "epoch": 1.25, "learning_rate": 0.00029091995221027477, "loss": 1.011, "step": 4900 }, { "epoch": 1.28, "learning_rate": 0.00028665301245946406, "loss": 1.0054, "step": 5000 }, { "epoch": 1.28, "eval_bleu": 7.455525040183996, "eval_loss": 0.9624391198158264, "eval_runtime": 8.4826, "eval_samples_per_second": 235.778, "eval_steps_per_second": 1.886, "step": 5000 }, { "epoch": 1.31, "learning_rate": 0.00028238607270865336, "loss": 1.0062, "step": 5100 }, { "epoch": 1.33, "learning_rate": 0.0002781191329578426, "loss": 1.0081, "step": 5200 }, { "epoch": 1.36, "learning_rate": 0.0002738521932070319, "loss": 1.0007, "step": 5300 }, { "epoch": 1.38, "learning_rate": 0.0002695852534562212, "loss": 0.9973, "step": 5400 }, { "epoch": 1.41, "learning_rate": 0.0002653183137054105, "loss": 0.9977, "step": 5500 }, { "epoch": 1.41, "eval_bleu": 7.426000467701662, "eval_loss": 0.9526358842849731, "eval_runtime": 9.3659, "eval_samples_per_second": 213.542, "eval_steps_per_second": 1.708, "step": 5500 }, { "epoch": 1.43, "learning_rate": 0.00026105137395459975, "loss": 0.9982, "step": 5600 }, { "epoch": 1.46, "learning_rate": 0.00025678443420378905, "loss": 0.9956, "step": 5700 }, { "epoch": 1.48, "learning_rate": 0.00025251749445297835, "loss": 0.9941, "step": 5800 }, { "epoch": 1.51, "learning_rate": 0.0002482505547021676, "loss": 0.9881, "step": 5900 }, { "epoch": 1.54, "learning_rate": 0.0002439836149513569, "loss": 0.9931, "step": 6000 }, { "epoch": 1.54, "eval_bleu": 7.523072732517067, "eval_loss": 0.9395684003829956, "eval_runtime": 9.0823, "eval_samples_per_second": 220.209, "eval_steps_per_second": 1.762, "step": 6000 }, { "epoch": 1.56, "learning_rate": 0.0002397166752005462, "loss": 0.9834, "step": 6100 }, { "epoch": 1.59, "learning_rate": 0.00023544973544973544, "loss": 0.9821, "step": 6200 }, { "epoch": 1.61, "learning_rate": 0.00023118279569892471, "loss": 0.9859, "step": 6300 }, { "epoch": 1.64, "learning_rate": 0.00022691585594811401, "loss": 0.9762, "step": 6400 }, { "epoch": 1.66, "learning_rate": 0.0002226489161973033, "loss": 0.9804, "step": 6500 }, { "epoch": 1.66, "eval_bleu": 7.43758431482021, "eval_loss": 0.9323887825012207, "eval_runtime": 9.0344, "eval_samples_per_second": 221.376, "eval_steps_per_second": 1.771, "step": 6500 }, { "epoch": 1.69, "learning_rate": 0.00021838197644649259, "loss": 0.9794, "step": 6600 }, { "epoch": 1.72, "learning_rate": 0.00021411503669568186, "loss": 0.9737, "step": 6700 }, { "epoch": 1.74, "learning_rate": 0.00020984809694487113, "loss": 0.9696, "step": 6800 }, { "epoch": 1.77, "learning_rate": 0.00020558115719406043, "loss": 0.9735, "step": 6900 }, { "epoch": 1.79, "learning_rate": 0.0002013142174432497, "loss": 0.9691, "step": 7000 }, { "epoch": 1.79, "eval_bleu": 7.52270860836675, "eval_loss": 0.9264442324638367, "eval_runtime": 9.3377, "eval_samples_per_second": 214.186, "eval_steps_per_second": 1.713, "step": 7000 }, { "epoch": 1.82, "learning_rate": 0.000197047277692439, "loss": 0.9754, "step": 7100 }, { "epoch": 1.84, "learning_rate": 0.00019278033794162827, "loss": 0.9688, "step": 7200 }, { "epoch": 1.87, "learning_rate": 0.00018851339819081755, "loss": 0.968, "step": 7300 }, { "epoch": 1.89, "learning_rate": 0.00018424645844000685, "loss": 0.9628, "step": 7400 }, { "epoch": 1.92, "learning_rate": 0.00017997951868919612, "loss": 0.9645, "step": 7500 }, { "epoch": 1.92, "eval_bleu": 7.685904345266889, "eval_loss": 0.9192501306533813, "eval_runtime": 8.7414, "eval_samples_per_second": 228.796, "eval_steps_per_second": 1.83, "step": 7500 }, { "epoch": 1.95, "learning_rate": 0.0001757125789383854, "loss": 0.9674, "step": 7600 }, { "epoch": 1.97, "learning_rate": 0.00017144563918757466, "loss": 0.9659, "step": 7700 }, { "epoch": 2.0, "learning_rate": 0.00016717869943676394, "loss": 0.9646, "step": 7800 }, { "epoch": 2.02, "learning_rate": 0.00016291175968595324, "loss": 0.9477, "step": 7900 }, { "epoch": 2.05, "learning_rate": 0.0001586448199351425, "loss": 0.9509, "step": 8000 }, { "epoch": 2.05, "eval_bleu": 7.647325378973634, "eval_loss": 0.9144095778465271, "eval_runtime": 9.1538, "eval_samples_per_second": 218.489, "eval_steps_per_second": 1.748, "step": 8000 }, { "epoch": 2.07, "learning_rate": 0.00024078341013824886, "loss": 0.9532, "step": 8100 }, { "epoch": 2.1, "learning_rate": 0.00023758320532514082, "loss": 0.949, "step": 8200 }, { "epoch": 2.12, "learning_rate": 0.00023438300051203275, "loss": 0.9506, "step": 8300 }, { "epoch": 2.15, "learning_rate": 0.00023118279569892471, "loss": 0.9539, "step": 8400 }, { "epoch": 2.18, "learning_rate": 0.00022798259088581668, "loss": 0.9485, "step": 8500 }, { "epoch": 2.18, "eval_bleu": 7.654834398763605, "eval_loss": 0.9117684364318848, "eval_runtime": 11.8365, "eval_samples_per_second": 168.969, "eval_steps_per_second": 1.352, "step": 8500 }, { "epoch": 2.2, "learning_rate": 0.00022478238607270864, "loss": 0.9495, "step": 8600 }, { "epoch": 2.23, "learning_rate": 0.00022158218125960063, "loss": 0.9484, "step": 8700 }, { "epoch": 2.25, "learning_rate": 0.00021838197644649259, "loss": 0.9486, "step": 8800 }, { "epoch": 2.28, "learning_rate": 0.00021518177163338455, "loss": 0.9445, "step": 8900 }, { "epoch": 2.3, "learning_rate": 0.0002119815668202765, "loss": 0.9437, "step": 9000 }, { "epoch": 2.3, "eval_bleu": 7.606584348844775, "eval_loss": 0.9072746634483337, "eval_runtime": 9.3821, "eval_samples_per_second": 213.171, "eval_steps_per_second": 1.705, "step": 9000 }, { "epoch": 2.33, "learning_rate": 0.00020878136200716847, "loss": 0.9406, "step": 9100 }, { "epoch": 2.36, "learning_rate": 0.00020558115719406043, "loss": 0.9418, "step": 9200 }, { "epoch": 2.38, "learning_rate": 0.0002023809523809524, "loss": 0.9402, "step": 9300 }, { "epoch": 2.41, "learning_rate": 0.00019918074756784435, "loss": 0.94, "step": 9400 }, { "epoch": 2.43, "learning_rate": 0.00019598054275473631, "loss": 0.9393, "step": 9500 }, { "epoch": 2.43, "eval_bleu": 7.714033503089272, "eval_loss": 0.9019351601600647, "eval_runtime": 9.2531, "eval_samples_per_second": 216.144, "eval_steps_per_second": 1.729, "step": 9500 }, { "epoch": 2.46, "learning_rate": 0.00019278033794162827, "loss": 0.9378, "step": 9600 }, { "epoch": 2.48, "learning_rate": 0.00018958013312852024, "loss": 0.9345, "step": 9700 }, { "epoch": 2.51, "learning_rate": 0.0001863799283154122, "loss": 0.936, "step": 9800 }, { "epoch": 2.53, "learning_rate": 0.00018317972350230416, "loss": 0.9389, "step": 9900 }, { "epoch": 2.56, "learning_rate": 0.00017997951868919612, "loss": 0.9336, "step": 10000 }, { "epoch": 2.56, "eval_bleu": 7.80952091204729, "eval_loss": 0.897022545337677, "eval_runtime": 9.0616, "eval_samples_per_second": 220.711, "eval_steps_per_second": 1.766, "step": 10000 }, { "epoch": 2.59, "learning_rate": 0.00017677931387608805, "loss": 0.9376, "step": 10100 }, { "epoch": 2.61, "learning_rate": 0.00017357910906298001, "loss": 0.9344, "step": 10200 }, { "epoch": 2.64, "learning_rate": 0.00017037890424987198, "loss": 0.9358, "step": 10300 }, { "epoch": 2.66, "learning_rate": 0.00016717869943676394, "loss": 0.9309, "step": 10400 }, { "epoch": 2.69, "learning_rate": 0.0001639784946236559, "loss": 0.9368, "step": 10500 }, { "epoch": 2.69, "eval_bleu": 7.937668751101428, "eval_loss": 0.8936744928359985, "eval_runtime": 9.5691, "eval_samples_per_second": 209.006, "eval_steps_per_second": 1.672, "step": 10500 }, { "epoch": 2.71, "learning_rate": 0.00016077828981054789, "loss": 0.9337, "step": 10600 }, { "epoch": 2.74, "learning_rate": 0.00015757808499743985, "loss": 0.9266, "step": 10700 }, { "epoch": 2.76, "learning_rate": 0.0001543778801843318, "loss": 0.9276, "step": 10800 }, { "epoch": 2.79, "learning_rate": 0.00015117767537122377, "loss": 0.9266, "step": 10900 }, { "epoch": 2.82, "learning_rate": 0.00014797747055811573, "loss": 0.925, "step": 11000 }, { "epoch": 2.82, "eval_bleu": 7.842526661794731, "eval_loss": 0.8898113965988159, "eval_runtime": 9.4133, "eval_samples_per_second": 212.466, "eval_steps_per_second": 1.7, "step": 11000 }, { "epoch": 2.84, "learning_rate": 0.0001447772657450077, "loss": 0.9161, "step": 11100 }, { "epoch": 2.87, "learning_rate": 0.00014157706093189965, "loss": 0.9263, "step": 11200 }, { "epoch": 2.89, "learning_rate": 0.0001383768561187916, "loss": 0.9293, "step": 11300 }, { "epoch": 2.92, "learning_rate": 0.00013517665130568357, "loss": 0.924, "step": 11400 }, { "epoch": 2.94, "learning_rate": 0.00013197644649257554, "loss": 0.921, "step": 11500 }, { "epoch": 2.94, "eval_bleu": 7.900750479934787, "eval_loss": 0.8863839507102966, "eval_runtime": 9.7323, "eval_samples_per_second": 205.501, "eval_steps_per_second": 1.644, "step": 11500 }, { "epoch": 2.97, "learning_rate": 0.0001287762416794675, "loss": 0.9213, "step": 11600 }, { "epoch": 3.0, "learning_rate": 0.00012557603686635946, "loss": 0.9219, "step": 11700 }, { "epoch": 3.02, "learning_rate": 0.00012237583205325142, "loss": 0.9141, "step": 11800 }, { "epoch": 3.05, "learning_rate": 0.00011917562724014337, "loss": 0.9125, "step": 11900 }, { "epoch": 3.07, "learning_rate": 0.00011597542242703534, "loss": 0.9177, "step": 12000 }, { "epoch": 3.07, "eval_bleu": 7.913356299834283, "eval_loss": 0.8835927844047546, "eval_runtime": 9.3736, "eval_samples_per_second": 213.364, "eval_steps_per_second": 1.707, "step": 12000 }, { "epoch": 3.1, "learning_rate": 0.00019022017409114184, "loss": 0.9129, "step": 12100 }, { "epoch": 3.12, "learning_rate": 0.0001876600102406554, "loss": 0.9121, "step": 12200 }, { "epoch": 3.15, "learning_rate": 0.00018509984639016898, "loss": 0.9123, "step": 12300 }, { "epoch": 3.17, "learning_rate": 0.00018253968253968252, "loss": 0.9168, "step": 12400 }, { "epoch": 3.2, "learning_rate": 0.00017997951868919612, "loss": 0.9151, "step": 12500 }, { "epoch": 3.2, "eval_bleu": 7.864673119868297, "eval_loss": 0.8821397423744202, "eval_runtime": 11.1353, "eval_samples_per_second": 179.609, "eval_steps_per_second": 1.437, "step": 12500 }, { "epoch": 3.23, "learning_rate": 0.0001774193548387097, "loss": 0.9112, "step": 12600 }, { "epoch": 3.25, "learning_rate": 0.00017485919098822326, "loss": 0.9175, "step": 12700 }, { "epoch": 3.28, "learning_rate": 0.00017229902713773683, "loss": 0.9131, "step": 12800 }, { "epoch": 3.3, "learning_rate": 0.0001697388632872504, "loss": 0.9116, "step": 12900 }, { "epoch": 3.33, "learning_rate": 0.00016717869943676394, "loss": 0.9104, "step": 13000 }, { "epoch": 3.33, "eval_bleu": 8.083046367100428, "eval_loss": 0.8790320158004761, "eval_runtime": 9.2669, "eval_samples_per_second": 215.821, "eval_steps_per_second": 1.727, "step": 13000 }, { "epoch": 3.35, "learning_rate": 0.0001646185355862775, "loss": 0.9083, "step": 13100 }, { "epoch": 3.38, "learning_rate": 0.0001620583717357911, "loss": 0.9084, "step": 13200 }, { "epoch": 3.4, "learning_rate": 0.00015949820788530467, "loss": 0.9155, "step": 13300 }, { "epoch": 3.43, "learning_rate": 0.00015693804403481824, "loss": 0.9059, "step": 13400 }, { "epoch": 3.46, "learning_rate": 0.0001543778801843318, "loss": 0.9035, "step": 13500 }, { "epoch": 3.46, "eval_bleu": 8.095903306480848, "eval_loss": 0.8766345381736755, "eval_runtime": 9.4138, "eval_samples_per_second": 212.454, "eval_steps_per_second": 1.7, "step": 13500 }, { "epoch": 3.48, "learning_rate": 0.00015181771633384535, "loss": 0.9053, "step": 13600 }, { "epoch": 3.51, "learning_rate": 0.00014925755248335892, "loss": 0.9047, "step": 13700 }, { "epoch": 3.53, "learning_rate": 0.00014669738863287251, "loss": 0.9017, "step": 13800 }, { "epoch": 3.56, "learning_rate": 0.00014413722478238608, "loss": 0.9073, "step": 13900 }, { "epoch": 3.58, "learning_rate": 0.00014157706093189965, "loss": 0.8992, "step": 14000 }, { "epoch": 3.58, "eval_bleu": 8.01784311685486, "eval_loss": 0.8740718960762024, "eval_runtime": 9.3262, "eval_samples_per_second": 214.449, "eval_steps_per_second": 1.716, "step": 14000 }, { "epoch": 3.61, "learning_rate": 0.00013901689708141322, "loss": 0.904, "step": 14100 }, { "epoch": 3.64, "learning_rate": 0.0001364567332309268, "loss": 0.9043, "step": 14200 }, { "epoch": 3.66, "learning_rate": 0.00013389656938044033, "loss": 0.9038, "step": 14300 }, { "epoch": 3.69, "learning_rate": 0.0001313364055299539, "loss": 0.9061, "step": 14400 }, { "epoch": 3.71, "learning_rate": 0.0001287762416794675, "loss": 0.8986, "step": 14500 }, { "epoch": 3.71, "eval_bleu": 8.038360794483163, "eval_loss": 0.8720167875289917, "eval_runtime": 9.7469, "eval_samples_per_second": 205.193, "eval_steps_per_second": 1.642, "step": 14500 }, { "epoch": 3.74, "learning_rate": 0.00012621607782898107, "loss": 0.8972, "step": 14600 }, { "epoch": 3.76, "learning_rate": 0.00012365591397849463, "loss": 0.9032, "step": 14700 }, { "epoch": 3.79, "learning_rate": 0.00012109575012800819, "loss": 0.9009, "step": 14800 }, { "epoch": 3.81, "learning_rate": 0.00011853558627752177, "loss": 0.9021, "step": 14900 }, { "epoch": 3.84, "learning_rate": 0.00011597542242703534, "loss": 0.894, "step": 15000 }, { "epoch": 3.84, "eval_bleu": 8.091339671473314, "eval_loss": 0.8682767152786255, "eval_runtime": 9.833, "eval_samples_per_second": 203.397, "eval_steps_per_second": 1.627, "step": 15000 }, { "epoch": 3.87, "learning_rate": 0.0001134152585765489, "loss": 0.9011, "step": 15100 }, { "epoch": 3.89, "learning_rate": 0.00011085509472606247, "loss": 0.9016, "step": 15200 }, { "epoch": 3.92, "learning_rate": 0.00010829493087557605, "loss": 0.8951, "step": 15300 }, { "epoch": 3.94, "learning_rate": 0.0001057347670250896, "loss": 0.8981, "step": 15400 }, { "epoch": 3.97, "learning_rate": 0.00010317460317460317, "loss": 0.8932, "step": 15500 }, { "epoch": 3.97, "eval_bleu": 8.099679129372298, "eval_loss": 0.8663304448127747, "eval_runtime": 9.7444, "eval_samples_per_second": 205.246, "eval_steps_per_second": 1.642, "step": 15500 }, { "epoch": 3.99, "learning_rate": 0.00010061443932411675, "loss": 0.8991, "step": 15600 }, { "epoch": 4.02, "learning_rate": 9.805427547363031e-05, "loss": 0.8937, "step": 15700 }, { "epoch": 4.04, "learning_rate": 9.549411162314388e-05, "loss": 0.8877, "step": 15800 }, { "epoch": 4.07, "learning_rate": 9.293394777265746e-05, "loss": 0.8916, "step": 15900 }, { "epoch": 4.1, "learning_rate": 9.037378392217102e-05, "loss": 0.8889, "step": 16000 }, { "epoch": 4.1, "eval_bleu": 8.108828400885466, "eval_loss": 0.8641292452812195, "eval_runtime": 9.0966, "eval_samples_per_second": 219.862, "eval_steps_per_second": 1.759, "step": 16000 } ], "logging_steps": 100, "max_steps": 19530, "num_train_epochs": 5, "save_steps": 2000, "total_flos": 4.675529716059341e+16, "trial_name": null, "trial_params": null }