{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.97323295692179, "eval_steps": 2000, "global_step": 38000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.9895441237975744e-05, "loss": 2.5556, "step": 200 }, { "epoch": 0.04, "learning_rate": 1.9790882475951487e-05, "loss": 2.1664, "step": 400 }, { "epoch": 0.06, "learning_rate": 1.968632371392723e-05, "loss": 2.1184, "step": 600 }, { "epoch": 0.08, "learning_rate": 1.958176495190297e-05, "loss": 2.0691, "step": 800 }, { "epoch": 0.1, "learning_rate": 1.9477206189878712e-05, "loss": 2.0324, "step": 1000 }, { "epoch": 0.13, "learning_rate": 1.9372647427854455e-05, "loss": 2.0135, "step": 1200 }, { "epoch": 0.15, "learning_rate": 1.9268088665830197e-05, "loss": 2.0028, "step": 1400 }, { "epoch": 0.17, "learning_rate": 1.916352990380594e-05, "loss": 1.99, "step": 1600 }, { "epoch": 0.19, "learning_rate": 1.9059493935591804e-05, "loss": 1.9608, "step": 1800 }, { "epoch": 0.21, "learning_rate": 1.8954935173567547e-05, "loss": 1.9402, "step": 2000 }, { "epoch": 0.21, "eval_bleu": 0.2597, "eval_gen_len": 18.3893, "eval_loss": 1.7414125204086304, "eval_runtime": 71.3769, "eval_samples_per_second": 45.127, "eval_steps_per_second": 1.415, "step": 2000 }, { "epoch": 0.23, "learning_rate": 1.885089920535341e-05, "loss": 1.9321, "step": 2200 }, { "epoch": 0.25, "learning_rate": 1.8746340443329153e-05, "loss": 1.8953, "step": 2400 }, { "epoch": 0.27, "learning_rate": 1.8641781681304896e-05, "loss": 1.9034, "step": 2600 }, { "epoch": 0.29, "learning_rate": 1.853722291928064e-05, "loss": 1.8885, "step": 2800 }, { "epoch": 0.31, "learning_rate": 1.8432664157256378e-05, "loss": 1.8809, "step": 3000 }, { "epoch": 0.33, "learning_rate": 1.832810539523212e-05, "loss": 1.8842, "step": 3200 }, { "epoch": 0.36, "learning_rate": 1.8223546633207864e-05, "loss": 1.8553, "step": 3400 }, { "epoch": 0.38, "learning_rate": 1.8118987871183606e-05, "loss": 1.8627, "step": 3600 }, { "epoch": 0.4, "learning_rate": 1.801442910915935e-05, "loss": 1.8589, "step": 3800 }, { "epoch": 0.42, "learning_rate": 1.7909870347135092e-05, "loss": 1.8503, "step": 4000 }, { "epoch": 0.42, "eval_bleu": 0.2671, "eval_gen_len": 18.3657, "eval_loss": 1.6917428970336914, "eval_runtime": 71.0352, "eval_samples_per_second": 45.344, "eval_steps_per_second": 1.422, "step": 4000 }, { "epoch": 0.44, "learning_rate": 1.7805311585110835e-05, "loss": 1.8452, "step": 4200 }, { "epoch": 0.46, "learning_rate": 1.7700752823086574e-05, "loss": 1.8537, "step": 4400 }, { "epoch": 0.48, "learning_rate": 1.759619406106232e-05, "loss": 1.8557, "step": 4600 }, { "epoch": 0.5, "learning_rate": 1.749163529903806e-05, "loss": 1.8429, "step": 4800 }, { "epoch": 0.52, "learning_rate": 1.7387076537013802e-05, "loss": 1.8429, "step": 5000 }, { "epoch": 0.54, "learning_rate": 1.7282517774989545e-05, "loss": 1.8295, "step": 5200 }, { "epoch": 0.56, "learning_rate": 1.7177959012965288e-05, "loss": 1.8243, "step": 5400 }, { "epoch": 0.59, "learning_rate": 1.707340025094103e-05, "loss": 1.8388, "step": 5600 }, { "epoch": 0.61, "learning_rate": 1.6968841488916773e-05, "loss": 1.8347, "step": 5800 }, { "epoch": 0.63, "learning_rate": 1.6864282726892516e-05, "loss": 1.8342, "step": 6000 }, { "epoch": 0.63, "eval_bleu": 0.2695, "eval_gen_len": 18.2366, "eval_loss": 1.6622676849365234, "eval_runtime": 70.9517, "eval_samples_per_second": 45.397, "eval_steps_per_second": 1.424, "step": 6000 }, { "epoch": 0.65, "learning_rate": 1.675972396486826e-05, "loss": 1.8022, "step": 6200 }, { "epoch": 0.67, "learning_rate": 1.6655165202843998e-05, "loss": 1.8215, "step": 6400 }, { "epoch": 0.69, "learning_rate": 1.6550606440819744e-05, "loss": 1.8081, "step": 6600 }, { "epoch": 0.71, "learning_rate": 1.6446047678795483e-05, "loss": 1.8093, "step": 6800 }, { "epoch": 0.73, "learning_rate": 1.6341488916771226e-05, "loss": 1.7879, "step": 7000 }, { "epoch": 0.75, "learning_rate": 1.623693015474697e-05, "loss": 1.8126, "step": 7200 }, { "epoch": 0.77, "learning_rate": 1.6132371392722712e-05, "loss": 1.7782, "step": 7400 }, { "epoch": 0.79, "learning_rate": 1.6027812630698454e-05, "loss": 1.7955, "step": 7600 }, { "epoch": 0.82, "learning_rate": 1.5923253868674197e-05, "loss": 1.7975, "step": 7800 }, { "epoch": 0.84, "learning_rate": 1.581869510664994e-05, "loss": 1.8201, "step": 8000 }, { "epoch": 0.84, "eval_bleu": 0.2721, "eval_gen_len": 18.2549, "eval_loss": 1.643432378768921, "eval_runtime": 71.6597, "eval_samples_per_second": 44.949, "eval_steps_per_second": 1.409, "step": 8000 }, { "epoch": 0.86, "learning_rate": 1.5714136344625683e-05, "loss": 1.8129, "step": 8200 }, { "epoch": 0.88, "learning_rate": 1.5609577582601422e-05, "loss": 1.7791, "step": 8400 }, { "epoch": 0.9, "learning_rate": 1.5505018820577165e-05, "loss": 1.7857, "step": 8600 }, { "epoch": 0.92, "learning_rate": 1.5400460058552907e-05, "loss": 1.7819, "step": 8800 }, { "epoch": 0.94, "learning_rate": 1.529590129652865e-05, "loss": 1.7665, "step": 9000 }, { "epoch": 0.96, "learning_rate": 1.5191342534504391e-05, "loss": 1.7671, "step": 9200 }, { "epoch": 0.98, "learning_rate": 1.5086783772480136e-05, "loss": 1.7696, "step": 9400 }, { "epoch": 1.0, "learning_rate": 1.4982747804265998e-05, "loss": 1.781, "step": 9600 }, { "epoch": 1.02, "learning_rate": 1.4878189042241742e-05, "loss": 1.751, "step": 9800 }, { "epoch": 1.05, "learning_rate": 1.4773630280217483e-05, "loss": 1.7567, "step": 10000 }, { "epoch": 1.05, "eval_bleu": 0.2744, "eval_gen_len": 18.1981, "eval_loss": 1.629482626914978, "eval_runtime": 71.9531, "eval_samples_per_second": 44.765, "eval_steps_per_second": 1.404, "step": 10000 }, { "epoch": 1.07, "learning_rate": 1.4669071518193226e-05, "loss": 1.7625, "step": 10200 }, { "epoch": 1.09, "learning_rate": 1.4564512756168969e-05, "loss": 1.7674, "step": 10400 }, { "epoch": 1.11, "learning_rate": 1.4459953994144712e-05, "loss": 1.7418, "step": 10600 }, { "epoch": 1.13, "learning_rate": 1.4355395232120453e-05, "loss": 1.7596, "step": 10800 }, { "epoch": 1.15, "learning_rate": 1.4250836470096194e-05, "loss": 1.7466, "step": 11000 }, { "epoch": 1.17, "learning_rate": 1.4146277708071938e-05, "loss": 1.7583, "step": 11200 }, { "epoch": 1.19, "learning_rate": 1.4041718946047679e-05, "loss": 1.7522, "step": 11400 }, { "epoch": 1.21, "learning_rate": 1.3937160184023422e-05, "loss": 1.7332, "step": 11600 }, { "epoch": 1.23, "learning_rate": 1.3832601421999166e-05, "loss": 1.7557, "step": 11800 }, { "epoch": 1.25, "learning_rate": 1.3728042659974907e-05, "loss": 1.745, "step": 12000 }, { "epoch": 1.25, "eval_bleu": 0.2747, "eval_gen_len": 18.1897, "eval_loss": 1.6197348833084106, "eval_runtime": 70.3482, "eval_samples_per_second": 45.787, "eval_steps_per_second": 1.436, "step": 12000 }, { "epoch": 1.28, "learning_rate": 1.3623483897950648e-05, "loss": 1.744, "step": 12200 }, { "epoch": 1.3, "learning_rate": 1.3518925135926391e-05, "loss": 1.7341, "step": 12400 }, { "epoch": 1.32, "learning_rate": 1.3414366373902134e-05, "loss": 1.7437, "step": 12600 }, { "epoch": 1.34, "learning_rate": 1.3309807611877877e-05, "loss": 1.7423, "step": 12800 }, { "epoch": 1.36, "learning_rate": 1.3205248849853618e-05, "loss": 1.7377, "step": 13000 }, { "epoch": 1.38, "learning_rate": 1.3100690087829362e-05, "loss": 1.7399, "step": 13200 }, { "epoch": 1.4, "learning_rate": 1.2996131325805103e-05, "loss": 1.7488, "step": 13400 }, { "epoch": 1.42, "learning_rate": 1.2891572563780846e-05, "loss": 1.7621, "step": 13600 }, { "epoch": 1.44, "learning_rate": 1.278753659556671e-05, "loss": 1.7282, "step": 13800 }, { "epoch": 1.46, "learning_rate": 1.268297783354245e-05, "loss": 1.7492, "step": 14000 }, { "epoch": 1.46, "eval_bleu": 0.2753, "eval_gen_len": 18.1773, "eval_loss": 1.608814001083374, "eval_runtime": 71.3668, "eval_samples_per_second": 45.133, "eval_steps_per_second": 1.415, "step": 14000 }, { "epoch": 1.48, "learning_rate": 1.2578419071518195e-05, "loss": 1.7392, "step": 14200 }, { "epoch": 1.51, "learning_rate": 1.2473860309493936e-05, "loss": 1.7378, "step": 14400 }, { "epoch": 1.53, "learning_rate": 1.2369301547469679e-05, "loss": 1.727, "step": 14600 }, { "epoch": 1.55, "learning_rate": 1.226474278544542e-05, "loss": 1.7366, "step": 14800 }, { "epoch": 1.57, "learning_rate": 1.2160184023421164e-05, "loss": 1.7405, "step": 15000 }, { "epoch": 1.59, "learning_rate": 1.2055625261396906e-05, "loss": 1.7292, "step": 15200 }, { "epoch": 1.61, "learning_rate": 1.1951066499372648e-05, "loss": 1.7514, "step": 15400 }, { "epoch": 1.63, "learning_rate": 1.1846507737348391e-05, "loss": 1.7322, "step": 15600 }, { "epoch": 1.65, "learning_rate": 1.1742471769134253e-05, "loss": 1.7417, "step": 15800 }, { "epoch": 1.67, "learning_rate": 1.1637913007109998e-05, "loss": 1.7126, "step": 16000 }, { "epoch": 1.67, "eval_bleu": 0.2771, "eval_gen_len": 18.2412, "eval_loss": 1.6022411584854126, "eval_runtime": 71.0218, "eval_samples_per_second": 45.352, "eval_steps_per_second": 1.422, "step": 16000 }, { "epoch": 1.69, "learning_rate": 1.1533354245085739e-05, "loss": 1.7279, "step": 16200 }, { "epoch": 1.71, "learning_rate": 1.1428795483061481e-05, "loss": 1.7124, "step": 16400 }, { "epoch": 1.74, "learning_rate": 1.1324236721037222e-05, "loss": 1.7307, "step": 16600 }, { "epoch": 1.76, "learning_rate": 1.1219677959012967e-05, "loss": 1.7178, "step": 16800 }, { "epoch": 1.78, "learning_rate": 1.1115119196988708e-05, "loss": 1.7, "step": 17000 }, { "epoch": 1.8, "learning_rate": 1.101056043496445e-05, "loss": 1.7064, "step": 17200 }, { "epoch": 1.82, "learning_rate": 1.0906001672940193e-05, "loss": 1.7242, "step": 17400 }, { "epoch": 1.84, "learning_rate": 1.0801442910915936e-05, "loss": 1.7111, "step": 17600 }, { "epoch": 1.86, "learning_rate": 1.0696884148891677e-05, "loss": 1.7125, "step": 17800 }, { "epoch": 1.88, "learning_rate": 1.0592325386867422e-05, "loss": 1.7251, "step": 18000 }, { "epoch": 1.88, "eval_bleu": 0.2773, "eval_gen_len": 18.1916, "eval_loss": 1.5964778661727905, "eval_runtime": 71.6245, "eval_samples_per_second": 44.971, "eval_steps_per_second": 1.41, "step": 18000 }, { "epoch": 1.9, "learning_rate": 1.0488289418653284e-05, "loss": 1.7147, "step": 18200 }, { "epoch": 1.92, "learning_rate": 1.0383730656629028e-05, "loss": 1.7224, "step": 18400 }, { "epoch": 1.94, "learning_rate": 1.027917189460477e-05, "loss": 1.717, "step": 18600 }, { "epoch": 1.97, "learning_rate": 1.017461313258051e-05, "loss": 1.7086, "step": 18800 }, { "epoch": 1.99, "learning_rate": 1.0070054370556253e-05, "loss": 1.7117, "step": 19000 }, { "epoch": 2.01, "learning_rate": 9.966018402342117e-06, "loss": 1.7005, "step": 19200 }, { "epoch": 2.03, "learning_rate": 9.86145964031786e-06, "loss": 1.6843, "step": 19400 }, { "epoch": 2.05, "learning_rate": 9.7569008782936e-06, "loss": 1.6922, "step": 19600 }, { "epoch": 2.07, "learning_rate": 9.652342116269344e-06, "loss": 1.6985, "step": 19800 }, { "epoch": 2.09, "learning_rate": 9.547783354245086e-06, "loss": 1.6824, "step": 20000 }, { "epoch": 2.09, "eval_bleu": 0.2763, "eval_gen_len": 18.1354, "eval_loss": 1.5918101072311401, "eval_runtime": 69.9735, "eval_samples_per_second": 46.032, "eval_steps_per_second": 1.443, "step": 20000 }, { "epoch": 2.11, "learning_rate": 9.443224592220829e-06, "loss": 1.7036, "step": 20200 }, { "epoch": 2.13, "learning_rate": 9.338665830196572e-06, "loss": 1.7075, "step": 20400 }, { "epoch": 2.15, "learning_rate": 9.234107068172313e-06, "loss": 1.7022, "step": 20600 }, { "epoch": 2.17, "learning_rate": 9.129548306148056e-06, "loss": 1.6877, "step": 20800 }, { "epoch": 2.2, "learning_rate": 9.024989544123798e-06, "loss": 1.6877, "step": 21000 }, { "epoch": 2.22, "learning_rate": 8.920430782099541e-06, "loss": 1.6853, "step": 21200 }, { "epoch": 2.24, "learning_rate": 8.815872020075284e-06, "loss": 1.7016, "step": 21400 }, { "epoch": 2.26, "learning_rate": 8.711313258051025e-06, "loss": 1.7158, "step": 21600 }, { "epoch": 2.28, "learning_rate": 8.606754496026768e-06, "loss": 1.695, "step": 21800 }, { "epoch": 2.3, "learning_rate": 8.50219573400251e-06, "loss": 1.6868, "step": 22000 }, { "epoch": 2.3, "eval_bleu": 0.2786, "eval_gen_len": 18.1912, "eval_loss": 1.5873745679855347, "eval_runtime": 70.6377, "eval_samples_per_second": 45.599, "eval_steps_per_second": 1.43, "step": 22000 }, { "epoch": 2.32, "learning_rate": 8.397636971978253e-06, "loss": 1.6916, "step": 22200 }, { "epoch": 2.34, "learning_rate": 8.293078209953996e-06, "loss": 1.7041, "step": 22400 }, { "epoch": 2.36, "learning_rate": 8.188519447929737e-06, "loss": 1.6802, "step": 22600 }, { "epoch": 2.38, "learning_rate": 8.08396068590548e-06, "loss": 1.6876, "step": 22800 }, { "epoch": 2.4, "learning_rate": 7.979401923881222e-06, "loss": 1.6922, "step": 23000 }, { "epoch": 2.43, "learning_rate": 7.874843161856965e-06, "loss": 1.7058, "step": 23200 }, { "epoch": 2.45, "learning_rate": 7.770284399832708e-06, "loss": 1.6939, "step": 23400 }, { "epoch": 2.47, "learning_rate": 7.665725637808449e-06, "loss": 1.6887, "step": 23600 }, { "epoch": 2.49, "learning_rate": 7.5616896695943135e-06, "loss": 1.6785, "step": 23800 }, { "epoch": 2.51, "learning_rate": 7.4571309075700546e-06, "loss": 1.7067, "step": 24000 }, { "epoch": 2.51, "eval_bleu": 0.2776, "eval_gen_len": 18.109, "eval_loss": 1.5828683376312256, "eval_runtime": 70.2744, "eval_samples_per_second": 45.835, "eval_steps_per_second": 1.437, "step": 24000 }, { "epoch": 2.53, "learning_rate": 7.352572145545797e-06, "loss": 1.6752, "step": 24200 }, { "epoch": 2.55, "learning_rate": 7.248013383521539e-06, "loss": 1.6866, "step": 24400 }, { "epoch": 2.57, "learning_rate": 7.143454621497282e-06, "loss": 1.6757, "step": 24600 }, { "epoch": 2.59, "learning_rate": 7.038895859473024e-06, "loss": 1.679, "step": 24800 }, { "epoch": 2.61, "learning_rate": 6.9343370974487666e-06, "loss": 1.6995, "step": 25000 }, { "epoch": 2.63, "learning_rate": 6.829778335424509e-06, "loss": 1.6851, "step": 25200 }, { "epoch": 2.66, "learning_rate": 6.725219573400251e-06, "loss": 1.6938, "step": 25400 }, { "epoch": 2.68, "learning_rate": 6.620660811375994e-06, "loss": 1.6919, "step": 25600 }, { "epoch": 2.7, "learning_rate": 6.516102049351736e-06, "loss": 1.6884, "step": 25800 }, { "epoch": 2.72, "learning_rate": 6.4115432873274786e-06, "loss": 1.6916, "step": 26000 }, { "epoch": 2.72, "eval_bleu": 0.2787, "eval_gen_len": 18.1813, "eval_loss": 1.5792288780212402, "eval_runtime": 70.8136, "eval_samples_per_second": 45.486, "eval_steps_per_second": 1.426, "step": 26000 }, { "epoch": 2.74, "learning_rate": 6.306984525303221e-06, "loss": 1.6838, "step": 26200 }, { "epoch": 2.76, "learning_rate": 6.202425763278963e-06, "loss": 1.6926, "step": 26400 }, { "epoch": 2.78, "learning_rate": 6.098389795064827e-06, "loss": 1.684, "step": 26600 }, { "epoch": 2.8, "learning_rate": 5.993831033040569e-06, "loss": 1.6785, "step": 26800 }, { "epoch": 2.82, "learning_rate": 5.889272271016312e-06, "loss": 1.68, "step": 27000 }, { "epoch": 2.84, "learning_rate": 5.784713508992054e-06, "loss": 1.683, "step": 27200 }, { "epoch": 2.86, "learning_rate": 5.680154746967796e-06, "loss": 1.6782, "step": 27400 }, { "epoch": 2.89, "learning_rate": 5.575595984943539e-06, "loss": 1.6867, "step": 27600 }, { "epoch": 2.91, "learning_rate": 5.471037222919281e-06, "loss": 1.6677, "step": 27800 }, { "epoch": 2.93, "learning_rate": 5.366478460895024e-06, "loss": 1.6702, "step": 28000 }, { "epoch": 2.93, "eval_bleu": 0.279, "eval_gen_len": 18.1614, "eval_loss": 1.5775508880615234, "eval_runtime": 70.2225, "eval_samples_per_second": 45.868, "eval_steps_per_second": 1.438, "step": 28000 }, { "epoch": 2.95, "learning_rate": 5.261919698870766e-06, "loss": 1.6805, "step": 28200 }, { "epoch": 2.97, "learning_rate": 5.157360936846508e-06, "loss": 1.6956, "step": 28400 }, { "epoch": 2.99, "learning_rate": 5.05280217482225e-06, "loss": 1.6998, "step": 28600 }, { "epoch": 3.01, "learning_rate": 4.948766206608114e-06, "loss": 1.6684, "step": 28800 }, { "epoch": 3.03, "learning_rate": 4.844207444583857e-06, "loss": 1.6834, "step": 29000 }, { "epoch": 3.05, "learning_rate": 4.739648682559599e-06, "loss": 1.6769, "step": 29200 }, { "epoch": 3.07, "learning_rate": 4.6350899205353415e-06, "loss": 1.6872, "step": 29400 }, { "epoch": 3.09, "learning_rate": 4.531053952321205e-06, "loss": 1.6708, "step": 29600 }, { "epoch": 3.12, "learning_rate": 4.426495190296947e-06, "loss": 1.6631, "step": 29800 }, { "epoch": 3.14, "learning_rate": 4.321936428272689e-06, "loss": 1.6792, "step": 30000 }, { "epoch": 3.14, "eval_bleu": 0.2784, "eval_gen_len": 18.1822, "eval_loss": 1.5754998922348022, "eval_runtime": 70.3523, "eval_samples_per_second": 45.784, "eval_steps_per_second": 1.436, "step": 30000 }, { "epoch": 3.16, "learning_rate": 4.217377666248432e-06, "loss": 1.6546, "step": 30200 }, { "epoch": 3.18, "learning_rate": 4.112818904224175e-06, "loss": 1.6753, "step": 30400 }, { "epoch": 3.2, "learning_rate": 4.0082601421999166e-06, "loss": 1.6361, "step": 30600 }, { "epoch": 3.22, "learning_rate": 3.903701380175659e-06, "loss": 1.6696, "step": 30800 }, { "epoch": 3.24, "learning_rate": 3.799142618151401e-06, "loss": 1.6658, "step": 31000 }, { "epoch": 3.26, "learning_rate": 3.6945838561271435e-06, "loss": 1.6802, "step": 31200 }, { "epoch": 3.28, "learning_rate": 3.5900250941028862e-06, "loss": 1.6809, "step": 31400 }, { "epoch": 3.3, "learning_rate": 3.4854663320786286e-06, "loss": 1.6615, "step": 31600 }, { "epoch": 3.32, "learning_rate": 3.380907570054371e-06, "loss": 1.6761, "step": 31800 }, { "epoch": 3.35, "learning_rate": 3.276348808030113e-06, "loss": 1.65, "step": 32000 }, { "epoch": 3.35, "eval_bleu": 0.2791, "eval_gen_len": 18.1704, "eval_loss": 1.5742353200912476, "eval_runtime": 69.3182, "eval_samples_per_second": 46.467, "eval_steps_per_second": 1.457, "step": 32000 }, { "epoch": 3.37, "learning_rate": 3.1717900460058555e-06, "loss": 1.6806, "step": 32200 }, { "epoch": 3.39, "learning_rate": 3.067231283981598e-06, "loss": 1.6687, "step": 32400 }, { "epoch": 3.41, "learning_rate": 2.9626725219573405e-06, "loss": 1.6667, "step": 32600 }, { "epoch": 3.43, "learning_rate": 2.858113759933083e-06, "loss": 1.6667, "step": 32800 }, { "epoch": 3.45, "learning_rate": 2.753554997908825e-06, "loss": 1.6679, "step": 33000 }, { "epoch": 3.47, "learning_rate": 2.648996235884567e-06, "loss": 1.6717, "step": 33200 }, { "epoch": 3.49, "learning_rate": 2.5444374738603094e-06, "loss": 1.6806, "step": 33400 }, { "epoch": 3.51, "learning_rate": 2.439878711836052e-06, "loss": 1.6491, "step": 33600 }, { "epoch": 3.53, "learning_rate": 2.3353199498117944e-06, "loss": 1.6879, "step": 33800 }, { "epoch": 3.55, "learning_rate": 2.2307611877875367e-06, "loss": 1.6708, "step": 34000 }, { "epoch": 3.55, "eval_bleu": 0.2797, "eval_gen_len": 18.1627, "eval_loss": 1.5723748207092285, "eval_runtime": 70.4906, "eval_samples_per_second": 45.694, "eval_steps_per_second": 1.433, "step": 34000 }, { "epoch": 3.58, "learning_rate": 2.126202425763279e-06, "loss": 1.6777, "step": 34200 }, { "epoch": 3.6, "learning_rate": 2.0216436637390218e-06, "loss": 1.6832, "step": 34400 }, { "epoch": 3.62, "learning_rate": 1.9176076955248853e-06, "loss": 1.6621, "step": 34600 }, { "epoch": 3.64, "learning_rate": 1.8130489335006274e-06, "loss": 1.6588, "step": 34800 }, { "epoch": 3.66, "learning_rate": 1.70849017147637e-06, "loss": 1.6655, "step": 35000 }, { "epoch": 3.68, "learning_rate": 1.6039314094521122e-06, "loss": 1.6618, "step": 35200 }, { "epoch": 3.7, "learning_rate": 1.4993726474278547e-06, "loss": 1.6548, "step": 35400 }, { "epoch": 3.72, "learning_rate": 1.394813885403597e-06, "loss": 1.6768, "step": 35600 }, { "epoch": 3.74, "learning_rate": 1.2902551233793392e-06, "loss": 1.6629, "step": 35800 }, { "epoch": 3.76, "learning_rate": 1.1856963613550817e-06, "loss": 1.6683, "step": 36000 }, { "epoch": 3.76, "eval_bleu": 0.2799, "eval_gen_len": 18.1835, "eval_loss": 1.5721137523651123, "eval_runtime": 70.8793, "eval_samples_per_second": 45.443, "eval_steps_per_second": 1.425, "step": 36000 }, { "epoch": 3.79, "learning_rate": 1.081137599330824e-06, "loss": 1.6514, "step": 36200 }, { "epoch": 3.81, "learning_rate": 9.765788373065663e-07, "loss": 1.6605, "step": 36400 }, { "epoch": 3.83, "learning_rate": 8.720200752823086e-07, "loss": 1.6594, "step": 36600 }, { "epoch": 3.85, "learning_rate": 7.67461313258051e-07, "loss": 1.6662, "step": 36800 }, { "epoch": 3.87, "learning_rate": 6.629025512337935e-07, "loss": 1.6949, "step": 37000 }, { "epoch": 3.89, "learning_rate": 5.583437892095358e-07, "loss": 1.6592, "step": 37200 }, { "epoch": 3.91, "learning_rate": 4.543078209953995e-07, "loss": 1.6552, "step": 37400 }, { "epoch": 3.93, "learning_rate": 3.497490589711418e-07, "loss": 1.6605, "step": 37600 }, { "epoch": 3.95, "learning_rate": 2.4519029694688416e-07, "loss": 1.6834, "step": 37800 }, { "epoch": 3.97, "learning_rate": 1.4063153492262652e-07, "loss": 1.6643, "step": 38000 }, { "epoch": 3.97, "eval_bleu": 0.2801, "eval_gen_len": 18.1801, "eval_loss": 1.5713245868682861, "eval_runtime": 70.3463, "eval_samples_per_second": 45.788, "eval_steps_per_second": 1.436, "step": 38000 } ], "logging_steps": 200, "max_steps": 38256, "num_train_epochs": 4, "save_steps": 500, "total_flos": 7.865111955999744e+16, "trial_name": null, "trial_params": null }