{ "best_metric": 50.2114, "best_model_checkpoint": "./jako_13p_tokenie_run1/checkpoint-19200", "epoch": 9.997403271877435, "eval_steps": 1600, "global_step": 19250, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 4.9212598425196856e-05, "loss": 1.7571, "step": 500 }, { "epoch": 0.52, "learning_rate": 4.79002624671916e-05, "loss": 1.3294, "step": 1000 }, { "epoch": 0.78, "learning_rate": 4.6587926509186354e-05, "loss": 1.2125, "step": 1500 }, { "epoch": 0.83, "eval_bleu": 44.2732, "eval_gen_len": 18.9394, "eval_loss": 1.1356315612792969, "eval_runtime": 557.6241, "eval_samples_per_second": 13.81, "eval_steps_per_second": 0.864, "step": 1600 }, { "epoch": 1.04, "learning_rate": 4.52755905511811e-05, "loss": 1.1386, "step": 2000 }, { "epoch": 1.3, "learning_rate": 4.396325459317586e-05, "loss": 0.9283, "step": 2500 }, { "epoch": 1.56, "learning_rate": 4.2650918635170604e-05, "loss": 0.8519, "step": 3000 }, { "epoch": 1.66, "eval_bleu": 47.1622, "eval_gen_len": 18.3936, "eval_loss": 1.061800241470337, "eval_runtime": 524.2089, "eval_samples_per_second": 14.691, "eval_steps_per_second": 0.919, "step": 3200 }, { "epoch": 1.82, "learning_rate": 4.133858267716536e-05, "loss": 0.8109, "step": 3500 }, { "epoch": 2.08, "learning_rate": 4.00262467191601e-05, "loss": 0.7727, "step": 4000 }, { "epoch": 2.34, "learning_rate": 3.871391076115486e-05, "loss": 0.6394, "step": 4500 }, { "epoch": 2.49, "eval_bleu": 47.7818, "eval_gen_len": 18.3397, "eval_loss": 1.0923182964324951, "eval_runtime": 516.7936, "eval_samples_per_second": 14.902, "eval_steps_per_second": 0.933, "step": 4800 }, { "epoch": 2.6, "learning_rate": 3.740157480314961e-05, "loss": 0.5875, "step": 5000 }, { "epoch": 2.86, "learning_rate": 3.608923884514436e-05, "loss": 0.5625, "step": 5500 }, { "epoch": 3.12, "learning_rate": 3.4776902887139105e-05, "loss": 0.532, "step": 6000 }, { "epoch": 3.32, "eval_bleu": 48.4283, "eval_gen_len": 18.3375, "eval_loss": 1.1293830871582031, "eval_runtime": 519.3842, "eval_samples_per_second": 14.827, "eval_steps_per_second": 0.928, "step": 6400 }, { "epoch": 3.38, "learning_rate": 3.3464566929133864e-05, "loss": 0.4299, "step": 6500 }, { "epoch": 3.64, "learning_rate": 3.215223097112861e-05, "loss": 0.3984, "step": 7000 }, { "epoch": 3.9, "learning_rate": 3.083989501312336e-05, "loss": 0.3857, "step": 7500 }, { "epoch": 4.15, "learning_rate": 2.952755905511811e-05, "loss": 0.3543, "step": 8000 }, { "epoch": 4.15, "eval_bleu": 47.7916, "eval_gen_len": 18.4422, "eval_loss": 1.176469087600708, "eval_runtime": 519.0077, "eval_samples_per_second": 14.838, "eval_steps_per_second": 0.929, "step": 8000 }, { "epoch": 4.41, "learning_rate": 2.8215223097112863e-05, "loss": 0.2836, "step": 8500 }, { "epoch": 4.67, "learning_rate": 2.6902887139107612e-05, "loss": 0.2648, "step": 9000 }, { "epoch": 4.93, "learning_rate": 2.5590551181102364e-05, "loss": 0.2569, "step": 9500 }, { "epoch": 4.99, "eval_bleu": 48.1268, "eval_gen_len": 18.5385, "eval_loss": 1.2102879285812378, "eval_runtime": 526.7602, "eval_samples_per_second": 14.62, "eval_steps_per_second": 0.915, "step": 9600 }, { "epoch": 5.19, "learning_rate": 2.4278215223097113e-05, "loss": 0.2268, "step": 10000 }, { "epoch": 5.45, "learning_rate": 2.2965879265091865e-05, "loss": 0.1854, "step": 10500 }, { "epoch": 5.71, "learning_rate": 2.1653543307086614e-05, "loss": 0.1732, "step": 11000 }, { "epoch": 5.82, "eval_bleu": 48.9329, "eval_gen_len": 18.2085, "eval_loss": 1.25494384765625, "eval_runtime": 505.0437, "eval_samples_per_second": 15.248, "eval_steps_per_second": 0.954, "step": 11200 }, { "epoch": 5.97, "learning_rate": 2.0341207349081366e-05, "loss": 0.1693, "step": 11500 }, { "epoch": 6.23, "learning_rate": 1.9028871391076115e-05, "loss": 0.1453, "step": 12000 }, { "epoch": 6.49, "learning_rate": 1.7716535433070868e-05, "loss": 0.1228, "step": 12500 }, { "epoch": 6.65, "eval_bleu": 49.0248, "eval_gen_len": 18.2133, "eval_loss": 1.3022269010543823, "eval_runtime": 504.7977, "eval_samples_per_second": 15.256, "eval_steps_per_second": 0.955, "step": 12800 }, { "epoch": 6.75, "learning_rate": 1.6404199475065617e-05, "loss": 0.1158, "step": 13000 }, { "epoch": 7.01, "learning_rate": 1.5091863517060367e-05, "loss": 0.1144, "step": 13500 }, { "epoch": 7.27, "learning_rate": 1.377952755905512e-05, "loss": 0.0937, "step": 14000 }, { "epoch": 7.48, "eval_bleu": 49.3503, "eval_gen_len": 18.1673, "eval_loss": 1.317897081375122, "eval_runtime": 503.3739, "eval_samples_per_second": 15.299, "eval_steps_per_second": 0.958, "step": 14400 }, { "epoch": 7.53, "learning_rate": 1.246719160104987e-05, "loss": 0.0829, "step": 14500 }, { "epoch": 7.79, "learning_rate": 1.115485564304462e-05, "loss": 0.0783, "step": 15000 }, { "epoch": 8.05, "learning_rate": 9.842519685039371e-06, "loss": 0.0779, "step": 15500 }, { "epoch": 8.31, "learning_rate": 8.530183727034122e-06, "loss": 0.0627, "step": 16000 }, { "epoch": 8.31, "eval_bleu": 49.5551, "eval_gen_len": 18.2672, "eval_loss": 1.3408894538879395, "eval_runtime": 506.5726, "eval_samples_per_second": 15.202, "eval_steps_per_second": 0.951, "step": 16000 }, { "epoch": 8.57, "learning_rate": 7.2178477690288725e-06, "loss": 0.0579, "step": 16500 }, { "epoch": 8.83, "learning_rate": 5.905511811023622e-06, "loss": 0.0551, "step": 17000 }, { "epoch": 9.09, "learning_rate": 4.593175853018373e-06, "loss": 0.0558, "step": 17500 }, { "epoch": 9.14, "eval_bleu": 49.7808, "eval_gen_len": 18.2815, "eval_loss": 1.3544921875, "eval_runtime": 505.3645, "eval_samples_per_second": 15.239, "eval_steps_per_second": 0.954, "step": 17600 }, { "epoch": 9.35, "learning_rate": 3.2808398950131235e-06, "loss": 0.0456, "step": 18000 }, { "epoch": 9.61, "learning_rate": 1.968503937007874e-06, "loss": 0.0433, "step": 18500 }, { "epoch": 9.87, "learning_rate": 6.561679790026247e-07, "loss": 0.0442, "step": 19000 }, { "epoch": 9.97, "eval_bleu": 50.2114, "eval_gen_len": 18.2159, "eval_loss": 1.3559678792953491, "eval_runtime": 503.7841, "eval_samples_per_second": 15.286, "eval_steps_per_second": 0.957, "step": 19200 }, { "epoch": 10.0, "step": 19250, "total_flos": 1.334951937048576e+18, "train_loss": 0.4018082245665711, "train_runtime": 39148.2976, "train_samples_per_second": 15.739, "train_steps_per_second": 0.492 } ], "logging_steps": 500, "max_steps": 19250, "num_train_epochs": 10, "save_steps": 1600, "total_flos": 1.334951937048576e+18, "trial_name": null, "trial_params": null }