{ "best_metric": 41.3073, "best_model_checkpoint": "models/mt0-xl_english_adafactor/checkpoint-27400", "epoch": 20.0, "global_step": 27400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.75e-05, "loss": 2.1171, "step": 1370 }, { "epoch": 1.0, "eval_gen_len": 11.879845515662995, "eval_loss": 1.8174540996551514, "eval_rouge1": 27.0261, "eval_rouge2": 8.6429, "eval_rougeL": 25.2826, "eval_rougeLsum": 25.2952, "eval_runtime": 356.3645, "eval_samples_per_second": 39.235, "eval_steps_per_second": 1.226, "step": 1370 }, { "epoch": 2.0, "learning_rate": 4.5e-05, "loss": 1.8186, "step": 2740 }, { "epoch": 2.0, "eval_gen_len": 11.791946788728366, "eval_loss": 1.7111847400665283, "eval_rouge1": 29.1583, "eval_rouge2": 9.9747, "eval_rougeL": 27.3432, "eval_rougeLsum": 27.3647, "eval_runtime": 305.4337, "eval_samples_per_second": 45.778, "eval_steps_per_second": 1.431, "step": 2740 }, { "epoch": 3.0, "learning_rate": 4.25e-05, "loss": 1.643, "step": 4110 }, { "epoch": 3.0, "eval_gen_len": 12.412530396223715, "eval_loss": 1.6442108154296875, "eval_rouge1": 30.9045, "eval_rouge2": 11.2256, "eval_rougeL": 28.7826, "eval_rougeLsum": 28.788, "eval_runtime": 308.7201, "eval_samples_per_second": 45.29, "eval_steps_per_second": 1.416, "step": 4110 }, { "epoch": 4.0, "learning_rate": 4e-05, "loss": 1.499, "step": 5480 }, { "epoch": 4.0, "eval_gen_len": 12.31290230296095, "eval_loss": 1.5977873802185059, "eval_rouge1": 32.1126, "eval_rouge2": 12.6674, "eval_rougeL": 29.97, "eval_rougeLsum": 29.9843, "eval_runtime": 307.939, "eval_samples_per_second": 45.405, "eval_steps_per_second": 1.419, "step": 5480 }, { "epoch": 5.0, "learning_rate": 3.7500000000000003e-05, "loss": 1.3772, "step": 6850 }, { "epoch": 5.0, "eval_gen_len": 12.688742669146045, "eval_loss": 1.571955680847168, "eval_rouge1": 33.6113, "eval_rouge2": 13.8451, "eval_rougeL": 31.3468, "eval_rougeLsum": 31.3599, "eval_runtime": 320.2331, "eval_samples_per_second": 43.662, "eval_steps_per_second": 1.365, "step": 6850 }, { "epoch": 6.0, "learning_rate": 3.5e-05, "loss": 1.2742, "step": 8220 }, { "epoch": 6.0, "eval_gen_len": 12.200257473895007, "eval_loss": 1.556434988975525, "eval_rouge1": 34.4899, "eval_rouge2": 15.1005, "eval_rougeL": 32.3177, "eval_rougeLsum": 32.3291, "eval_runtime": 308.9469, "eval_samples_per_second": 45.257, "eval_steps_per_second": 1.414, "step": 8220 }, { "epoch": 7.0, "learning_rate": 3.2500000000000004e-05, "loss": 1.1785, "step": 9590 }, { "epoch": 7.0, "eval_gen_len": 12.4487197825776, "eval_loss": 1.5466481447219849, "eval_rouge1": 35.4729, "eval_rouge2": 16.2035, "eval_rougeL": 33.2166, "eval_rougeLsum": 33.2295, "eval_runtime": 305.713, "eval_samples_per_second": 45.736, "eval_steps_per_second": 1.429, "step": 9590 }, { "epoch": 8.0, "learning_rate": 3e-05, "loss": 1.0941, "step": 10960 }, { "epoch": 8.0, "eval_gen_len": 12.754255471320269, "eval_loss": 1.5571105480194092, "eval_rouge1": 36.4885, "eval_rouge2": 17.5396, "eval_rougeL": 34.2494, "eval_rougeLsum": 34.2759, "eval_runtime": 308.476, "eval_samples_per_second": 45.326, "eval_steps_per_second": 1.417, "step": 10960 }, { "epoch": 9.0, "learning_rate": 2.7500000000000004e-05, "loss": 1.0202, "step": 12330 }, { "epoch": 9.0, "eval_gen_len": 12.860320411958233, "eval_loss": 1.5540790557861328, "eval_rouge1": 37.4019, "eval_rouge2": 18.5568, "eval_rougeL": 35.1341, "eval_rougeLsum": 35.1473, "eval_runtime": 308.6475, "eval_samples_per_second": 45.301, "eval_steps_per_second": 1.416, "step": 12330 }, { "epoch": 10.0, "learning_rate": 2.5e-05, "loss": 0.9552, "step": 13700 }, { "epoch": 10.0, "eval_gen_len": 12.698684022314405, "eval_loss": 1.5641505718231201, "eval_rouge1": 38.127, "eval_rouge2": 19.4057, "eval_rougeL": 35.9008, "eval_rougeLsum": 35.9163, "eval_runtime": 307.6813, "eval_samples_per_second": 45.443, "eval_steps_per_second": 1.42, "step": 13700 }, { "epoch": 11.0, "learning_rate": 2.25e-05, "loss": 0.8963, "step": 15070 }, { "epoch": 11.0, "eval_gen_len": 12.705192390215991, "eval_loss": 1.5771721601486206, "eval_rouge1": 38.5073, "eval_rouge2": 20.0584, "eval_rougeL": 36.3304, "eval_rougeLsum": 36.3399, "eval_runtime": 480.883, "eval_samples_per_second": 29.076, "eval_steps_per_second": 0.909, "step": 15070 }, { "epoch": 12.0, "learning_rate": 2e-05, "loss": 0.8443, "step": 16440 }, { "epoch": 12.0, "eval_gen_len": 13.039479330567874, "eval_loss": 1.595460057258606, "eval_rouge1": 39.2323, "eval_rouge2": 20.9237, "eval_rougeL": 36.9863, "eval_rougeLsum": 37.0049, "eval_runtime": 314.6611, "eval_samples_per_second": 44.435, "eval_steps_per_second": 1.389, "step": 16440 }, { "epoch": 13.0, "learning_rate": 1.75e-05, "loss": 0.7982, "step": 17810 }, { "epoch": 13.0, "eval_gen_len": 13.140037190673723, "eval_loss": 1.608866810798645, "eval_rouge1": 39.7947, "eval_rouge2": 21.6422, "eval_rougeL": 37.5619, "eval_rougeLsum": 37.5815, "eval_runtime": 465.3407, "eval_samples_per_second": 30.047, "eval_steps_per_second": 0.939, "step": 17810 }, { "epoch": 14.0, "learning_rate": 1.5e-05, "loss": 0.7586, "step": 19180 }, { "epoch": 14.0, "eval_gen_len": 12.85889000143041, "eval_loss": 1.6293412446975708, "eval_rouge1": 40.2922, "eval_rouge2": 22.2301, "eval_rougeL": 38.0755, "eval_rougeLsum": 38.0757, "eval_runtime": 312.4581, "eval_samples_per_second": 44.748, "eval_steps_per_second": 1.399, "step": 19180 }, { "epoch": 15.0, "learning_rate": 1.25e-05, "loss": 0.7234, "step": 20550 }, { "epoch": 15.0, "eval_gen_len": 13.110213131168646, "eval_loss": 1.6492763757705688, "eval_rouge1": 40.6358, "eval_rouge2": 22.5355, "eval_rougeL": 38.3523, "eval_rougeLsum": 38.3659, "eval_runtime": 307.5457, "eval_samples_per_second": 45.463, "eval_steps_per_second": 1.421, "step": 20550 }, { "epoch": 16.0, "learning_rate": 1e-05, "loss": 0.6946, "step": 21920 }, { "epoch": 16.0, "eval_gen_len": 13.103490201687885, "eval_loss": 1.6700669527053833, "eval_rouge1": 40.7708, "eval_rouge2": 22.906, "eval_rougeL": 38.5037, "eval_rougeLsum": 38.5174, "eval_runtime": 306.4464, "eval_samples_per_second": 45.626, "eval_steps_per_second": 1.426, "step": 21920 }, { "epoch": 17.0, "learning_rate": 7.5e-06, "loss": 0.6688, "step": 23290 }, { "epoch": 17.0, "eval_gen_len": 13.295093691889573, "eval_loss": 1.690183162689209, "eval_rouge1": 41.0847, "eval_rouge2": 23.1663, "eval_rougeL": 38.8126, "eval_rougeLsum": 38.8149, "eval_runtime": 309.6148, "eval_samples_per_second": 45.159, "eval_steps_per_second": 1.411, "step": 23290 }, { "epoch": 18.0, "learning_rate": 5e-06, "loss": 0.6484, "step": 24660 }, { "epoch": 18.0, "eval_gen_len": 13.270705192390215, "eval_loss": 1.7005170583724976, "eval_rouge1": 41.2075, "eval_rouge2": 23.3967, "eval_rougeL": 38.9529, "eval_rougeLsum": 38.9545, "eval_runtime": 310.6615, "eval_samples_per_second": 45.007, "eval_steps_per_second": 1.407, "step": 24660 }, { "epoch": 19.0, "learning_rate": 2.5e-06, "loss": 0.6342, "step": 26030 }, { "epoch": 19.0, "eval_gen_len": 13.217279359176084, "eval_loss": 1.7115505933761597, "eval_rouge1": 41.2454, "eval_rouge2": 23.5187, "eval_rougeL": 39.0203, "eval_rougeLsum": 39.0396, "eval_runtime": 310.561, "eval_samples_per_second": 45.022, "eval_steps_per_second": 1.407, "step": 26030 }, { "epoch": 20.0, "learning_rate": 0.0, "loss": 0.6234, "step": 27400 }, { "epoch": 20.0, "eval_gen_len": 13.255757402374481, "eval_loss": 1.7210286855697632, "eval_rouge1": 41.3073, "eval_rouge2": 23.5691, "eval_rougeL": 39.0662, "eval_rougeLsum": 39.074, "eval_runtime": 482.7425, "eval_samples_per_second": 28.964, "eval_steps_per_second": 0.905, "step": 27400 }, { "epoch": 20.0, "step": 27400, "total_flos": 4.389256654374306e+18, "train_loss": 1.0633717825116902, "train_runtime": 55751.6068, "train_samples_per_second": 62.898, "train_steps_per_second": 0.491 } ], "max_steps": 27400, "num_train_epochs": 20, "total_flos": 4.389256654374306e+18, "trial_name": null, "trial_params": null }