{ "best_metric": 2.132361888885498, "best_model_checkpoint": "./26-125356_megasuperkanin/checkpoint-100000", "epoch": 0.9769822970807769, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 5e-05, "loss": 2.6761, "step": 2500 }, { "epoch": 0.05, "learning_rate": 5e-05, "loss": 2.551, "step": 5000 }, { "epoch": 0.05, "eval_gen_len": 28.4674, "eval_loss": 2.423037052154541, "eval_rouge1": 0.214, "eval_rouge2": 0.0668, "eval_rougeL": 0.1717, "eval_rougeLsum": 0.1777, "eval_runtime": 1015.6418, "eval_samples_per_second": 2.265, "eval_steps_per_second": 0.284, "step": 5000 }, { "epoch": 0.07, "learning_rate": 5e-05, "loss": 2.5186, "step": 7500 }, { "epoch": 0.1, "learning_rate": 5e-05, "loss": 2.4717, "step": 10000 }, { "epoch": 0.1, "eval_gen_len": 25.6604, "eval_loss": 2.3709843158721924, "eval_rouge1": 0.2071, "eval_rouge2": 0.0634, "eval_rougeL": 0.1686, "eval_rougeLsum": 0.1745, "eval_runtime": 951.1096, "eval_samples_per_second": 2.418, "eval_steps_per_second": 0.303, "step": 10000 }, { "epoch": 0.12, "learning_rate": 5e-05, "loss": 2.4593, "step": 12500 }, { "epoch": 0.15, "learning_rate": 5e-05, "loss": 2.4281, "step": 15000 }, { "epoch": 0.15, "eval_gen_len": 28.8296, "eval_loss": 2.3228819370269775, "eval_rouge1": 0.2137, "eval_rouge2": 0.0662, "eval_rougeL": 0.1711, "eval_rougeLsum": 0.1768, "eval_runtime": 1022.9494, "eval_samples_per_second": 2.248, "eval_steps_per_second": 0.282, "step": 15000 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 2.4049, "step": 17500 }, { "epoch": 0.2, "learning_rate": 5e-05, "loss": 2.3735, "step": 20000 }, { "epoch": 0.2, "eval_gen_len": 29.9183, "eval_loss": 2.2881429195404053, "eval_rouge1": 0.2164, "eval_rouge2": 0.0668, "eval_rougeL": 0.1735, "eval_rougeLsum": 0.1808, "eval_runtime": 1036.2984, "eval_samples_per_second": 2.219, "eval_steps_per_second": 0.278, "step": 20000 }, { "epoch": 0.22, "learning_rate": 5e-05, "loss": 2.3732, "step": 22500 }, { "epoch": 0.24, "learning_rate": 5e-05, "loss": 2.377, "step": 25000 }, { "epoch": 0.24, "eval_gen_len": 29.5183, "eval_loss": 2.2759358882904053, "eval_rouge1": 0.2209, "eval_rouge2": 0.0694, "eval_rougeL": 0.1782, "eval_rougeLsum": 0.1851, "eval_runtime": 1036.1071, "eval_samples_per_second": 2.22, "eval_steps_per_second": 0.278, "step": 25000 }, { "epoch": 0.27, "learning_rate": 5e-05, "loss": 2.3513, "step": 27500 }, { "epoch": 0.29, "learning_rate": 5e-05, "loss": 2.3444, "step": 30000 }, { "epoch": 0.29, "eval_gen_len": 29.3183, "eval_loss": 2.2552034854888916, "eval_rouge1": 0.2194, "eval_rouge2": 0.0679, "eval_rougeL": 0.1757, "eval_rougeLsum": 0.1829, "eval_runtime": 1037.4604, "eval_samples_per_second": 2.217, "eval_steps_per_second": 0.278, "step": 30000 }, { "epoch": 0.32, "learning_rate": 5e-05, "loss": 2.3504, "step": 32500 }, { "epoch": 0.34, "learning_rate": 5e-05, "loss": 2.3203, "step": 35000 }, { "epoch": 0.34, "eval_gen_len": 32.2061, "eval_loss": 2.235518455505371, "eval_rouge1": 0.2284, "eval_rouge2": 0.0722, "eval_rougeL": 0.1819, "eval_rougeLsum": 0.1892, "eval_runtime": 1121.1561, "eval_samples_per_second": 2.051, "eval_steps_per_second": 0.257, "step": 35000 }, { "epoch": 0.37, "learning_rate": 5e-05, "loss": 2.3087, "step": 37500 }, { "epoch": 0.39, "learning_rate": 5e-05, "loss": 2.3132, "step": 40000 }, { "epoch": 0.39, "eval_gen_len": 29.5452, "eval_loss": 2.2289836406707764, "eval_rouge1": 0.2183, "eval_rouge2": 0.0673, "eval_rougeL": 0.1759, "eval_rougeLsum": 0.1827, "eval_runtime": 1055.2895, "eval_samples_per_second": 2.179, "eval_steps_per_second": 0.273, "step": 40000 }, { "epoch": 0.42, "learning_rate": 5e-05, "loss": 2.3063, "step": 42500 }, { "epoch": 0.44, "learning_rate": 5e-05, "loss": 2.3116, "step": 45000 }, { "epoch": 0.44, "eval_gen_len": 30.2935, "eval_loss": 2.218207359313965, "eval_rouge1": 0.2239, "eval_rouge2": 0.07, "eval_rougeL": 0.1798, "eval_rougeLsum": 0.1879, "eval_runtime": 1063.5185, "eval_samples_per_second": 2.163, "eval_steps_per_second": 0.271, "step": 45000 }, { "epoch": 0.46, "learning_rate": 5e-05, "loss": 2.3014, "step": 47500 }, { "epoch": 0.49, "learning_rate": 5e-05, "loss": 2.2852, "step": 50000 }, { "epoch": 0.49, "eval_gen_len": 28.6443, "eval_loss": 2.2090706825256348, "eval_rouge1": 0.2251, "eval_rouge2": 0.0703, "eval_rougeL": 0.1812, "eval_rougeLsum": 0.1887, "eval_runtime": 1045.7282, "eval_samples_per_second": 2.199, "eval_steps_per_second": 0.275, "step": 50000 }, { "epoch": 0.51, "learning_rate": 5e-05, "loss": 2.2963, "step": 52500 }, { "epoch": 0.54, "learning_rate": 5e-05, "loss": 2.2683, "step": 55000 }, { "epoch": 0.54, "eval_gen_len": 29.9661, "eval_loss": 2.1879115104675293, "eval_rouge1": 0.2257, "eval_rouge2": 0.0716, "eval_rougeL": 0.1806, "eval_rougeLsum": 0.1876, "eval_runtime": 1061.3075, "eval_samples_per_second": 2.167, "eval_steps_per_second": 0.271, "step": 55000 }, { "epoch": 0.56, "learning_rate": 5e-05, "loss": 2.2735, "step": 57500 }, { "epoch": 0.59, "learning_rate": 5e-05, "loss": 2.2614, "step": 60000 }, { "epoch": 0.59, "eval_gen_len": 30.4435, "eval_loss": 2.1871089935302734, "eval_rouge1": 0.2316, "eval_rouge2": 0.075, "eval_rougeL": 0.1863, "eval_rougeLsum": 0.1936, "eval_runtime": 1083.7377, "eval_samples_per_second": 2.122, "eval_steps_per_second": 0.266, "step": 60000 }, { "epoch": 0.61, "learning_rate": 5e-05, "loss": 2.2735, "step": 62500 }, { "epoch": 0.64, "learning_rate": 5e-05, "loss": 2.252, "step": 65000 }, { "epoch": 0.64, "eval_gen_len": 30.6239, "eval_loss": 2.175469160079956, "eval_rouge1": 0.226, "eval_rouge2": 0.0729, "eval_rougeL": 0.1834, "eval_rougeLsum": 0.1914, "eval_runtime": 1080.4009, "eval_samples_per_second": 2.129, "eval_steps_per_second": 0.267, "step": 65000 }, { "epoch": 0.66, "learning_rate": 5e-05, "loss": 2.2509, "step": 67500 }, { "epoch": 0.68, "learning_rate": 5e-05, "loss": 2.262, "step": 70000 }, { "epoch": 0.68, "eval_gen_len": 30.9983, "eval_loss": 2.16789174079895, "eval_rouge1": 0.2256, "eval_rouge2": 0.0716, "eval_rougeL": 0.1815, "eval_rougeLsum": 0.1889, "eval_runtime": 1104.0224, "eval_samples_per_second": 2.083, "eval_steps_per_second": 0.261, "step": 70000 }, { "epoch": 0.71, "learning_rate": 5e-05, "loss": 2.2398, "step": 72500 }, { "epoch": 0.73, "learning_rate": 5e-05, "loss": 2.228, "step": 75000 }, { "epoch": 0.73, "eval_gen_len": 29.9704, "eval_loss": 2.1669178009033203, "eval_rouge1": 0.2253, "eval_rouge2": 0.0725, "eval_rougeL": 0.1822, "eval_rougeLsum": 0.1894, "eval_runtime": 1052.7669, "eval_samples_per_second": 2.185, "eval_steps_per_second": 0.274, "step": 75000 }, { "epoch": 0.76, "learning_rate": 5e-05, "loss": 2.25, "step": 77500 }, { "epoch": 0.78, "learning_rate": 5e-05, "loss": 2.234, "step": 80000 }, { "epoch": 0.78, "eval_gen_len": 29.4826, "eval_loss": 2.1604671478271484, "eval_rouge1": 0.2283, "eval_rouge2": 0.0747, "eval_rougeL": 0.1855, "eval_rougeLsum": 0.1937, "eval_runtime": 1075.8159, "eval_samples_per_second": 2.138, "eval_steps_per_second": 0.268, "step": 80000 }, { "epoch": 0.81, "learning_rate": 5e-05, "loss": 2.236, "step": 82500 }, { "epoch": 0.83, "learning_rate": 5e-05, "loss": 2.2289, "step": 85000 }, { "epoch": 0.83, "eval_gen_len": 30.0213, "eval_loss": 2.1517326831817627, "eval_rouge1": 0.2226, "eval_rouge2": 0.0705, "eval_rougeL": 0.1801, "eval_rougeLsum": 0.1873, "eval_runtime": 1072.8178, "eval_samples_per_second": 2.144, "eval_steps_per_second": 0.268, "step": 85000 }, { "epoch": 0.85, "learning_rate": 5e-05, "loss": 2.2214, "step": 87500 }, { "epoch": 0.88, "learning_rate": 5e-05, "loss": 2.2043, "step": 90000 }, { "epoch": 0.88, "eval_gen_len": 29.5361, "eval_loss": 2.1455490589141846, "eval_rouge1": 0.2265, "eval_rouge2": 0.075, "eval_rougeL": 0.1838, "eval_rougeLsum": 0.1908, "eval_runtime": 1058.731, "eval_samples_per_second": 2.172, "eval_steps_per_second": 0.272, "step": 90000 }, { "epoch": 0.9, "learning_rate": 5e-05, "loss": 2.2419, "step": 92500 }, { "epoch": 0.93, "learning_rate": 5e-05, "loss": 2.2259, "step": 95000 }, { "epoch": 0.93, "eval_gen_len": 29.6874, "eval_loss": 2.1389129161834717, "eval_rouge1": 0.2287, "eval_rouge2": 0.0713, "eval_rougeL": 0.1844, "eval_rougeLsum": 0.1911, "eval_runtime": 1069.2344, "eval_samples_per_second": 2.151, "eval_steps_per_second": 0.269, "step": 95000 }, { "epoch": 0.95, "learning_rate": 5e-05, "loss": 2.2202, "step": 97500 }, { "epoch": 0.98, "learning_rate": 5e-05, "loss": 2.2307, "step": 100000 }, { "epoch": 0.98, "eval_gen_len": 30.7513, "eval_loss": 2.132361888885498, "eval_rouge1": 0.2293, "eval_rouge2": 0.0741, "eval_rougeL": 0.1845, "eval_rougeLsum": 0.1924, "eval_runtime": 1089.9927, "eval_samples_per_second": 2.11, "eval_steps_per_second": 0.264, "step": 100000 } ], "max_steps": 102356, "num_train_epochs": 1, "total_flos": 1.8696291573252096e+17, "trial_name": null, "trial_params": null }