{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "global_step": 35889, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0009860681545877568, "loss": 6.095, "step": 500 }, { "epoch": 0.03, "learning_rate": 0.0009721363091755134, "loss": 2.9616, "step": 1000 }, { "epoch": 0.04, "learning_rate": 0.0009582044637632701, "loss": 2.666, "step": 1500 }, { "epoch": 0.06, "learning_rate": 0.0009442726183510269, "loss": 2.5328, "step": 2000 }, { "epoch": 0.07, "learning_rate": 0.0009303407729387835, "loss": 2.497, "step": 2500 }, { "epoch": 0.08, "learning_rate": 0.0009164089275265401, "loss": 2.4232, "step": 3000 }, { "epoch": 0.1, "learning_rate": 0.0009024770821142969, "loss": 2.4132, "step": 3500 }, { "epoch": 0.11, "learning_rate": 0.0008885452367020536, "loss": 2.3761, "step": 4000 }, { "epoch": 0.13, "learning_rate": 0.0008746133912898102, "loss": 2.3447, "step": 4500 }, { "epoch": 0.14, "learning_rate": 0.000860681545877567, "loss": 2.2711, "step": 5000 }, { "epoch": 0.15, "learning_rate": 0.0008467497004653236, "loss": 2.2602, "step": 5500 }, { "epoch": 0.17, "learning_rate": 0.0008328178550530803, "loss": 2.2645, "step": 6000 }, { "epoch": 0.18, "learning_rate": 0.0008188860096408371, "loss": 2.2384, "step": 6500 }, { "epoch": 0.2, "learning_rate": 0.0008049541642285937, "loss": 2.2474, "step": 7000 }, { "epoch": 0.2, "eval_gen_len": 91.9, "eval_loss": 1.914839744567871, "eval_rouge1": 31.9324, "eval_rouge2": 8.9596, "eval_rougeL": 18.5991, "eval_rougeLsum": 26.8462, "eval_runtime": 20609.4374, "eval_samples_per_second": 0.649, "step": 7000 }, { "epoch": 0.21, "learning_rate": 0.0007910223188163504, "loss": 2.2039, "step": 7500 }, { "epoch": 0.22, "learning_rate": 0.0007770904734041071, "loss": 2.189, "step": 8000 }, { "epoch": 0.24, "learning_rate": 0.0007631586279918639, "loss": 2.1961, "step": 8500 }, { "epoch": 0.25, "learning_rate": 0.0007492267825796205, "loss": 2.1764, "step": 9000 }, { "epoch": 0.26, "learning_rate": 0.0007352949371673772, "loss": 2.1777, "step": 9500 }, { "epoch": 0.28, "learning_rate": 0.0007213630917551339, "loss": 2.1412, "step": 10000 }, { "epoch": 0.29, "learning_rate": 0.0007074312463428905, "loss": 2.1669, "step": 10500 }, { "epoch": 0.31, "learning_rate": 0.0006934994009306474, "loss": 2.1426, "step": 11000 }, { "epoch": 0.32, "learning_rate": 0.000679567555518404, "loss": 2.1428, "step": 11500 }, { "epoch": 0.33, "learning_rate": 0.0006656357101061606, "loss": 2.1108, "step": 12000 }, { "epoch": 0.35, "learning_rate": 0.0006517038646939174, "loss": 2.0903, "step": 12500 }, { "epoch": 0.36, "learning_rate": 0.000637772019281674, "loss": 2.1057, "step": 13000 }, { "epoch": 0.38, "learning_rate": 0.0006238401738694308, "loss": 2.0874, "step": 13500 }, { "epoch": 0.39, "learning_rate": 0.0006099083284571875, "loss": 2.0877, "step": 14000 }, { "epoch": 0.39, "eval_gen_len": 99.3, "eval_loss": 1.8038697242736816, "eval_rouge1": 31.4652, "eval_rouge2": 8.6423, "eval_rougeL": 18.1808, "eval_rougeLsum": 26.2653, "eval_runtime": 21262.825, "eval_samples_per_second": 0.629, "step": 14000 }, { "epoch": 0.4, "learning_rate": 0.0005959764830449441, "loss": 2.0903, "step": 14500 }, { "epoch": 0.42, "learning_rate": 0.0005820446376327009, "loss": 2.0628, "step": 15000 }, { "epoch": 0.43, "learning_rate": 0.0005681127922204575, "loss": 2.0909, "step": 15500 }, { "epoch": 0.45, "learning_rate": 0.0005541809468082142, "loss": 2.0258, "step": 16000 }, { "epoch": 0.46, "learning_rate": 0.000540249101395971, "loss": 2.0156, "step": 16500 }, { "epoch": 0.47, "learning_rate": 0.0005263172559837276, "loss": 2.0282, "step": 17000 }, { "epoch": 0.49, "learning_rate": 0.0005123854105714843, "loss": 2.0559, "step": 17500 }, { "epoch": 0.5, "learning_rate": 0.000498453565159241, "loss": 2.0603, "step": 18000 }, { "epoch": 0.52, "learning_rate": 0.0004845217197469977, "loss": 2.0077, "step": 18500 }, { "epoch": 0.53, "learning_rate": 0.0004705898743347544, "loss": 2.0119, "step": 19000 }, { "epoch": 0.54, "learning_rate": 0.00045665802892251106, "loss": 1.9855, "step": 19500 }, { "epoch": 0.56, "learning_rate": 0.0004427261835102678, "loss": 2.0089, "step": 20000 }, { "epoch": 0.57, "learning_rate": 0.0004287943380980245, "loss": 2.0029, "step": 20500 }, { "epoch": 0.59, "learning_rate": 0.00041486249268578117, "loss": 1.9773, "step": 21000 }, { "epoch": 0.59, "eval_gen_len": 101.6, "eval_loss": 1.73397696018219, "eval_rouge1": 31.1574, "eval_rouge2": 8.645, "eval_rougeL": 18.096, "eval_rougeLsum": 25.9822, "eval_runtime": 21624.4917, "eval_samples_per_second": 0.618, "step": 21000 }, { "epoch": 0.6, "learning_rate": 0.00040093064727353785, "loss": 1.9632, "step": 21500 }, { "epoch": 0.61, "learning_rate": 0.00038699880186129454, "loss": 1.9768, "step": 22000 }, { "epoch": 0.63, "learning_rate": 0.0003730669564490513, "loss": 1.9755, "step": 22500 }, { "epoch": 0.64, "learning_rate": 0.00035913511103680796, "loss": 2.0125, "step": 23000 }, { "epoch": 0.65, "learning_rate": 0.0003452032656245646, "loss": 1.9427, "step": 23500 }, { "epoch": 0.67, "learning_rate": 0.0003312714202123213, "loss": 1.9776, "step": 24000 }, { "epoch": 0.68, "learning_rate": 0.000317339574800078, "loss": 1.9966, "step": 24500 }, { "epoch": 0.7, "learning_rate": 0.00030340772938783475, "loss": 1.9814, "step": 25000 }, { "epoch": 0.71, "learning_rate": 0.00028947588397559143, "loss": 1.9585, "step": 25500 }, { "epoch": 0.72, "learning_rate": 0.00027554403856334806, "loss": 1.9668, "step": 26000 }, { "epoch": 0.74, "learning_rate": 0.0002616121931511048, "loss": 1.9564, "step": 26500 }, { "epoch": 0.75, "learning_rate": 0.0002476803477388615, "loss": 1.9231, "step": 27000 }, { "epoch": 0.77, "learning_rate": 0.00023374850232661817, "loss": 1.9078, "step": 27500 }, { "epoch": 0.78, "learning_rate": 0.00021981665691437488, "loss": 1.9032, "step": 28000 }, { "epoch": 0.78, "eval_gen_len": 106.8, "eval_loss": 1.6808093786239624, "eval_rouge1": 31.4328, "eval_rouge2": 8.6241, "eval_rougeL": 18.0718, "eval_rougeLsum": 26.0718, "eval_runtime": 21979.649, "eval_samples_per_second": 0.608, "step": 28000 }, { "epoch": 0.79, "learning_rate": 0.00020588481150213156, "loss": 1.931, "step": 28500 }, { "epoch": 0.81, "learning_rate": 0.00019195296608988827, "loss": 1.9416, "step": 29000 }, { "epoch": 0.82, "learning_rate": 0.00017802112067764498, "loss": 1.963, "step": 29500 }, { "epoch": 0.84, "learning_rate": 0.00016408927526540167, "loss": 1.9066, "step": 30000 }, { "epoch": 0.85, "learning_rate": 0.00015015742985315835, "loss": 1.9583, "step": 30500 }, { "epoch": 0.86, "learning_rate": 0.00013622558444091503, "loss": 1.8948, "step": 31000 }, { "epoch": 0.88, "learning_rate": 0.00012229373902867174, "loss": 1.9424, "step": 31500 }, { "epoch": 0.89, "learning_rate": 0.00010836189361642844, "loss": 1.9082, "step": 32000 }, { "epoch": 0.91, "learning_rate": 9.443004820418513e-05, "loss": 1.9195, "step": 32500 }, { "epoch": 0.92, "learning_rate": 8.049820279194182e-05, "loss": 1.8821, "step": 33000 }, { "epoch": 0.93, "learning_rate": 6.656635737969852e-05, "loss": 1.9194, "step": 33500 }, { "epoch": 0.95, "learning_rate": 5.263451196745521e-05, "loss": 1.902, "step": 34000 }, { "epoch": 0.96, "learning_rate": 3.87026665552119e-05, "loss": 1.9154, "step": 34500 }, { "epoch": 0.98, "learning_rate": 2.4770821142968598e-05, "loss": 1.9181, "step": 35000 }, { "epoch": 0.98, "eval_gen_len": 107.9, "eval_loss": 1.6517904996871948, "eval_rouge1": 31.4185, "eval_rouge2": 8.601, "eval_rougeL": 17.9686, "eval_rougeLsum": 26.0844, "eval_runtime": 21981.6496, "eval_samples_per_second": 0.608, "step": 35000 }, { "epoch": 0.99, "learning_rate": 1.0838975730725292e-05, "loss": 1.9275, "step": 35500 }, { "epoch": 1.0, "step": 35889, "total_flos": 1069022126774016000, "train_runtime": 160064.9582, "train_samples_per_second": 0.224 } ], "max_steps": 35889, "num_train_epochs": 1, "total_flos": 1069022126774016000, "trial_name": null, "trial_params": null }