{ "best_metric": 3.1917154788970947, "best_model_checkpoint": "/exports/eddie/scratch/s1970716/models/summarization/longt5_xl_summ_screen_20/checkpoint-28", "epoch": 6.956521739130435, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 0.001, "loss": 0.411, "step": 2 }, { "epoch": 0.28, "learning_rate": 0.001, "loss": 0.4642, "step": 4 }, { "epoch": 0.42, "learning_rate": 0.001, "loss": 0.4965, "step": 6 }, { "epoch": 0.56, "learning_rate": 0.001, "loss": 0.7455, "step": 8 }, { "epoch": 0.7, "learning_rate": 0.001, "loss": 0.4501, "step": 10 }, { "epoch": 0.83, "learning_rate": 0.001, "loss": 0.3804, "step": 12 }, { "epoch": 0.97, "learning_rate": 0.001, "loss": 0.4063, "step": 14 }, { "epoch": 0.97, "eval_gen_len": 71.90828402366864, "eval_loss": 3.7384819984436035, "eval_rouge1": 27.9171, "eval_rouge2": 6.7215, "eval_rougeL": 17.9315, "eval_rougeLsum": 24.363, "eval_runtime": 823.0541, "eval_samples_per_second": 0.411, "eval_steps_per_second": 0.205, "step": 14 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 0.3201, "step": 16 }, { "epoch": 1.25, "learning_rate": 0.001, "loss": 0.3253, "step": 18 }, { "epoch": 1.39, "learning_rate": 0.001, "loss": 0.3215, "step": 20 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 0.3175, "step": 22 }, { "epoch": 1.67, "learning_rate": 0.001, "loss": 0.3331, "step": 24 }, { "epoch": 1.81, "learning_rate": 0.001, "loss": 0.2811, "step": 26 }, { "epoch": 1.95, "learning_rate": 0.001, "loss": 0.3125, "step": 28 }, { "epoch": 1.95, "eval_gen_len": 96.20414201183432, "eval_loss": 3.1917154788970947, "eval_rouge1": 28.1708, "eval_rouge2": 6.6895, "eval_rougeL": 18.1637, "eval_rougeLsum": 24.3987, "eval_runtime": 1069.4844, "eval_samples_per_second": 0.316, "eval_steps_per_second": 0.158, "step": 28 }, { "epoch": 2.09, "learning_rate": 0.001, "loss": 0.2621, "step": 30 }, { "epoch": 2.23, "learning_rate": 0.001, "loss": 0.2194, "step": 32 }, { "epoch": 2.37, "learning_rate": 0.001, "loss": 0.2386, "step": 34 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 0.2264, "step": 36 }, { "epoch": 2.64, "learning_rate": 0.001, "loss": 0.2002, "step": 38 }, { "epoch": 2.78, "learning_rate": 0.001, "loss": 0.2477, "step": 40 }, { "epoch": 2.92, "learning_rate": 0.001, "loss": 0.2177, "step": 42 }, { "epoch": 2.99, "eval_gen_len": 198.0473372781065, "eval_loss": 3.9997544288635254, "eval_rouge1": 29.3167, "eval_rouge2": 5.9, "eval_rougeL": 17.3608, "eval_rougeLsum": 25.6945, "eval_runtime": 1900.1301, "eval_samples_per_second": 0.178, "eval_steps_per_second": 0.089, "step": 43 }, { "epoch": 3.06, "learning_rate": 0.001, "loss": 0.2069, "step": 44 }, { "epoch": 3.2, "learning_rate": 0.001, "loss": 0.164, "step": 46 }, { "epoch": 3.34, "learning_rate": 0.001, "loss": 0.1679, "step": 48 }, { "epoch": 3.48, "learning_rate": 0.001, "loss": 0.1736, "step": 50 }, { "epoch": 3.62, "learning_rate": 0.001, "loss": 0.1688, "step": 52 }, { "epoch": 3.76, "learning_rate": 0.001, "loss": 0.1749, "step": 54 }, { "epoch": 3.9, "learning_rate": 0.001, "loss": 0.1753, "step": 56 }, { "epoch": 3.97, "eval_gen_len": 158.6508875739645, "eval_loss": 4.228714466094971, "eval_rouge1": 29.0605, "eval_rouge2": 6.2534, "eval_rougeL": 17.5744, "eval_rougeLsum": 25.6415, "eval_runtime": 1492.9623, "eval_samples_per_second": 0.226, "eval_steps_per_second": 0.113, "step": 57 }, { "epoch": 4.03, "learning_rate": 0.001, "loss": 0.1656, "step": 58 }, { "epoch": 4.17, "learning_rate": 0.001, "loss": 0.1144, "step": 60 }, { "epoch": 4.31, "learning_rate": 0.001, "loss": 0.161, "step": 62 }, { "epoch": 4.45, "learning_rate": 0.001, "loss": 0.2169, "step": 64 }, { "epoch": 4.59, "learning_rate": 0.001, "loss": 0.1943, "step": 66 }, { "epoch": 4.73, "learning_rate": 0.001, "loss": 0.1777, "step": 68 }, { "epoch": 4.87, "learning_rate": 0.001, "loss": 0.2747, "step": 70 }, { "epoch": 4.94, "eval_gen_len": 118.44378698224853, "eval_loss": 4.102721214294434, "eval_rouge1": 31.2245, "eval_rouge2": 6.5663, "eval_rougeL": 18.1588, "eval_rougeLsum": 26.8996, "eval_runtime": 1188.6007, "eval_samples_per_second": 0.284, "eval_steps_per_second": 0.142, "step": 71 }, { "epoch": 5.01, "learning_rate": 0.001, "loss": 0.1399, "step": 72 }, { "epoch": 5.15, "learning_rate": 0.001, "loss": 0.0986, "step": 74 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 0.1051, "step": 76 }, { "epoch": 5.43, "learning_rate": 0.001, "loss": 0.1288, "step": 78 }, { "epoch": 5.57, "learning_rate": 0.001, "loss": 0.1097, "step": 80 }, { "epoch": 5.7, "learning_rate": 0.001, "loss": 0.1163, "step": 82 }, { "epoch": 5.84, "learning_rate": 0.001, "loss": 0.1205, "step": 84 }, { "epoch": 5.98, "learning_rate": 0.001, "loss": 0.1045, "step": 86 }, { "epoch": 5.98, "eval_gen_len": 92.98816568047337, "eval_loss": 5.058135986328125, "eval_rouge1": 30.6056, "eval_rouge2": 6.8892, "eval_rougeL": 18.4933, "eval_rougeLsum": 26.4027, "eval_runtime": 984.3965, "eval_samples_per_second": 0.343, "eval_steps_per_second": 0.172, "step": 86 }, { "epoch": 6.12, "learning_rate": 0.001, "loss": 0.0767, "step": 88 }, { "epoch": 6.26, "learning_rate": 0.001, "loss": 0.0678, "step": 90 }, { "epoch": 6.4, "learning_rate": 0.001, "loss": 0.0759, "step": 92 }, { "epoch": 6.54, "learning_rate": 0.001, "loss": 0.0714, "step": 94 }, { "epoch": 6.68, "learning_rate": 0.001, "loss": 0.0822, "step": 96 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 0.0843, "step": 98 }, { "epoch": 6.96, "learning_rate": 0.001, "loss": 0.0875, "step": 100 }, { "epoch": 6.96, "eval_gen_len": 160.89644970414201, "eval_loss": 4.59414529800415, "eval_rouge1": 32.5234, "eval_rouge2": 7.3736, "eval_rougeL": 18.8958, "eval_rougeLsum": 28.4738, "eval_runtime": 1504.7392, "eval_samples_per_second": 0.225, "eval_steps_per_second": 0.112, "step": 100 } ], "logging_steps": 2, "max_steps": 140, "num_train_epochs": 10, "save_steps": 500, "total_flos": 1.7591398064550052e+18, "trial_name": null, "trial_params": null }