{ "best_metric": 2.0710394382476807, "best_model_checkpoint": "./checkpoints/barthez-deft-chimie/checkpoint-1062", "epoch": 20.0, "global_step": 2360, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.85, "learning_rate": 2.8805084745762714e-05, "loss": 3.8022, "step": 100 }, { "epoch": 1.0, "eval_gen_len": 19.1538, "eval_loss": 2.5490684509277344, "eval_rouge1": 16.8208, "eval_rouge2": 7.0027, "eval_rougeL": 13.957, "eval_rougeLsum": 14.0479, "eval_runtime": 3.9642, "eval_samples_per_second": 19.676, "eval_steps_per_second": 5.045, "step": 118 }, { "epoch": 1.69, "learning_rate": 2.7533898305084746e-05, "loss": 2.9286, "step": 200 }, { "epoch": 2.0, "eval_gen_len": 19.9487, "eval_loss": 2.3073835372924805, "eval_rouge1": 17.5356, "eval_rouge2": 7.8717, "eval_rougeL": 14.4874, "eval_rougeLsum": 14.5044, "eval_runtime": 3.9919, "eval_samples_per_second": 19.54, "eval_steps_per_second": 5.01, "step": 236 }, { "epoch": 2.54, "learning_rate": 2.627542372881356e-05, "loss": 2.5422, "step": 300 }, { "epoch": 3.0, "eval_gen_len": 19.7051, "eval_loss": 2.232180118560791, "eval_rouge1": 19.6491, "eval_rouge2": 9.4156, "eval_rougeL": 15.9467, "eval_rougeLsum": 15.9433, "eval_runtime": 3.9423, "eval_samples_per_second": 19.785, "eval_steps_per_second": 5.073, "step": 354 }, { "epoch": 3.39, "learning_rate": 2.5004237288135596e-05, "loss": 2.398, "step": 400 }, { "epoch": 4.0, "eval_gen_len": 19.9231, "eval_loss": 2.150047540664673, "eval_rouge1": 18.7166, "eval_rouge2": 9.859, "eval_rougeL": 15.7535, "eval_rougeLsum": 15.8036, "eval_runtime": 3.904, "eval_samples_per_second": 19.98, "eval_steps_per_second": 5.123, "step": 472 }, { "epoch": 4.24, "learning_rate": 2.373305084745763e-05, "loss": 2.2044, "step": 500 }, { "epoch": 5.0, "eval_gen_len": 19.6154, "eval_loss": 2.1371994018554688, "eval_rouge1": 19.978, "eval_rouge2": 10.6235, "eval_rougeL": 16.1348, "eval_rougeLsum": 16.1274, "eval_runtime": 3.9032, "eval_samples_per_second": 19.984, "eval_steps_per_second": 5.124, "step": 590 }, { "epoch": 5.08, "learning_rate": 2.2461864406779662e-05, "loss": 2.0419, "step": 600 }, { "epoch": 5.93, "learning_rate": 2.1203389830508474e-05, "loss": 1.9405, "step": 700 }, { "epoch": 6.0, "eval_gen_len": 19.9744, "eval_loss": 2.099212884902954, "eval_rouge1": 20.226, "eval_rouge2": 10.551, "eval_rougeL": 16.6928, "eval_rougeLsum": 16.7211, "eval_runtime": 3.9012, "eval_samples_per_second": 19.994, "eval_steps_per_second": 5.127, "step": 708 }, { "epoch": 6.78, "learning_rate": 1.993220338983051e-05, "loss": 1.8544, "step": 800 }, { "epoch": 7.0, "eval_gen_len": 19.8846, "eval_loss": 2.0841410160064697, "eval_rouge1": 19.8869, "eval_rouge2": 10.8456, "eval_rougeL": 16.1072, "eval_rougeLsum": 16.097, "eval_runtime": 3.8835, "eval_samples_per_second": 20.085, "eval_steps_per_second": 5.15, "step": 826 }, { "epoch": 7.63, "learning_rate": 1.866101694915254e-05, "loss": 1.7536, "step": 900 }, { "epoch": 8.0, "eval_gen_len": 19.859, "eval_loss": 2.0790863037109375, "eval_rouge1": 19.3017, "eval_rouge2": 9.4921, "eval_rougeL": 16.1541, "eval_rougeLsum": 16.2167, "eval_runtime": 3.9024, "eval_samples_per_second": 19.988, "eval_steps_per_second": 5.125, "step": 944 }, { "epoch": 8.47, "learning_rate": 1.7389830508474576e-05, "loss": 1.6914, "step": 1000 }, { "epoch": 9.0, "eval_gen_len": 19.8846, "eval_loss": 2.0710394382476807, "eval_rouge1": 21.3848, "eval_rouge2": 10.4088, "eval_rougeL": 17.1963, "eval_rougeLsum": 17.2254, "eval_runtime": 3.8762, "eval_samples_per_second": 20.123, "eval_steps_per_second": 5.16, "step": 1062 }, { "epoch": 9.32, "learning_rate": 1.611864406779661e-05, "loss": 1.654, "step": 1100 }, { "epoch": 10.0, "eval_gen_len": 19.9231, "eval_loss": 2.106861114501953, "eval_rouge1": 22.3811, "eval_rouge2": 10.7987, "eval_rougeL": 18.7595, "eval_rougeLsum": 18.761, "eval_runtime": 3.9276, "eval_samples_per_second": 19.859, "eval_steps_per_second": 5.092, "step": 1180 }, { "epoch": 10.17, "learning_rate": 1.4847457627118646e-05, "loss": 1.5899, "step": 1200 }, { "epoch": 11.0, "eval_gen_len": 19.8077, "eval_loss": 2.09187650680542, "eval_rouge1": 20.8546, "eval_rouge2": 10.6958, "eval_rougeL": 16.8637, "eval_rougeLsum": 16.9499, "eval_runtime": 3.8954, "eval_samples_per_second": 20.024, "eval_steps_per_second": 5.134, "step": 1298 }, { "epoch": 11.02, "learning_rate": 1.357627118644068e-05, "loss": 1.534, "step": 1300 }, { "epoch": 11.86, "learning_rate": 1.2305084745762711e-05, "loss": 1.4661, "step": 1400 }, { "epoch": 12.0, "eval_gen_len": 19.9744, "eval_loss": 2.106508255004883, "eval_rouge1": 22.3677, "eval_rouge2": 11.7472, "eval_rougeL": 18.262, "eval_rougeLsum": 18.3, "eval_runtime": 3.8544, "eval_samples_per_second": 20.237, "eval_steps_per_second": 5.189, "step": 1416 }, { "epoch": 12.71, "learning_rate": 1.1033898305084746e-05, "loss": 1.4205, "step": 1500 }, { "epoch": 13.0, "eval_gen_len": 19.9359, "eval_loss": 2.116386651992798, "eval_rouge1": 20.5845, "eval_rouge2": 10.7825, "eval_rougeL": 16.9972, "eval_rougeLsum": 17.0216, "eval_runtime": 3.9246, "eval_samples_per_second": 19.875, "eval_steps_per_second": 5.096, "step": 1534 }, { "epoch": 13.56, "learning_rate": 9.76271186440678e-06, "loss": 1.3797, "step": 1600 }, { "epoch": 14.0, "eval_gen_len": 19.9744, "eval_loss": 2.124025344848633, "eval_rouge1": 22.2561, "eval_rouge2": 11.303, "eval_rougeL": 17.5064, "eval_rougeLsum": 17.5815, "eval_runtime": 3.8828, "eval_samples_per_second": 20.089, "eval_steps_per_second": 5.151, "step": 1652 }, { "epoch": 14.41, "learning_rate": 8.491525423728813e-06, "loss": 1.3724, "step": 1700 }, { "epoch": 15.0, "eval_gen_len": 19.9359, "eval_loss": 2.118665933609009, "eval_rouge1": 23.2825, "eval_rouge2": 11.912, "eval_rougeL": 18.5208, "eval_rougeLsum": 18.5499, "eval_runtime": 3.9164, "eval_samples_per_second": 19.916, "eval_steps_per_second": 5.107, "step": 1770 }, { "epoch": 15.25, "learning_rate": 7.220338983050848e-06, "loss": 1.3404, "step": 1800 }, { "epoch": 16.0, "eval_gen_len": 19.9744, "eval_loss": 2.139371395111084, "eval_rouge1": 22.1305, "eval_rouge2": 10.5258, "eval_rougeL": 17.772, "eval_rougeLsum": 17.8202, "eval_runtime": 3.9181, "eval_samples_per_second": 19.908, "eval_steps_per_second": 5.105, "step": 1888 }, { "epoch": 16.1, "learning_rate": 5.949152542372881e-06, "loss": 1.2967, "step": 1900 }, { "epoch": 16.95, "learning_rate": 4.677966101694916e-06, "loss": 1.2846, "step": 2000 }, { "epoch": 17.0, "eval_gen_len": 20.0, "eval_loss": 2.1501927375793457, "eval_rouge1": 21.567, "eval_rouge2": 11.0557, "eval_rougeL": 17.2562, "eval_rougeLsum": 17.2974, "eval_runtime": 3.8698, "eval_samples_per_second": 20.156, "eval_steps_per_second": 5.168, "step": 2006 }, { "epoch": 17.8, "learning_rate": 3.406779661016949e-06, "loss": 1.2871, "step": 2100 }, { "epoch": 18.0, "eval_gen_len": 19.9744, "eval_loss": 2.1571571826934814, "eval_rouge1": 22.5871, "eval_rouge2": 11.702, "eval_rougeL": 18.2906, "eval_rougeLsum": 18.3826, "eval_runtime": 3.8649, "eval_samples_per_second": 20.182, "eval_steps_per_second": 5.175, "step": 2124 }, { "epoch": 18.64, "learning_rate": 2.1355932203389833e-06, "loss": 1.2422, "step": 2200 }, { "epoch": 19.0, "eval_gen_len": 19.9744, "eval_loss": 2.161255359649658, "eval_rouge1": 23.0935, "eval_rouge2": 11.6824, "eval_rougeL": 18.6087, "eval_rougeLsum": 18.6777, "eval_runtime": 3.8614, "eval_samples_per_second": 20.2, "eval_steps_per_second": 5.18, "step": 2242 }, { "epoch": 19.49, "learning_rate": 8.64406779661017e-07, "loss": 1.2336, "step": 2300 }, { "epoch": 20.0, "eval_gen_len": 19.9487, "eval_loss": 2.158088445663452, "eval_rouge1": 22.6789, "eval_rouge2": 11.4363, "eval_rougeL": 18.1661, "eval_rougeLsum": 18.2346, "eval_runtime": 3.8624, "eval_samples_per_second": 20.195, "eval_steps_per_second": 5.178, "step": 2360 }, { "epoch": 20.0, "step": 2360, "total_flos": 1837511390490624.0, "train_loss": 1.7793180174746757, "train_runtime": 373.3963, "train_samples_per_second": 25.121, "train_steps_per_second": 6.32 } ], "max_steps": 2360, "num_train_epochs": 20, "total_flos": 1837511390490624.0, "trial_name": null, "trial_params": null }