{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.0, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.98e-05, "loss": 1.4142, "step": 500 }, { "epoch": 0.2, "learning_rate": 1.9600000000000002e-05, "loss": 1.2006, "step": 1000 }, { "epoch": 0.3, "learning_rate": 1.94e-05, "loss": 1.1483, "step": 1500 }, { "epoch": 0.4, "learning_rate": 1.9200000000000003e-05, "loss": 1.1203, "step": 2000 }, { "epoch": 0.5, "learning_rate": 1.9e-05, "loss": 1.1042, "step": 2500 }, { "epoch": 0.6, "learning_rate": 1.88e-05, "loss": 1.0782, "step": 3000 }, { "epoch": 0.7, "learning_rate": 1.86e-05, "loss": 1.0579, "step": 3500 }, { "epoch": 0.8, "learning_rate": 1.8400000000000003e-05, "loss": 1.0638, "step": 4000 }, { "epoch": 0.9, "learning_rate": 1.8200000000000002e-05, "loss": 1.0532, "step": 4500 }, { "epoch": 1.0, "learning_rate": 1.8e-05, "loss": 1.0355, "step": 5000 }, { "epoch": 1.0, "eval_gen_len": 9.6717, "eval_loss": 0.9534130692481995, "eval_rouge1": 11.1402, "eval_rouge2": 2.3949, "eval_rougeL": 11.0659, "eval_rougeLsum": 11.1028, "eval_runtime": 604.1787, "eval_samples_per_second": 16.551, "step": 5000 }, { "epoch": 1.1, "learning_rate": 1.7800000000000002e-05, "loss": 1.0246, "step": 5500 }, { "epoch": 1.2, "learning_rate": 1.76e-05, "loss": 1.0355, "step": 6000 }, { "epoch": 1.3, "learning_rate": 1.7400000000000003e-05, "loss": 1.0076, "step": 6500 }, { "epoch": 1.4, "learning_rate": 1.72e-05, "loss": 0.9938, "step": 7000 }, { "epoch": 1.5, "learning_rate": 1.7e-05, "loss": 0.9928, "step": 7500 }, { "epoch": 1.6, "learning_rate": 1.6800000000000002e-05, "loss": 1.0041, "step": 8000 }, { "epoch": 1.7, "learning_rate": 1.66e-05, "loss": 0.9988, "step": 8500 }, { "epoch": 1.8, "learning_rate": 1.64e-05, "loss": 0.9949, "step": 9000 }, { "epoch": 1.9, "learning_rate": 1.62e-05, "loss": 0.9879, "step": 9500 }, { "epoch": 2.0, "learning_rate": 1.6000000000000003e-05, "loss": 1.0008, "step": 10000 }, { "epoch": 2.0, "eval_gen_len": 10.509, "eval_loss": 0.9204540252685547, "eval_rouge1": 11.224, "eval_rouge2": 2.672, "eval_rougeL": 11.1021, "eval_rougeLsum": 11.1328, "eval_runtime": 596.5212, "eval_samples_per_second": 16.764, "step": 10000 }, { "epoch": 2.1, "learning_rate": 1.58e-05, "loss": 0.9723, "step": 10500 }, { "epoch": 2.2, "learning_rate": 1.5600000000000003e-05, "loss": 0.9784, "step": 11000 }, { "epoch": 2.3, "learning_rate": 1.54e-05, "loss": 0.9629, "step": 11500 }, { "epoch": 2.4, "learning_rate": 1.5200000000000002e-05, "loss": 0.9719, "step": 12000 }, { "epoch": 2.5, "learning_rate": 1.5000000000000002e-05, "loss": 0.9644, "step": 12500 }, { "epoch": 2.6, "learning_rate": 1.48e-05, "loss": 0.9847, "step": 13000 }, { "epoch": 2.7, "learning_rate": 1.46e-05, "loss": 0.9717, "step": 13500 }, { "epoch": 2.8, "learning_rate": 1.4400000000000001e-05, "loss": 0.9644, "step": 14000 }, { "epoch": 2.9, "learning_rate": 1.4200000000000001e-05, "loss": 0.9618, "step": 14500 }, { "epoch": 3.0, "learning_rate": 1.4e-05, "loss": 0.9602, "step": 15000 }, { "epoch": 3.0, "eval_gen_len": 9.474, "eval_loss": 0.9030117392539978, "eval_rouge1": 12.4503, "eval_rouge2": 2.8721, "eval_rougeL": 12.354, "eval_rougeLsum": 12.3602, "eval_runtime": 585.0965, "eval_samples_per_second": 17.091, "step": 15000 }, { "epoch": 3.1, "learning_rate": 1.38e-05, "loss": 0.9477, "step": 15500 }, { "epoch": 3.2, "learning_rate": 1.3600000000000002e-05, "loss": 0.9582, "step": 16000 }, { "epoch": 3.3, "learning_rate": 1.3400000000000002e-05, "loss": 0.9391, "step": 16500 }, { "epoch": 3.4, "learning_rate": 1.3200000000000002e-05, "loss": 0.9626, "step": 17000 }, { "epoch": 3.5, "learning_rate": 1.3000000000000001e-05, "loss": 0.951, "step": 17500 }, { "epoch": 3.6, "learning_rate": 1.2800000000000001e-05, "loss": 0.9472, "step": 18000 }, { "epoch": 3.7, "learning_rate": 1.2600000000000001e-05, "loss": 0.9519, "step": 18500 }, { "epoch": 3.8, "learning_rate": 1.2400000000000002e-05, "loss": 0.952, "step": 19000 }, { "epoch": 3.9, "learning_rate": 1.22e-05, "loss": 0.945, "step": 19500 }, { "epoch": 4.0, "learning_rate": 1.2e-05, "loss": 0.9381, "step": 20000 }, { "epoch": 4.0, "eval_gen_len": 9.4554, "eval_loss": 0.8888181447982788, "eval_rouge1": 12.4117, "eval_rouge2": 2.9965, "eval_rougeL": 12.3285, "eval_rougeLsum": 12.3327, "eval_runtime": 592.8034, "eval_samples_per_second": 16.869, "step": 20000 }, { "epoch": 4.1, "learning_rate": 1.18e-05, "loss": 0.9535, "step": 20500 }, { "epoch": 4.2, "learning_rate": 1.16e-05, "loss": 0.9458, "step": 21000 }, { "epoch": 4.3, "learning_rate": 1.14e-05, "loss": 0.9321, "step": 21500 }, { "epoch": 4.4, "learning_rate": 1.1200000000000001e-05, "loss": 0.9446, "step": 22000 }, { "epoch": 4.5, "learning_rate": 1.1000000000000001e-05, "loss": 0.9231, "step": 22500 }, { "epoch": 4.6, "learning_rate": 1.0800000000000002e-05, "loss": 0.9285, "step": 23000 }, { "epoch": 4.7, "learning_rate": 1.0600000000000002e-05, "loss": 0.9367, "step": 23500 }, { "epoch": 4.8, "learning_rate": 1.04e-05, "loss": 0.9361, "step": 24000 }, { "epoch": 4.9, "learning_rate": 1.02e-05, "loss": 0.9194, "step": 24500 }, { "epoch": 5.0, "learning_rate": 1e-05, "loss": 0.9288, "step": 25000 }, { "epoch": 5.0, "eval_gen_len": 9.6762, "eval_loss": 0.8804787993431091, "eval_rouge1": 12.6469, "eval_rouge2": 2.9175, "eval_rougeL": 12.5557, "eval_rougeLsum": 12.5805, "eval_runtime": 595.0767, "eval_samples_per_second": 16.805, "step": 25000 }, { "epoch": 5.1, "learning_rate": 9.800000000000001e-06, "loss": 0.9299, "step": 25500 }, { "epoch": 5.2, "learning_rate": 9.600000000000001e-06, "loss": 0.9313, "step": 26000 }, { "epoch": 5.3, "learning_rate": 9.4e-06, "loss": 0.9239, "step": 26500 }, { "epoch": 5.4, "learning_rate": 9.200000000000002e-06, "loss": 0.9238, "step": 27000 }, { "epoch": 5.5, "learning_rate": 9e-06, "loss": 0.9291, "step": 27500 }, { "epoch": 5.6, "learning_rate": 8.8e-06, "loss": 0.9207, "step": 28000 }, { "epoch": 5.7, "learning_rate": 8.6e-06, "loss": 0.9256, "step": 28500 }, { "epoch": 5.8, "learning_rate": 8.400000000000001e-06, "loss": 0.9199, "step": 29000 }, { "epoch": 5.9, "learning_rate": 8.2e-06, "loss": 0.9113, "step": 29500 }, { "epoch": 6.0, "learning_rate": 8.000000000000001e-06, "loss": 0.9243, "step": 30000 }, { "epoch": 6.0, "eval_gen_len": 10.0823, "eval_loss": 0.8752232193946838, "eval_rouge1": 14.0898, "eval_rouge2": 3.4389, "eval_rougeL": 13.9627, "eval_rougeLsum": 13.9793, "eval_runtime": 599.6266, "eval_samples_per_second": 16.677, "step": 30000 }, { "epoch": 6.1, "learning_rate": 7.800000000000002e-06, "loss": 0.9268, "step": 30500 }, { "epoch": 6.2, "learning_rate": 7.600000000000001e-06, "loss": 0.9144, "step": 31000 }, { "epoch": 6.3, "learning_rate": 7.4e-06, "loss": 0.9152, "step": 31500 }, { "epoch": 6.4, "learning_rate": 7.2000000000000005e-06, "loss": 0.9016, "step": 32000 }, { "epoch": 6.5, "learning_rate": 7e-06, "loss": 0.9119, "step": 32500 }, { "epoch": 6.6, "learning_rate": 6.800000000000001e-06, "loss": 0.916, "step": 33000 }, { "epoch": 6.7, "learning_rate": 6.600000000000001e-06, "loss": 0.9253, "step": 33500 }, { "epoch": 6.8, "learning_rate": 6.4000000000000006e-06, "loss": 0.9199, "step": 34000 }, { "epoch": 6.9, "learning_rate": 6.200000000000001e-06, "loss": 0.9168, "step": 34500 }, { "epoch": 7.0, "learning_rate": 6e-06, "loss": 0.9087, "step": 35000 }, { "epoch": 7.0, "eval_gen_len": 9.7588, "eval_loss": 0.8731149435043335, "eval_rouge1": 13.9046, "eval_rouge2": 3.2515, "eval_rougeL": 13.7955, "eval_rougeLsum": 13.8149, "eval_runtime": 600.5144, "eval_samples_per_second": 16.652, "step": 35000 }, { "epoch": 7.1, "learning_rate": 5.8e-06, "loss": 0.905, "step": 35500 }, { "epoch": 7.2, "learning_rate": 5.600000000000001e-06, "loss": 0.9005, "step": 36000 }, { "epoch": 7.3, "learning_rate": 5.400000000000001e-06, "loss": 0.9214, "step": 36500 }, { "epoch": 7.4, "learning_rate": 5.2e-06, "loss": 0.9152, "step": 37000 }, { "epoch": 7.5, "learning_rate": 5e-06, "loss": 0.9089, "step": 37500 }, { "epoch": 7.6, "learning_rate": 4.800000000000001e-06, "loss": 0.9061, "step": 38000 }, { "epoch": 7.7, "learning_rate": 4.600000000000001e-06, "loss": 0.918, "step": 38500 }, { "epoch": 7.8, "learning_rate": 4.4e-06, "loss": 0.9055, "step": 39000 }, { "epoch": 7.9, "learning_rate": 4.2000000000000004e-06, "loss": 0.9067, "step": 39500 }, { "epoch": 8.0, "learning_rate": 4.000000000000001e-06, "loss": 0.9146, "step": 40000 }, { "epoch": 8.0, "eval_gen_len": 9.8881, "eval_loss": 0.871009886264801, "eval_rouge1": 14.9652, "eval_rouge2": 3.5015, "eval_rougeL": 14.8443, "eval_rougeLsum": 14.8507, "eval_runtime": 606.6389, "eval_samples_per_second": 16.484, "step": 40000 }, { "epoch": 8.1, "learning_rate": 3.8000000000000005e-06, "loss": 0.9119, "step": 40500 }, { "epoch": 8.2, "learning_rate": 3.6000000000000003e-06, "loss": 0.8942, "step": 41000 }, { "epoch": 8.3, "learning_rate": 3.4000000000000005e-06, "loss": 0.9066, "step": 41500 }, { "epoch": 8.4, "learning_rate": 3.2000000000000003e-06, "loss": 0.92, "step": 42000 }, { "epoch": 8.5, "learning_rate": 3e-06, "loss": 0.9046, "step": 42500 }, { "epoch": 8.6, "learning_rate": 2.8000000000000003e-06, "loss": 0.9027, "step": 43000 }, { "epoch": 8.7, "learning_rate": 2.6e-06, "loss": 0.9019, "step": 43500 }, { "epoch": 8.8, "learning_rate": 2.4000000000000003e-06, "loss": 0.9056, "step": 44000 }, { "epoch": 8.9, "learning_rate": 2.2e-06, "loss": 0.899, "step": 44500 }, { "epoch": 9.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.9034, "step": 45000 }, { "epoch": 9.0, "eval_gen_len": 9.8936, "eval_loss": 0.8681530952453613, "eval_rouge1": 14.9995, "eval_rouge2": 3.5176, "eval_rougeL": 14.8747, "eval_rougeLsum": 14.8748, "eval_runtime": 601.5997, "eval_samples_per_second": 16.622, "step": 45000 }, { "epoch": 9.1, "learning_rate": 1.8000000000000001e-06, "loss": 0.9079, "step": 45500 }, { "epoch": 9.2, "learning_rate": 1.6000000000000001e-06, "loss": 0.9027, "step": 46000 }, { "epoch": 9.3, "learning_rate": 1.4000000000000001e-06, "loss": 0.8944, "step": 46500 }, { "epoch": 9.4, "learning_rate": 1.2000000000000002e-06, "loss": 0.8955, "step": 47000 }, { "epoch": 9.5, "learning_rate": 1.0000000000000002e-06, "loss": 0.8949, "step": 47500 }, { "epoch": 9.6, "learning_rate": 8.000000000000001e-07, "loss": 0.9037, "step": 48000 }, { "epoch": 9.7, "learning_rate": 6.000000000000001e-07, "loss": 0.9074, "step": 48500 }, { "epoch": 9.8, "learning_rate": 4.0000000000000003e-07, "loss": 0.9145, "step": 49000 }, { "epoch": 9.9, "learning_rate": 2.0000000000000002e-07, "loss": 0.9035, "step": 49500 }, { "epoch": 10.0, "learning_rate": 0.0, "loss": 0.9147, "step": 50000 } ], "max_steps": 50000, "num_train_epochs": 10, "total_flos": 6.850590755050291e+16, "trial_name": null, "trial_params": null }