{ "best_metric": 0.9042719006538391, "best_model_checkpoint": "/gscratch/xlab/hallisky/rewriting/src/models/toxic/bart-base_2.5e-05_0_32_jigsaw_randmask/checkpoint-38000", "epoch": 10.552624271035823, "global_step": 38000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 2.4750000000000002e-05, "loss": 1.2139, "step": 500 }, { "epoch": 0.28, "learning_rate": 2.45e-05, "loss": 1.1274, "step": 1000 }, { "epoch": 0.28, "eval_loss": 0.9955187439918518, "eval_runtime": 183.7737, "eval_samples_per_second": 158.445, "eval_steps_per_second": 1.241, "step": 1000 }, { "epoch": 0.42, "learning_rate": 2.425e-05, "loss": 1.1115, "step": 1500 }, { "epoch": 0.56, "learning_rate": 2.4e-05, "loss": 1.0866, "step": 2000 }, { "epoch": 0.56, "eval_loss": 0.9784897565841675, "eval_runtime": 183.1935, "eval_samples_per_second": 158.947, "eval_steps_per_second": 1.245, "step": 2000 }, { "epoch": 0.69, "learning_rate": 2.375e-05, "loss": 1.0865, "step": 2500 }, { "epoch": 0.83, "learning_rate": 2.35e-05, "loss": 1.0748, "step": 3000 }, { "epoch": 0.83, "eval_loss": 0.9661561846733093, "eval_runtime": 183.3031, "eval_samples_per_second": 158.852, "eval_steps_per_second": 1.244, "step": 3000 }, { "epoch": 0.97, "learning_rate": 2.3250000000000003e-05, "loss": 1.073, "step": 3500 }, { "epoch": 1.11, "learning_rate": 2.3000000000000003e-05, "loss": 1.0481, "step": 4000 }, { "epoch": 1.11, "eval_loss": 0.9609841704368591, "eval_runtime": 183.0018, "eval_samples_per_second": 159.113, "eval_steps_per_second": 1.246, "step": 4000 }, { "epoch": 1.25, "learning_rate": 2.275e-05, "loss": 1.0424, "step": 4500 }, { "epoch": 1.39, "learning_rate": 2.25e-05, "loss": 1.0441, "step": 5000 }, { "epoch": 1.39, "eval_loss": 0.9524271488189697, "eval_runtime": 183.0159, "eval_samples_per_second": 159.101, "eval_steps_per_second": 1.246, "step": 5000 }, { "epoch": 1.53, "learning_rate": 2.2250000000000002e-05, "loss": 1.0383, "step": 5500 }, { "epoch": 1.67, "learning_rate": 2.2000000000000003e-05, "loss": 1.0368, "step": 6000 }, { "epoch": 1.67, "eval_loss": 0.9480794668197632, "eval_runtime": 183.0254, "eval_samples_per_second": 159.093, "eval_steps_per_second": 1.246, "step": 6000 }, { "epoch": 1.81, "learning_rate": 2.175e-05, "loss": 1.0295, "step": 6500 }, { "epoch": 1.94, "learning_rate": 2.15e-05, "loss": 1.0294, "step": 7000 }, { "epoch": 1.94, "eval_loss": 0.9428065419197083, "eval_runtime": 183.0141, "eval_samples_per_second": 159.103, "eval_steps_per_second": 1.246, "step": 7000 }, { "epoch": 2.08, "learning_rate": 2.125e-05, "loss": 1.0213, "step": 7500 }, { "epoch": 2.22, "learning_rate": 2.1e-05, "loss": 1.0148, "step": 8000 }, { "epoch": 2.22, "eval_loss": 0.9450138807296753, "eval_runtime": 182.9272, "eval_samples_per_second": 159.178, "eval_steps_per_second": 1.246, "step": 8000 }, { "epoch": 2.36, "learning_rate": 2.075e-05, "loss": 1.0141, "step": 8500 }, { "epoch": 2.5, "learning_rate": 2.05e-05, "loss": 1.0136, "step": 9000 }, { "epoch": 2.5, "eval_loss": 0.9394757151603699, "eval_runtime": 183.1908, "eval_samples_per_second": 158.949, "eval_steps_per_second": 1.245, "step": 9000 }, { "epoch": 2.64, "learning_rate": 2.025e-05, "loss": 1.0117, "step": 9500 }, { "epoch": 2.78, "learning_rate": 2e-05, "loss": 1.0083, "step": 10000 }, { "epoch": 2.78, "eval_loss": 0.9403573274612427, "eval_runtime": 182.9098, "eval_samples_per_second": 159.193, "eval_steps_per_second": 1.247, "step": 10000 }, { "epoch": 2.92, "learning_rate": 1.9750000000000002e-05, "loss": 1.0062, "step": 10500 }, { "epoch": 3.05, "learning_rate": 1.9500000000000003e-05, "loss": 1.0045, "step": 11000 }, { "epoch": 3.05, "eval_loss": 0.9311016201972961, "eval_runtime": 182.9614, "eval_samples_per_second": 159.148, "eval_steps_per_second": 1.246, "step": 11000 }, { "epoch": 3.19, "learning_rate": 1.925e-05, "loss": 0.9962, "step": 11500 }, { "epoch": 3.33, "learning_rate": 1.9e-05, "loss": 0.9913, "step": 12000 }, { "epoch": 3.33, "eval_loss": 0.9348850846290588, "eval_runtime": 182.9469, "eval_samples_per_second": 159.161, "eval_steps_per_second": 1.246, "step": 12000 }, { "epoch": 3.47, "learning_rate": 1.8750000000000002e-05, "loss": 0.9914, "step": 12500 }, { "epoch": 3.61, "learning_rate": 1.85e-05, "loss": 0.9925, "step": 13000 }, { "epoch": 3.61, "eval_loss": 0.9286701679229736, "eval_runtime": 182.7432, "eval_samples_per_second": 159.338, "eval_steps_per_second": 1.248, "step": 13000 }, { "epoch": 3.75, "learning_rate": 1.825e-05, "loss": 0.9957, "step": 13500 }, { "epoch": 3.89, "learning_rate": 1.8e-05, "loss": 0.9929, "step": 14000 }, { "epoch": 3.89, "eval_loss": 0.9286787509918213, "eval_runtime": 183.092, "eval_samples_per_second": 159.035, "eval_steps_per_second": 1.245, "step": 14000 }, { "epoch": 4.03, "learning_rate": 1.775e-05, "loss": 0.9873, "step": 14500 }, { "epoch": 4.17, "learning_rate": 1.75e-05, "loss": 0.9825, "step": 15000 }, { "epoch": 4.17, "eval_loss": 0.9284389615058899, "eval_runtime": 183.0692, "eval_samples_per_second": 159.055, "eval_steps_per_second": 1.245, "step": 15000 }, { "epoch": 4.3, "learning_rate": 1.725e-05, "loss": 0.9791, "step": 15500 }, { "epoch": 4.44, "learning_rate": 1.7000000000000003e-05, "loss": 0.9761, "step": 16000 }, { "epoch": 4.44, "eval_loss": 0.9231427311897278, "eval_runtime": 182.9694, "eval_samples_per_second": 159.141, "eval_steps_per_second": 1.246, "step": 16000 }, { "epoch": 4.58, "learning_rate": 1.675e-05, "loss": 0.9806, "step": 16500 }, { "epoch": 4.72, "learning_rate": 1.65e-05, "loss": 0.9788, "step": 17000 }, { "epoch": 4.72, "eval_loss": 0.9270732998847961, "eval_runtime": 182.9924, "eval_samples_per_second": 159.121, "eval_steps_per_second": 1.246, "step": 17000 }, { "epoch": 4.86, "learning_rate": 1.6250000000000002e-05, "loss": 0.9771, "step": 17500 }, { "epoch": 5.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.9799, "step": 18000 }, { "epoch": 5.0, "eval_loss": 0.9208848476409912, "eval_runtime": 183.0859, "eval_samples_per_second": 159.04, "eval_steps_per_second": 1.245, "step": 18000 }, { "epoch": 5.14, "learning_rate": 1.575e-05, "loss": 0.9684, "step": 18500 }, { "epoch": 5.28, "learning_rate": 1.55e-05, "loss": 0.9655, "step": 19000 }, { "epoch": 5.28, "eval_loss": 0.9188591837882996, "eval_runtime": 183.2317, "eval_samples_per_second": 158.914, "eval_steps_per_second": 1.244, "step": 19000 }, { "epoch": 5.42, "learning_rate": 1.525e-05, "loss": 0.969, "step": 19500 }, { "epoch": 5.55, "learning_rate": 1.5e-05, "loss": 0.9678, "step": 20000 }, { "epoch": 5.55, "eval_loss": 0.920549213886261, "eval_runtime": 182.9534, "eval_samples_per_second": 159.155, "eval_steps_per_second": 1.246, "step": 20000 }, { "epoch": 5.69, "learning_rate": 1.475e-05, "loss": 0.9672, "step": 20500 }, { "epoch": 5.83, "learning_rate": 1.45e-05, "loss": 0.9723, "step": 21000 }, { "epoch": 5.83, "eval_loss": 0.9192747473716736, "eval_runtime": 182.9328, "eval_samples_per_second": 159.173, "eval_steps_per_second": 1.246, "step": 21000 }, { "epoch": 5.97, "learning_rate": 1.4249999999999999e-05, "loss": 0.9667, "step": 21500 }, { "epoch": 6.11, "learning_rate": 1.4000000000000001e-05, "loss": 0.9603, "step": 22000 }, { "epoch": 6.11, "eval_loss": 0.9202588200569153, "eval_runtime": 182.9947, "eval_samples_per_second": 159.119, "eval_steps_per_second": 1.246, "step": 22000 }, { "epoch": 6.25, "learning_rate": 1.3750000000000002e-05, "loss": 0.9589, "step": 22500 }, { "epoch": 6.39, "learning_rate": 1.3500000000000001e-05, "loss": 0.9593, "step": 23000 }, { "epoch": 6.39, "eval_loss": 0.9192423820495605, "eval_runtime": 183.1637, "eval_samples_per_second": 158.973, "eval_steps_per_second": 1.245, "step": 23000 }, { "epoch": 6.53, "learning_rate": 1.3250000000000002e-05, "loss": 0.961, "step": 23500 }, { "epoch": 6.66, "learning_rate": 1.3000000000000001e-05, "loss": 0.9634, "step": 24000 }, { "epoch": 6.66, "eval_loss": 0.9148619771003723, "eval_runtime": 182.8693, "eval_samples_per_second": 159.228, "eval_steps_per_second": 1.247, "step": 24000 }, { "epoch": 6.8, "learning_rate": 1.2750000000000002e-05, "loss": 0.958, "step": 24500 }, { "epoch": 6.94, "learning_rate": 1.25e-05, "loss": 0.9577, "step": 25000 }, { "epoch": 6.94, "eval_loss": 0.9157229065895081, "eval_runtime": 182.8974, "eval_samples_per_second": 159.204, "eval_steps_per_second": 1.247, "step": 25000 }, { "epoch": 7.08, "learning_rate": 1.225e-05, "loss": 0.9543, "step": 25500 }, { "epoch": 7.22, "learning_rate": 1.2e-05, "loss": 0.9505, "step": 26000 }, { "epoch": 7.22, "eval_loss": 0.9146404266357422, "eval_runtime": 182.7838, "eval_samples_per_second": 159.303, "eval_steps_per_second": 1.247, "step": 26000 }, { "epoch": 7.36, "learning_rate": 1.175e-05, "loss": 0.9478, "step": 26500 }, { "epoch": 7.5, "learning_rate": 1.1500000000000002e-05, "loss": 0.9546, "step": 27000 }, { "epoch": 7.5, "eval_loss": 0.9135451912879944, "eval_runtime": 182.7429, "eval_samples_per_second": 159.339, "eval_steps_per_second": 1.248, "step": 27000 }, { "epoch": 7.64, "learning_rate": 1.125e-05, "loss": 0.9481, "step": 27500 }, { "epoch": 7.78, "learning_rate": 1.1000000000000001e-05, "loss": 0.948, "step": 28000 }, { "epoch": 7.78, "eval_loss": 0.9123603701591492, "eval_runtime": 183.1278, "eval_samples_per_second": 159.004, "eval_steps_per_second": 1.245, "step": 28000 }, { "epoch": 7.91, "learning_rate": 1.075e-05, "loss": 0.948, "step": 28500 }, { "epoch": 8.05, "learning_rate": 1.05e-05, "loss": 0.9461, "step": 29000 }, { "epoch": 8.05, "eval_loss": 0.9139257073402405, "eval_runtime": 182.9341, "eval_samples_per_second": 159.172, "eval_steps_per_second": 1.246, "step": 29000 }, { "epoch": 8.19, "learning_rate": 1.025e-05, "loss": 0.9442, "step": 29500 }, { "epoch": 8.33, "learning_rate": 1e-05, "loss": 0.9441, "step": 30000 }, { "epoch": 8.33, "eval_loss": 0.9109994173049927, "eval_runtime": 451.1148, "eval_samples_per_second": 64.547, "eval_steps_per_second": 0.505, "step": 30000 }, { "epoch": 8.47, "learning_rate": 9.750000000000002e-06, "loss": 0.9431, "step": 30500 }, { "epoch": 8.61, "learning_rate": 9.5e-06, "loss": 0.9408, "step": 31000 }, { "epoch": 8.61, "eval_loss": 0.911307692527771, "eval_runtime": 182.7894, "eval_samples_per_second": 159.298, "eval_steps_per_second": 1.247, "step": 31000 }, { "epoch": 8.75, "learning_rate": 9.25e-06, "loss": 0.9429, "step": 31500 }, { "epoch": 8.89, "learning_rate": 9e-06, "loss": 0.9391, "step": 32000 }, { "epoch": 8.89, "eval_loss": 0.9080024361610413, "eval_runtime": 183.033, "eval_samples_per_second": 159.086, "eval_steps_per_second": 1.246, "step": 32000 }, { "epoch": 9.03, "learning_rate": 8.75e-06, "loss": 0.9427, "step": 32500 }, { "epoch": 9.16, "learning_rate": 8.500000000000002e-06, "loss": 0.942, "step": 33000 }, { "epoch": 9.16, "eval_loss": 0.909125804901123, "eval_runtime": 183.0287, "eval_samples_per_second": 159.09, "eval_steps_per_second": 1.246, "step": 33000 }, { "epoch": 9.3, "learning_rate": 8.25e-06, "loss": 0.9371, "step": 33500 }, { "epoch": 9.44, "learning_rate": 8.000000000000001e-06, "loss": 0.9338, "step": 34000 }, { "epoch": 9.44, "eval_loss": 0.9082431793212891, "eval_runtime": 183.1001, "eval_samples_per_second": 159.028, "eval_steps_per_second": 1.245, "step": 34000 }, { "epoch": 9.58, "learning_rate": 7.75e-06, "loss": 0.9354, "step": 34500 }, { "epoch": 9.72, "learning_rate": 7.5e-06, "loss": 0.9337, "step": 35000 }, { "epoch": 9.72, "eval_loss": 0.9084173440933228, "eval_runtime": 183.0616, "eval_samples_per_second": 159.061, "eval_steps_per_second": 1.245, "step": 35000 }, { "epoch": 9.86, "learning_rate": 7.25e-06, "loss": 0.9358, "step": 35500 }, { "epoch": 10.0, "learning_rate": 7.000000000000001e-06, "loss": 0.9355, "step": 36000 }, { "epoch": 10.0, "eval_loss": 0.907382071018219, "eval_runtime": 183.0579, "eval_samples_per_second": 159.064, "eval_steps_per_second": 1.246, "step": 36000 }, { "epoch": 10.14, "learning_rate": 6.750000000000001e-06, "loss": 0.9324, "step": 36500 }, { "epoch": 10.27, "learning_rate": 6.5000000000000004e-06, "loss": 0.9323, "step": 37000 }, { "epoch": 10.27, "eval_loss": 0.9063876271247864, "eval_runtime": 183.0015, "eval_samples_per_second": 159.113, "eval_steps_per_second": 1.246, "step": 37000 }, { "epoch": 10.41, "learning_rate": 6.25e-06, "loss": 0.9295, "step": 37500 }, { "epoch": 10.55, "learning_rate": 6e-06, "loss": 0.9321, "step": 38000 }, { "epoch": 10.55, "eval_loss": 0.9042719006538391, "eval_runtime": 183.0185, "eval_samples_per_second": 159.099, "eval_steps_per_second": 1.246, "step": 38000 } ], "max_steps": 50000, "num_train_epochs": 14, "total_flos": 1.4479335456768e+17, "trial_name": null, "trial_params": null }