|
{ |
|
"best_metric": 0.9042719006538391, |
|
"best_model_checkpoint": "/gscratch/xlab/hallisky/rewriting/src/models/toxic/bart-base_2.5e-05_0_32_jigsaw_randmask/checkpoint-38000", |
|
"epoch": 10.552624271035823, |
|
"global_step": 38000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 2.4750000000000002e-05, |
|
"loss": 1.2139, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 2.45e-05, |
|
"loss": 1.1274, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.9955187439918518, |
|
"eval_runtime": 183.7737, |
|
"eval_samples_per_second": 158.445, |
|
"eval_steps_per_second": 1.241, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.425e-05, |
|
"loss": 1.1115, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 2.4e-05, |
|
"loss": 1.0866, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 0.9784897565841675, |
|
"eval_runtime": 183.1935, |
|
"eval_samples_per_second": 158.947, |
|
"eval_steps_per_second": 1.245, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 2.375e-05, |
|
"loss": 1.0865, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 2.35e-05, |
|
"loss": 1.0748, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.9661561846733093, |
|
"eval_runtime": 183.3031, |
|
"eval_samples_per_second": 158.852, |
|
"eval_steps_per_second": 1.244, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.3250000000000003e-05, |
|
"loss": 1.073, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.3000000000000003e-05, |
|
"loss": 1.0481, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.9609841704368591, |
|
"eval_runtime": 183.0018, |
|
"eval_samples_per_second": 159.113, |
|
"eval_steps_per_second": 1.246, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.275e-05, |
|
"loss": 1.0424, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 2.25e-05, |
|
"loss": 1.0441, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.9524271488189697, |
|
"eval_runtime": 183.0159, |
|
"eval_samples_per_second": 159.101, |
|
"eval_steps_per_second": 1.246, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 2.2250000000000002e-05, |
|
"loss": 1.0383, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 2.2000000000000003e-05, |
|
"loss": 1.0368, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_loss": 0.9480794668197632, |
|
"eval_runtime": 183.0254, |
|
"eval_samples_per_second": 159.093, |
|
"eval_steps_per_second": 1.246, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.175e-05, |
|
"loss": 1.0295, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 2.15e-05, |
|
"loss": 1.0294, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.9428065419197083, |
|
"eval_runtime": 183.0141, |
|
"eval_samples_per_second": 159.103, |
|
"eval_steps_per_second": 1.246, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.125e-05, |
|
"loss": 1.0213, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 2.1e-05, |
|
"loss": 1.0148, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.9450138807296753, |
|
"eval_runtime": 182.9272, |
|
"eval_samples_per_second": 159.178, |
|
"eval_steps_per_second": 1.246, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.075e-05, |
|
"loss": 1.0141, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 2.05e-05, |
|
"loss": 1.0136, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_loss": 0.9394757151603699, |
|
"eval_runtime": 183.1908, |
|
"eval_samples_per_second": 158.949, |
|
"eval_steps_per_second": 1.245, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 2.025e-05, |
|
"loss": 1.0117, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2e-05, |
|
"loss": 1.0083, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"eval_loss": 0.9403573274612427, |
|
"eval_runtime": 182.9098, |
|
"eval_samples_per_second": 159.193, |
|
"eval_steps_per_second": 1.247, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 1.9750000000000002e-05, |
|
"loss": 1.0062, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 1.9500000000000003e-05, |
|
"loss": 1.0045, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.9311016201972961, |
|
"eval_runtime": 182.9614, |
|
"eval_samples_per_second": 159.148, |
|
"eval_steps_per_second": 1.246, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 1.925e-05, |
|
"loss": 0.9962, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"learning_rate": 1.9e-05, |
|
"loss": 0.9913, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 0.9348850846290588, |
|
"eval_runtime": 182.9469, |
|
"eval_samples_per_second": 159.161, |
|
"eval_steps_per_second": 1.246, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.9914, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.85e-05, |
|
"loss": 0.9925, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 0.9286701679229736, |
|
"eval_runtime": 182.7432, |
|
"eval_samples_per_second": 159.338, |
|
"eval_steps_per_second": 1.248, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 3.75, |
|
"learning_rate": 1.825e-05, |
|
"loss": 0.9957, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"learning_rate": 1.8e-05, |
|
"loss": 0.9929, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 0.9286787509918213, |
|
"eval_runtime": 183.092, |
|
"eval_samples_per_second": 159.035, |
|
"eval_steps_per_second": 1.245, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 1.775e-05, |
|
"loss": 0.9873, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.9825, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"eval_loss": 0.9284389615058899, |
|
"eval_runtime": 183.0692, |
|
"eval_samples_per_second": 159.055, |
|
"eval_steps_per_second": 1.245, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"learning_rate": 1.725e-05, |
|
"loss": 0.9791, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"learning_rate": 1.7000000000000003e-05, |
|
"loss": 0.9761, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.44, |
|
"eval_loss": 0.9231427311897278, |
|
"eval_runtime": 182.9694, |
|
"eval_samples_per_second": 159.141, |
|
"eval_steps_per_second": 1.246, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 1.675e-05, |
|
"loss": 0.9806, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 1.65e-05, |
|
"loss": 0.9788, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 0.9270732998847961, |
|
"eval_runtime": 182.9924, |
|
"eval_samples_per_second": 159.121, |
|
"eval_steps_per_second": 1.246, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 4.86, |
|
"learning_rate": 1.6250000000000002e-05, |
|
"loss": 0.9771, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.6000000000000003e-05, |
|
"loss": 0.9799, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.9208848476409912, |
|
"eval_runtime": 183.0859, |
|
"eval_samples_per_second": 159.04, |
|
"eval_steps_per_second": 1.245, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 5.14, |
|
"learning_rate": 1.575e-05, |
|
"loss": 0.9684, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 1.55e-05, |
|
"loss": 0.9655, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"eval_loss": 0.9188591837882996, |
|
"eval_runtime": 183.2317, |
|
"eval_samples_per_second": 158.914, |
|
"eval_steps_per_second": 1.244, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"learning_rate": 1.525e-05, |
|
"loss": 0.969, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.9678, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.55, |
|
"eval_loss": 0.920549213886261, |
|
"eval_runtime": 182.9534, |
|
"eval_samples_per_second": 159.155, |
|
"eval_steps_per_second": 1.246, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"learning_rate": 1.475e-05, |
|
"loss": 0.9672, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 1.45e-05, |
|
"loss": 0.9723, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 0.9192747473716736, |
|
"eval_runtime": 182.9328, |
|
"eval_samples_per_second": 159.173, |
|
"eval_steps_per_second": 1.246, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 1.4249999999999999e-05, |
|
"loss": 0.9667, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"learning_rate": 1.4000000000000001e-05, |
|
"loss": 0.9603, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.11, |
|
"eval_loss": 0.9202588200569153, |
|
"eval_runtime": 182.9947, |
|
"eval_samples_per_second": 159.119, |
|
"eval_steps_per_second": 1.246, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"learning_rate": 1.3750000000000002e-05, |
|
"loss": 0.9589, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.3500000000000001e-05, |
|
"loss": 0.9593, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"eval_loss": 0.9192423820495605, |
|
"eval_runtime": 183.1637, |
|
"eval_samples_per_second": 158.973, |
|
"eval_steps_per_second": 1.245, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 1.3250000000000002e-05, |
|
"loss": 0.961, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"learning_rate": 1.3000000000000001e-05, |
|
"loss": 0.9634, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.66, |
|
"eval_loss": 0.9148619771003723, |
|
"eval_runtime": 182.8693, |
|
"eval_samples_per_second": 159.228, |
|
"eval_steps_per_second": 1.247, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 1.2750000000000002e-05, |
|
"loss": 0.958, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.9577, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 0.9157229065895081, |
|
"eval_runtime": 182.8974, |
|
"eval_samples_per_second": 159.204, |
|
"eval_steps_per_second": 1.247, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.225e-05, |
|
"loss": 0.9543, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"learning_rate": 1.2e-05, |
|
"loss": 0.9505, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"eval_loss": 0.9146404266357422, |
|
"eval_runtime": 182.7838, |
|
"eval_samples_per_second": 159.303, |
|
"eval_steps_per_second": 1.247, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 1.175e-05, |
|
"loss": 0.9478, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.1500000000000002e-05, |
|
"loss": 0.9546, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.9135451912879944, |
|
"eval_runtime": 182.7429, |
|
"eval_samples_per_second": 159.339, |
|
"eval_steps_per_second": 1.248, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 7.64, |
|
"learning_rate": 1.125e-05, |
|
"loss": 0.9481, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 1.1000000000000001e-05, |
|
"loss": 0.948, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 0.9123603701591492, |
|
"eval_runtime": 183.1278, |
|
"eval_samples_per_second": 159.004, |
|
"eval_steps_per_second": 1.245, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 7.91, |
|
"learning_rate": 1.075e-05, |
|
"loss": 0.948, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 1.05e-05, |
|
"loss": 0.9461, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 0.9139257073402405, |
|
"eval_runtime": 182.9341, |
|
"eval_samples_per_second": 159.172, |
|
"eval_steps_per_second": 1.246, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"learning_rate": 1.025e-05, |
|
"loss": 0.9442, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"learning_rate": 1e-05, |
|
"loss": 0.9441, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.33, |
|
"eval_loss": 0.9109994173049927, |
|
"eval_runtime": 451.1148, |
|
"eval_samples_per_second": 64.547, |
|
"eval_steps_per_second": 0.505, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 9.750000000000002e-06, |
|
"loss": 0.9431, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 9.5e-06, |
|
"loss": 0.9408, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"eval_loss": 0.911307692527771, |
|
"eval_runtime": 182.7894, |
|
"eval_samples_per_second": 159.298, |
|
"eval_steps_per_second": 1.247, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 8.75, |
|
"learning_rate": 9.25e-06, |
|
"loss": 0.9429, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 9e-06, |
|
"loss": 0.9391, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 0.9080024361610413, |
|
"eval_runtime": 183.033, |
|
"eval_samples_per_second": 159.086, |
|
"eval_steps_per_second": 1.246, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 8.75e-06, |
|
"loss": 0.9427, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 8.500000000000002e-06, |
|
"loss": 0.942, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 0.909125804901123, |
|
"eval_runtime": 183.0287, |
|
"eval_samples_per_second": 159.09, |
|
"eval_steps_per_second": 1.246, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 8.25e-06, |
|
"loss": 0.9371, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 0.9338, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 9.44, |
|
"eval_loss": 0.9082431793212891, |
|
"eval_runtime": 183.1001, |
|
"eval_samples_per_second": 159.028, |
|
"eval_steps_per_second": 1.245, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 9.58, |
|
"learning_rate": 7.75e-06, |
|
"loss": 0.9354, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.9337, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 9.72, |
|
"eval_loss": 0.9084173440933228, |
|
"eval_runtime": 183.0616, |
|
"eval_samples_per_second": 159.061, |
|
"eval_steps_per_second": 1.245, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 7.25e-06, |
|
"loss": 0.9358, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 7.000000000000001e-06, |
|
"loss": 0.9355, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 0.907382071018219, |
|
"eval_runtime": 183.0579, |
|
"eval_samples_per_second": 159.064, |
|
"eval_steps_per_second": 1.246, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 6.750000000000001e-06, |
|
"loss": 0.9324, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"learning_rate": 6.5000000000000004e-06, |
|
"loss": 0.9323, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 10.27, |
|
"eval_loss": 0.9063876271247864, |
|
"eval_runtime": 183.0015, |
|
"eval_samples_per_second": 159.113, |
|
"eval_steps_per_second": 1.246, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 10.41, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.9295, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 6e-06, |
|
"loss": 0.9321, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"eval_loss": 0.9042719006538391, |
|
"eval_runtime": 183.0185, |
|
"eval_samples_per_second": 159.099, |
|
"eval_steps_per_second": 1.246, |
|
"step": 38000 |
|
} |
|
], |
|
"max_steps": 50000, |
|
"num_train_epochs": 14, |
|
"total_flos": 1.4479335456768e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|