{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.074688796680498,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 87.4023,
      "step": 100
    },
    {
      "epoch": 0.08,
      "learning_rate": 4.000000000000001e-06,
      "loss": 52.564,
      "step": 200
    },
    {
      "epoch": 0.12,
      "learning_rate": 6e-06,
      "loss": 41.0172,
      "step": 300
    },
    {
      "epoch": 0.17,
      "learning_rate": 8.000000000000001e-06,
      "loss": 36.522,
      "step": 400
    },
    {
      "epoch": 0.21,
      "learning_rate": 1e-05,
      "loss": 33.7109,
      "step": 500
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.2e-05,
      "loss": 31.384,
      "step": 600
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.4000000000000001e-05,
      "loss": 29.2667,
      "step": 700
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 27.6322,
      "step": 800
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.8e-05,
      "loss": 25.95,
      "step": 900
    },
    {
      "epoch": 0.41,
      "learning_rate": 2e-05,
      "loss": 24.3416,
      "step": 1000
    },
    {
      "epoch": 0.46,
      "learning_rate": 2.2000000000000003e-05,
      "loss": 22.5298,
      "step": 1100
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.4e-05,
      "loss": 20.7681,
      "step": 1200
    },
    {
      "epoch": 0.54,
      "learning_rate": 2.6000000000000002e-05,
      "loss": 19.4657,
      "step": 1300
    },
    {
      "epoch": 0.58,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 17.7138,
      "step": 1400
    },
    {
      "epoch": 0.62,
      "learning_rate": 3e-05,
      "loss": 16.3022,
      "step": 1500
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 14.6267,
      "step": 1600
    },
    {
      "epoch": 0.71,
      "learning_rate": 3.4000000000000007e-05,
      "loss": 13.0298,
      "step": 1700
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.6e-05,
      "loss": 11.5227,
      "step": 1800
    },
    {
      "epoch": 0.79,
      "learning_rate": 3.8e-05,
      "loss": 9.8717,
      "step": 1900
    },
    {
      "epoch": 0.83,
      "learning_rate": 4e-05,
      "loss": 8.0718,
      "step": 2000
    },
    {
      "epoch": 0.83,
      "eval_bleu": 5.8841,
      "eval_em": 0.0,
      "eval_gen_len": 126.98,
      "eval_loss": 6.968929290771484,
      "eval_rm": 0.0,
      "eval_runtime": 84.2898,
      "eval_samples_per_second": 0.593,
      "eval_steps_per_second": 0.083,
      "step": 2000
    },
    {
      "epoch": 0.87,
      "learning_rate": 4.2e-05,
      "loss": 7.3621,
      "step": 2100
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 6.6663,
      "step": 2200
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.600000000000001e-05,
      "loss": 6.4464,
      "step": 2300
    },
    {
      "epoch": 1.0,
      "learning_rate": 4.8e-05,
      "loss": 6.0977,
      "step": 2400
    },
    {
      "epoch": 1.04,
      "learning_rate": 5e-05,
      "loss": 5.8656,
      "step": 2500
    },
    {
      "epoch": 1.08,
      "learning_rate": 4.976851851851852e-05,
      "loss": 5.6319,
      "step": 2600
    },
    {
      "epoch": 1.12,
      "learning_rate": 4.9537037037037035e-05,
      "loss": 5.2821,
      "step": 2700
    },
    {
      "epoch": 1.16,
      "learning_rate": 4.930555555555556e-05,
      "loss": 4.9816,
      "step": 2800
    },
    {
      "epoch": 1.2,
      "learning_rate": 4.9074074074074075e-05,
      "loss": 5.0416,
      "step": 2900
    },
    {
      "epoch": 1.24,
      "learning_rate": 4.8842592592592595e-05,
      "loss": 4.9786,
      "step": 3000
    },
    {
      "epoch": 1.29,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 4.7885,
      "step": 3100
    },
    {
      "epoch": 1.33,
      "learning_rate": 4.837962962962963e-05,
      "loss": 4.6081,
      "step": 3200
    },
    {
      "epoch": 1.37,
      "learning_rate": 4.814814814814815e-05,
      "loss": 4.4985,
      "step": 3300
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.791666666666667e-05,
      "loss": 4.3774,
      "step": 3400
    },
    {
      "epoch": 1.45,
      "learning_rate": 4.768518518518519e-05,
      "loss": 4.2805,
      "step": 3500
    },
    {
      "epoch": 1.49,
      "learning_rate": 4.745370370370371e-05,
      "loss": 3.9643,
      "step": 3600
    },
    {
      "epoch": 1.54,
      "learning_rate": 4.722222222222222e-05,
      "loss": 3.9951,
      "step": 3700
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.699074074074074e-05,
      "loss": 3.9219,
      "step": 3800
    },
    {
      "epoch": 1.62,
      "learning_rate": 4.675925925925926e-05,
      "loss": 3.7154,
      "step": 3900
    },
    {
      "epoch": 1.66,
      "learning_rate": 4.652777777777778e-05,
      "loss": 3.6551,
      "step": 4000
    },
    {
      "epoch": 1.66,
      "eval_bleu": 20.8847,
      "eval_em": 0.0,
      "eval_gen_len": 30.46,
      "eval_loss": 3.524771213531494,
      "eval_rm": 0.0,
      "eval_runtime": 30.3441,
      "eval_samples_per_second": 1.648,
      "eval_steps_per_second": 0.231,
      "step": 4000
    },
    {
      "epoch": 1.7,
      "learning_rate": 4.62962962962963e-05,
      "loss": 3.6855,
      "step": 4100
    },
    {
      "epoch": 1.74,
      "learning_rate": 4.6064814814814814e-05,
      "loss": 3.5543,
      "step": 4200
    },
    {
      "epoch": 1.78,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 3.4561,
      "step": 4300
    },
    {
      "epoch": 1.83,
      "learning_rate": 4.5601851851851854e-05,
      "loss": 3.3651,
      "step": 4400
    },
    {
      "epoch": 1.87,
      "learning_rate": 4.5370370370370374e-05,
      "loss": 3.1537,
      "step": 4500
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.5138888888888894e-05,
      "loss": 3.3103,
      "step": 4600
    },
    {
      "epoch": 1.95,
      "learning_rate": 4.490740740740741e-05,
      "loss": 3.1797,
      "step": 4700
    },
    {
      "epoch": 1.99,
      "learning_rate": 4.467592592592593e-05,
      "loss": 3.1758,
      "step": 4800
    },
    {
      "epoch": 2.03,
      "learning_rate": 4.4444444444444447e-05,
      "loss": 2.9908,
      "step": 4900
    },
    {
      "epoch": 2.07,
      "learning_rate": 4.4212962962962966e-05,
      "loss": 2.7989,
      "step": 5000
    }
  ],
  "max_steps": 24100,
  "num_train_epochs": 10,
  "total_flos": 1401098107488768.0,
  "trial_name": null,
  "trial_params": null
}