| { |
| "best_metric": 3.599998950958252, |
| "best_model_checkpoint": "models/GPT2_five_function_42/checkpoint-64080", |
| "epoch": 10.0, |
| "global_step": 64080, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "learning_rate": 1e-05, |
| "loss": 6.9949, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 2e-05, |
| "loss": 5.7461, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 3e-05, |
| "loss": 5.4085, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4e-05, |
| "loss": 5.1434, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 5e-05, |
| "loss": 4.933, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 6e-05, |
| "loss": 4.7568, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.3263572316618317, |
| "eval_loss": 4.562152862548828, |
| "eval_runtime": 1.9609, |
| "eval_samples_per_second": 600.758, |
| "eval_steps_per_second": 5.1, |
| "step": 6408 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 7e-05, |
| "loss": 4.6067, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 8e-05, |
| "loss": 4.4864, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 9e-05, |
| "loss": 4.3914, |
| "step": 9000 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 0.0001, |
| "loss": 4.3109, |
| "step": 10000 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 9.889000000000001e-05, |
| "loss": 4.2433, |
| "step": 11000 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 9.77788888888889e-05, |
| "loss": 4.1832, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.36074087937649557, |
| "eval_loss": 4.056756019592285, |
| "eval_runtime": 1.9676, |
| "eval_samples_per_second": 598.695, |
| "eval_steps_per_second": 5.082, |
| "step": 12816 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 9.66688888888889e-05, |
| "loss": 4.1265, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 9.55588888888889e-05, |
| "loss": 4.0565, |
| "step": 14000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 9.444777777777778e-05, |
| "loss": 4.0283, |
| "step": 15000 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.333777777777777e-05, |
| "loss": 4.0035, |
| "step": 16000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 9.222666666666668e-05, |
| "loss": 3.9768, |
| "step": 17000 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 9.111666666666667e-05, |
| "loss": 3.9553, |
| "step": 18000 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 9.000555555555557e-05, |
| "loss": 3.9365, |
| "step": 19000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.3769100169779287, |
| "eval_loss": 3.874519109725952, |
| "eval_runtime": 1.9728, |
| "eval_samples_per_second": 597.128, |
| "eval_steps_per_second": 5.069, |
| "step": 19224 |
| }, |
| { |
| "epoch": 3.12, |
| "learning_rate": 8.889555555555556e-05, |
| "loss": 3.871, |
| "step": 20000 |
| }, |
| { |
| "epoch": 3.28, |
| "learning_rate": 8.778444444444445e-05, |
| "loss": 3.8532, |
| "step": 21000 |
| }, |
| { |
| "epoch": 3.43, |
| "learning_rate": 8.667444444444444e-05, |
| "loss": 3.8443, |
| "step": 22000 |
| }, |
| { |
| "epoch": 3.59, |
| "learning_rate": 8.556444444444445e-05, |
| "loss": 3.8378, |
| "step": 23000 |
| }, |
| { |
| "epoch": 3.75, |
| "learning_rate": 8.445333333333333e-05, |
| "loss": 3.825, |
| "step": 24000 |
| }, |
| { |
| "epoch": 3.9, |
| "learning_rate": 8.334222222222222e-05, |
| "loss": 3.8154, |
| "step": 25000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.3853922971003837, |
| "eval_loss": 3.779280185699463, |
| "eval_runtime": 1.9858, |
| "eval_samples_per_second": 593.214, |
| "eval_steps_per_second": 5.036, |
| "step": 25632 |
| }, |
| { |
| "epoch": 4.06, |
| "learning_rate": 8.223111111111111e-05, |
| "loss": 3.7779, |
| "step": 26000 |
| }, |
| { |
| "epoch": 4.21, |
| "learning_rate": 8.112111111111111e-05, |
| "loss": 3.7386, |
| "step": 27000 |
| }, |
| { |
| "epoch": 4.37, |
| "learning_rate": 8.001e-05, |
| "loss": 3.7408, |
| "step": 28000 |
| }, |
| { |
| "epoch": 4.53, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 3.7369, |
| "step": 29000 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 7.77888888888889e-05, |
| "loss": 3.7354, |
| "step": 30000 |
| }, |
| { |
| "epoch": 4.84, |
| "learning_rate": 7.667888888888889e-05, |
| "loss": 3.7297, |
| "step": 31000 |
| }, |
| { |
| "epoch": 4.99, |
| "learning_rate": 7.556777777777779e-05, |
| "loss": 3.725, |
| "step": 32000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.3909669398286165, |
| "eval_loss": 3.7228105068206787, |
| "eval_runtime": 2.0428, |
| "eval_samples_per_second": 576.664, |
| "eval_steps_per_second": 4.895, |
| "step": 32040 |
| }, |
| { |
| "epoch": 5.15, |
| "learning_rate": 7.445777777777778e-05, |
| "loss": 3.6547, |
| "step": 33000 |
| }, |
| { |
| "epoch": 5.31, |
| "learning_rate": 7.334666666666668e-05, |
| "loss": 3.6602, |
| "step": 34000 |
| }, |
| { |
| "epoch": 5.46, |
| "learning_rate": 7.223666666666667e-05, |
| "loss": 3.6624, |
| "step": 35000 |
| }, |
| { |
| "epoch": 5.62, |
| "learning_rate": 7.112555555555556e-05, |
| "loss": 3.6649, |
| "step": 36000 |
| }, |
| { |
| "epoch": 5.77, |
| "learning_rate": 7.001555555555555e-05, |
| "loss": 3.6615, |
| "step": 37000 |
| }, |
| { |
| "epoch": 5.93, |
| "learning_rate": 6.890444444444445e-05, |
| "loss": 3.6621, |
| "step": 38000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.39523147467347564, |
| "eval_loss": 3.681443691253662, |
| "eval_runtime": 1.9586, |
| "eval_samples_per_second": 601.465, |
| "eval_steps_per_second": 5.106, |
| "step": 38448 |
| }, |
| { |
| "epoch": 6.09, |
| "learning_rate": 6.779444444444444e-05, |
| "loss": 3.6176, |
| "step": 39000 |
| }, |
| { |
| "epoch": 6.24, |
| "learning_rate": 6.668333333333333e-05, |
| "loss": 3.5948, |
| "step": 40000 |
| }, |
| { |
| "epoch": 6.4, |
| "learning_rate": 6.557333333333332e-05, |
| "loss": 3.6021, |
| "step": 41000 |
| }, |
| { |
| "epoch": 6.55, |
| "learning_rate": 6.446222222222223e-05, |
| "loss": 3.6043, |
| "step": 42000 |
| }, |
| { |
| "epoch": 6.71, |
| "learning_rate": 6.335222222222222e-05, |
| "loss": 3.6064, |
| "step": 43000 |
| }, |
| { |
| "epoch": 6.87, |
| "learning_rate": 6.224111111111112e-05, |
| "loss": 3.6072, |
| "step": 44000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.39868721842706845, |
| "eval_loss": 3.6536548137664795, |
| "eval_runtime": 1.9678, |
| "eval_samples_per_second": 598.633, |
| "eval_steps_per_second": 5.082, |
| "step": 44856 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 6.113111111111111e-05, |
| "loss": 3.5958, |
| "step": 45000 |
| }, |
| { |
| "epoch": 7.18, |
| "learning_rate": 6.002e-05, |
| "loss": 3.5371, |
| "step": 46000 |
| }, |
| { |
| "epoch": 7.33, |
| "learning_rate": 5.891e-05, |
| "loss": 3.5493, |
| "step": 47000 |
| }, |
| { |
| "epoch": 7.49, |
| "learning_rate": 5.779888888888889e-05, |
| "loss": 3.554, |
| "step": 48000 |
| }, |
| { |
| "epoch": 7.65, |
| "learning_rate": 5.668888888888889e-05, |
| "loss": 3.5593, |
| "step": 49000 |
| }, |
| { |
| "epoch": 7.8, |
| "learning_rate": 5.5577777777777784e-05, |
| "loss": 3.5597, |
| "step": 50000 |
| }, |
| { |
| "epoch": 7.96, |
| "learning_rate": 5.446777777777778e-05, |
| "loss": 3.5598, |
| "step": 51000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.4002713794901274, |
| "eval_loss": 3.631843328475952, |
| "eval_runtime": 1.9682, |
| "eval_samples_per_second": 598.504, |
| "eval_steps_per_second": 5.081, |
| "step": 51264 |
| }, |
| { |
| "epoch": 8.11, |
| "learning_rate": 5.3356666666666663e-05, |
| "loss": 3.5064, |
| "step": 52000 |
| }, |
| { |
| "epoch": 8.27, |
| "learning_rate": 5.224666666666667e-05, |
| "loss": 3.5007, |
| "step": 53000 |
| }, |
| { |
| "epoch": 8.43, |
| "learning_rate": 5.1135555555555556e-05, |
| "loss": 3.511, |
| "step": 54000 |
| }, |
| { |
| "epoch": 8.58, |
| "learning_rate": 5.002555555555556e-05, |
| "loss": 3.5135, |
| "step": 55000 |
| }, |
| { |
| "epoch": 8.74, |
| "learning_rate": 4.891444444444444e-05, |
| "loss": 3.5193, |
| "step": 56000 |
| }, |
| { |
| "epoch": 8.9, |
| "learning_rate": 4.780444444444445e-05, |
| "loss": 3.5203, |
| "step": 57000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.4027378581072952, |
| "eval_loss": 3.6141819953918457, |
| "eval_runtime": 1.9786, |
| "eval_samples_per_second": 595.365, |
| "eval_steps_per_second": 5.054, |
| "step": 57672 |
| }, |
| { |
| "epoch": 9.05, |
| "learning_rate": 4.6693333333333336e-05, |
| "loss": 3.4957, |
| "step": 58000 |
| }, |
| { |
| "epoch": 9.21, |
| "learning_rate": 4.5583333333333335e-05, |
| "loss": 3.4579, |
| "step": 59000 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 4.447222222222223e-05, |
| "loss": 3.4682, |
| "step": 60000 |
| }, |
| { |
| "epoch": 9.52, |
| "learning_rate": 4.336222222222223e-05, |
| "loss": 3.4743, |
| "step": 61000 |
| }, |
| { |
| "epoch": 9.68, |
| "learning_rate": 4.2251111111111115e-05, |
| "loss": 3.4784, |
| "step": 62000 |
| }, |
| { |
| "epoch": 9.83, |
| "learning_rate": 4.1141111111111114e-05, |
| "loss": 3.4833, |
| "step": 63000 |
| }, |
| { |
| "epoch": 9.99, |
| "learning_rate": 4.003e-05, |
| "loss": 3.4839, |
| "step": 64000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.40503723112709383, |
| "eval_loss": 3.599998950958252, |
| "eval_runtime": 1.9622, |
| "eval_samples_per_second": 600.357, |
| "eval_steps_per_second": 5.096, |
| "step": 64080 |
| } |
| ], |
| "max_steps": 100000, |
| "num_train_epochs": 16, |
| "total_flos": 5.3573097259008e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|