| { |
| "best_metric": 4.7215962409973145, |
| "best_model_checkpoint": "models/GPT2_no_function_42/checkpoint-47020", |
| "epoch": 10.0, |
| "global_step": 47020, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.21, |
| "learning_rate": 1e-05, |
| "loss": 8.0483, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 2e-05, |
| "loss": 7.0834, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 3e-05, |
| "loss": 6.6765, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 4e-05, |
| "loss": 6.3706, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.220326962593879, |
| "eval_loss": 6.010717391967773, |
| "eval_runtime": 1.4501, |
| "eval_samples_per_second": 597.198, |
| "eval_steps_per_second": 4.827, |
| "step": 4702 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 5e-05, |
| "loss": 6.1229, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 6e-05, |
| "loss": 5.9171, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 7e-05, |
| "loss": 5.7528, |
| "step": 7000 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 8e-05, |
| "loss": 5.6201, |
| "step": 8000 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 9e-05, |
| "loss": 5.5145, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.253105053554218, |
| "eval_loss": 5.362144470214844, |
| "eval_runtime": 1.4369, |
| "eval_samples_per_second": 602.691, |
| "eval_steps_per_second": 4.872, |
| "step": 9404 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 0.0001, |
| "loss": 5.4006, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 9.889000000000001e-05, |
| "loss": 5.3208, |
| "step": 11000 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 9.77788888888889e-05, |
| "loss": 5.2626, |
| "step": 12000 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 9.66688888888889e-05, |
| "loss": 5.2121, |
| "step": 13000 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 9.555777777777778e-05, |
| "loss": 5.1658, |
| "step": 14000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.2755269044025386, |
| "eval_loss": 5.0851593017578125, |
| "eval_runtime": 1.4343, |
| "eval_samples_per_second": 603.766, |
| "eval_steps_per_second": 4.88, |
| "step": 14106 |
| }, |
| { |
| "epoch": 3.19, |
| "learning_rate": 9.444777777777778e-05, |
| "loss": 5.0681, |
| "step": 15000 |
| }, |
| { |
| "epoch": 3.4, |
| "learning_rate": 9.333666666666667e-05, |
| "loss": 5.0459, |
| "step": 16000 |
| }, |
| { |
| "epoch": 3.62, |
| "learning_rate": 9.222666666666668e-05, |
| "loss": 5.0245, |
| "step": 17000 |
| }, |
| { |
| "epoch": 3.83, |
| "learning_rate": 9.111555555555556e-05, |
| "loss": 5.0051, |
| "step": 18000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.28648324271244385, |
| "eval_loss": 4.952297210693359, |
| "eval_runtime": 1.4391, |
| "eval_samples_per_second": 601.751, |
| "eval_steps_per_second": 4.864, |
| "step": 18808 |
| }, |
| { |
| "epoch": 4.04, |
| "learning_rate": 9.000555555555557e-05, |
| "loss": 4.9688, |
| "step": 19000 |
| }, |
| { |
| "epoch": 4.25, |
| "learning_rate": 8.889444444444444e-05, |
| "loss": 4.893, |
| "step": 20000 |
| }, |
| { |
| "epoch": 4.47, |
| "learning_rate": 8.778444444444445e-05, |
| "loss": 4.8916, |
| "step": 21000 |
| }, |
| { |
| "epoch": 4.68, |
| "learning_rate": 8.667333333333334e-05, |
| "loss": 4.8863, |
| "step": 22000 |
| }, |
| { |
| "epoch": 4.89, |
| "learning_rate": 8.556333333333334e-05, |
| "loss": 4.8788, |
| "step": 23000 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.29304795330144934, |
| "eval_loss": 4.870294094085693, |
| "eval_runtime": 1.4302, |
| "eval_samples_per_second": 605.492, |
| "eval_steps_per_second": 4.894, |
| "step": 23510 |
| }, |
| { |
| "epoch": 5.1, |
| "learning_rate": 8.445222222222222e-05, |
| "loss": 4.8231, |
| "step": 24000 |
| }, |
| { |
| "epoch": 5.32, |
| "learning_rate": 8.334222222222222e-05, |
| "loss": 4.7841, |
| "step": 25000 |
| }, |
| { |
| "epoch": 5.53, |
| "learning_rate": 8.223111111111111e-05, |
| "loss": 4.7904, |
| "step": 26000 |
| }, |
| { |
| "epoch": 5.74, |
| "learning_rate": 8.112111111111111e-05, |
| "loss": 4.7869, |
| "step": 27000 |
| }, |
| { |
| "epoch": 5.95, |
| "learning_rate": 8.001e-05, |
| "loss": 4.7861, |
| "step": 28000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.29845792947936933, |
| "eval_loss": 4.819924354553223, |
| "eval_runtime": 1.4319, |
| "eval_samples_per_second": 604.778, |
| "eval_steps_per_second": 4.889, |
| "step": 28212 |
| }, |
| { |
| "epoch": 6.17, |
| "learning_rate": 7.890000000000001e-05, |
| "loss": 4.7083, |
| "step": 29000 |
| }, |
| { |
| "epoch": 6.38, |
| "learning_rate": 7.77888888888889e-05, |
| "loss": 4.7059, |
| "step": 30000 |
| }, |
| { |
| "epoch": 6.59, |
| "learning_rate": 7.667888888888889e-05, |
| "loss": 4.7111, |
| "step": 31000 |
| }, |
| { |
| "epoch": 6.81, |
| "learning_rate": 7.556777777777779e-05, |
| "loss": 4.7142, |
| "step": 32000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.30186757833099964, |
| "eval_loss": 4.782296657562256, |
| "eval_runtime": 1.4327, |
| "eval_samples_per_second": 604.45, |
| "eval_steps_per_second": 4.886, |
| "step": 32914 |
| }, |
| { |
| "epoch": 7.02, |
| "learning_rate": 7.445777777777778e-05, |
| "loss": 4.7075, |
| "step": 33000 |
| }, |
| { |
| "epoch": 7.23, |
| "learning_rate": 7.334666666666668e-05, |
| "loss": 4.6238, |
| "step": 34000 |
| }, |
| { |
| "epoch": 7.44, |
| "learning_rate": 7.223666666666667e-05, |
| "loss": 4.6427, |
| "step": 35000 |
| }, |
| { |
| "epoch": 7.66, |
| "learning_rate": 7.112555555555556e-05, |
| "loss": 4.6487, |
| "step": 36000 |
| }, |
| { |
| "epoch": 7.87, |
| "learning_rate": 7.001555555555555e-05, |
| "loss": 4.6556, |
| "step": 37000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.30428615591642266, |
| "eval_loss": 4.755917072296143, |
| "eval_runtime": 1.4333, |
| "eval_samples_per_second": 604.206, |
| "eval_steps_per_second": 4.884, |
| "step": 37616 |
| }, |
| { |
| "epoch": 8.08, |
| "learning_rate": 6.890444444444445e-05, |
| "loss": 4.6189, |
| "step": 38000 |
| }, |
| { |
| "epoch": 8.29, |
| "learning_rate": 6.779444444444444e-05, |
| "loss": 4.5731, |
| "step": 39000 |
| }, |
| { |
| "epoch": 8.51, |
| "learning_rate": 6.668333333333333e-05, |
| "loss": 4.5902, |
| "step": 40000 |
| }, |
| { |
| "epoch": 8.72, |
| "learning_rate": 6.557333333333332e-05, |
| "loss": 4.5992, |
| "step": 41000 |
| }, |
| { |
| "epoch": 8.93, |
| "learning_rate": 6.446222222222223e-05, |
| "loss": 4.6029, |
| "step": 42000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.3071047989671037, |
| "eval_loss": 4.737186431884766, |
| "eval_runtime": 1.4325, |
| "eval_samples_per_second": 604.527, |
| "eval_steps_per_second": 4.886, |
| "step": 42318 |
| }, |
| { |
| "epoch": 9.15, |
| "learning_rate": 6.335222222222222e-05, |
| "loss": 4.5398, |
| "step": 43000 |
| }, |
| { |
| "epoch": 9.36, |
| "learning_rate": 6.224111111111112e-05, |
| "loss": 4.528, |
| "step": 44000 |
| }, |
| { |
| "epoch": 9.57, |
| "learning_rate": 6.113111111111111e-05, |
| "loss": 4.5451, |
| "step": 45000 |
| }, |
| { |
| "epoch": 9.78, |
| "learning_rate": 6.002e-05, |
| "loss": 4.5521, |
| "step": 46000 |
| }, |
| { |
| "epoch": 10.0, |
| "learning_rate": 5.891e-05, |
| "loss": 4.5609, |
| "step": 47000 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.30925060464439635, |
| "eval_loss": 4.7215962409973145, |
| "eval_runtime": 1.4366, |
| "eval_samples_per_second": 602.829, |
| "eval_steps_per_second": 4.873, |
| "step": 47020 |
| } |
| ], |
| "max_steps": 100000, |
| "num_train_epochs": 22, |
| "total_flos": 3.930942652416e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|