| { | |
| "best_global_step": 196, | |
| "best_metric": 1.4676804542541504, | |
| "best_model_checkpoint": "./models/codet5-sequenced/checkpoint-196", | |
| "epoch": 13.067796610169491, | |
| "eval_steps": 7, | |
| "global_step": 196, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06779661016949153, | |
| "grad_norm": 35.061439514160156, | |
| "learning_rate": 0.0, | |
| "loss": 6.9968, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.4745762711864407, | |
| "eval_loss": 3.7553770542144775, | |
| "eval_runtime": 31.376, | |
| "eval_samples_per_second": 4.143, | |
| "eval_steps_per_second": 0.223, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.6779661016949152, | |
| "grad_norm": 2.471144676208496, | |
| "learning_rate": 0.00012857142857142855, | |
| "loss": 4.9711, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.9491525423728814, | |
| "eval_loss": 2.576092481613159, | |
| "eval_runtime": 32.5287, | |
| "eval_samples_per_second": 3.996, | |
| "eval_steps_per_second": 0.215, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 1.3389830508474576, | |
| "grad_norm": 0.970221757888794, | |
| "learning_rate": 0.0002714285714285714, | |
| "loss": 2.7955, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.4067796610169492, | |
| "eval_loss": 2.0272090435028076, | |
| "eval_runtime": 32.8232, | |
| "eval_samples_per_second": 3.961, | |
| "eval_steps_per_second": 0.213, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 1.8813559322033897, | |
| "eval_loss": 1.794872760772705, | |
| "eval_runtime": 32.1072, | |
| "eval_samples_per_second": 4.049, | |
| "eval_steps_per_second": 0.218, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.253476619720459, | |
| "learning_rate": 0.00028823529411764703, | |
| "loss": 1.9943, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.3389830508474576, | |
| "eval_loss": 1.6869820356369019, | |
| "eval_runtime": 33.6053, | |
| "eval_samples_per_second": 3.868, | |
| "eval_steps_per_second": 0.208, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 2.6779661016949152, | |
| "grad_norm": 0.17588205635547638, | |
| "learning_rate": 0.00027352941176470583, | |
| "loss": 1.7476, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.8135593220338984, | |
| "eval_loss": 1.633076548576355, | |
| "eval_runtime": 31.3432, | |
| "eval_samples_per_second": 4.148, | |
| "eval_steps_per_second": 0.223, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 3.2711864406779663, | |
| "eval_loss": 1.595025658607483, | |
| "eval_runtime": 32.6382, | |
| "eval_samples_per_second": 3.983, | |
| "eval_steps_per_second": 0.214, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 3.3389830508474576, | |
| "grad_norm": 0.1354854702949524, | |
| "learning_rate": 0.0002588235294117647, | |
| "loss": 1.6567, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.7457627118644066, | |
| "eval_loss": 1.5682227611541748, | |
| "eval_runtime": 32.3796, | |
| "eval_samples_per_second": 4.015, | |
| "eval_steps_per_second": 0.216, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 0.14623650908470154, | |
| "learning_rate": 0.0002441176470588235, | |
| "loss": 1.6035, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.203389830508475, | |
| "eval_loss": 1.5545539855957031, | |
| "eval_runtime": 32.9074, | |
| "eval_samples_per_second": 3.95, | |
| "eval_steps_per_second": 0.213, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 4.677966101694915, | |
| "grad_norm": 0.08996161818504333, | |
| "learning_rate": 0.0002294117647058823, | |
| "loss": 1.5737, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 4.677966101694915, | |
| "eval_loss": 1.5401841402053833, | |
| "eval_runtime": 36.4472, | |
| "eval_samples_per_second": 3.567, | |
| "eval_steps_per_second": 0.192, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 5.135593220338983, | |
| "eval_loss": 1.5304237604141235, | |
| "eval_runtime": 33.0897, | |
| "eval_samples_per_second": 3.929, | |
| "eval_steps_per_second": 0.212, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 5.338983050847458, | |
| "grad_norm": 0.12378664314746857, | |
| "learning_rate": 0.00021470588235294116, | |
| "loss": 1.5575, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 5.610169491525424, | |
| "eval_loss": 1.5213029384613037, | |
| "eval_runtime": 31.2672, | |
| "eval_samples_per_second": 4.158, | |
| "eval_steps_per_second": 0.224, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 0.1171395406126976, | |
| "learning_rate": 0.00019999999999999998, | |
| "loss": 1.5386, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.067796610169491, | |
| "eval_loss": 1.5130056142807007, | |
| "eval_runtime": 33.1459, | |
| "eval_samples_per_second": 3.922, | |
| "eval_steps_per_second": 0.211, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 6.5423728813559325, | |
| "eval_loss": 1.5062706470489502, | |
| "eval_runtime": 32.4291, | |
| "eval_samples_per_second": 4.009, | |
| "eval_steps_per_second": 0.216, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 6.677966101694915, | |
| "grad_norm": 0.06857075542211533, | |
| "learning_rate": 0.0001852941176470588, | |
| "loss": 1.5288, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_loss": 1.5011202096939087, | |
| "eval_runtime": 32.7571, | |
| "eval_samples_per_second": 3.969, | |
| "eval_steps_per_second": 0.214, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 7.338983050847458, | |
| "grad_norm": 0.07266195118427277, | |
| "learning_rate": 0.00017058823529411763, | |
| "loss": 1.5196, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 7.47457627118644, | |
| "eval_loss": 1.49434232711792, | |
| "eval_runtime": 33.5153, | |
| "eval_samples_per_second": 3.879, | |
| "eval_steps_per_second": 0.209, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 7.9491525423728815, | |
| "eval_loss": 1.493066430091858, | |
| "eval_runtime": 32.6462, | |
| "eval_samples_per_second": 3.982, | |
| "eval_steps_per_second": 0.214, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "grad_norm": 0.09790726006031036, | |
| "learning_rate": 0.00015588235294117646, | |
| "loss": 1.5098, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.40677966101695, | |
| "eval_loss": 1.4890639781951904, | |
| "eval_runtime": 33.9906, | |
| "eval_samples_per_second": 3.825, | |
| "eval_steps_per_second": 0.206, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 8.677966101694915, | |
| "grad_norm": 0.08605582267045975, | |
| "learning_rate": 0.00014117647058823528, | |
| "loss": 1.504, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 8.88135593220339, | |
| "eval_loss": 1.4857922792434692, | |
| "eval_runtime": 33.6557, | |
| "eval_samples_per_second": 3.863, | |
| "eval_steps_per_second": 0.208, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 9.338983050847457, | |
| "grad_norm": 0.07982663810253143, | |
| "learning_rate": 0.0001264705882352941, | |
| "loss": 1.501, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 9.338983050847457, | |
| "eval_loss": 1.4824906587600708, | |
| "eval_runtime": 33.5066, | |
| "eval_samples_per_second": 3.88, | |
| "eval_steps_per_second": 0.209, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 9.813559322033898, | |
| "eval_loss": 1.478973150253296, | |
| "eval_runtime": 33.5484, | |
| "eval_samples_per_second": 3.875, | |
| "eval_steps_per_second": 0.209, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 0.07402677834033966, | |
| "learning_rate": 0.00011176470588235293, | |
| "loss": 1.4941, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 10.271186440677965, | |
| "eval_loss": 1.476096749305725, | |
| "eval_runtime": 33.087, | |
| "eval_samples_per_second": 3.929, | |
| "eval_steps_per_second": 0.212, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 10.677966101694915, | |
| "grad_norm": 0.0733569785952568, | |
| "learning_rate": 9.705882352941176e-05, | |
| "loss": 1.4894, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 10.745762711864407, | |
| "eval_loss": 1.4757354259490967, | |
| "eval_runtime": 33.5743, | |
| "eval_samples_per_second": 3.872, | |
| "eval_steps_per_second": 0.208, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 11.203389830508474, | |
| "eval_loss": 1.4727975130081177, | |
| "eval_runtime": 33.5918, | |
| "eval_samples_per_second": 3.87, | |
| "eval_steps_per_second": 0.208, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 11.338983050847457, | |
| "grad_norm": 0.07810712605714798, | |
| "learning_rate": 8.23529411764706e-05, | |
| "loss": 1.4887, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 11.677966101694915, | |
| "eval_loss": 1.4703751802444458, | |
| "eval_runtime": 33.6902, | |
| "eval_samples_per_second": 3.859, | |
| "eval_steps_per_second": 0.208, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "grad_norm": 0.0885886400938034, | |
| "learning_rate": 6.76470588235294e-05, | |
| "loss": 1.4844, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 12.135593220338983, | |
| "eval_loss": 1.4694443941116333, | |
| "eval_runtime": 33.5918, | |
| "eval_samples_per_second": 3.87, | |
| "eval_steps_per_second": 0.208, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 12.610169491525424, | |
| "eval_loss": 1.47074556350708, | |
| "eval_runtime": 33.0341, | |
| "eval_samples_per_second": 3.935, | |
| "eval_steps_per_second": 0.212, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 12.677966101694915, | |
| "grad_norm": 0.07929002493619919, | |
| "learning_rate": 5.294117647058824e-05, | |
| "loss": 1.48, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 13.067796610169491, | |
| "eval_loss": 1.4676804542541504, | |
| "eval_runtime": 33.3819, | |
| "eval_samples_per_second": 3.894, | |
| "eval_steps_per_second": 0.21, | |
| "step": 196 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 225, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 15, | |
| "save_steps": 28, | |
| "stateful_callbacks": { | |
| "EarlyStoppingCallback": { | |
| "args": { | |
| "early_stopping_patience": 10, | |
| "early_stopping_threshold": 0.001 | |
| }, | |
| "attributes": { | |
| "early_stopping_patience_counter": 0 | |
| } | |
| }, | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2586720180633600.0, | |
| "train_batch_size": 20, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |