| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.67741935483871, |
| "eval_steps": 40, |
| "global_step": 150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.6451612903225806, |
| "grad_norm": 0.8625742793083191, |
| "learning_rate": 4e-05, |
| "loss": 0.7057, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.2903225806451613, |
| "grad_norm": 0.7124119400978088, |
| "learning_rate": 8e-05, |
| "loss": 0.6741, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.935483870967742, |
| "grad_norm": 0.7693650126457214, |
| "learning_rate": 0.00012, |
| "loss": 0.5941, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.5806451612903225, |
| "grad_norm": 0.6899323463439941, |
| "learning_rate": 0.00016, |
| "loss": 0.547, |
| "step": 40 |
| }, |
| { |
| "epoch": 2.5806451612903225, |
| "eval_loss": 0.48405569791793823, |
| "eval_runtime": 2.1967, |
| "eval_samples_per_second": 0.455, |
| "eval_steps_per_second": 0.455, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.225806451612903, |
| "grad_norm": 0.564319908618927, |
| "learning_rate": 0.0002, |
| "loss": 0.5046, |
| "step": 50 |
| }, |
| { |
| "epoch": 3.870967741935484, |
| "grad_norm": 0.6034739017486572, |
| "learning_rate": 0.00024, |
| "loss": 0.4842, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.516129032258064, |
| "grad_norm": 0.6102902293205261, |
| "learning_rate": 0.00028, |
| "loss": 0.4531, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.161290322580645, |
| "grad_norm": 0.7875820398330688, |
| "learning_rate": 0.00032, |
| "loss": 0.4389, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.161290322580645, |
| "eval_loss": 0.4346591830253601, |
| "eval_runtime": 2.1674, |
| "eval_samples_per_second": 0.461, |
| "eval_steps_per_second": 0.461, |
| "step": 80 |
| }, |
| { |
| "epoch": 5.806451612903226, |
| "grad_norm": 0.8663121461868286, |
| "learning_rate": 0.00036, |
| "loss": 0.4177, |
| "step": 90 |
| }, |
| { |
| "epoch": 6.451612903225806, |
| "grad_norm": 0.9721035957336426, |
| "learning_rate": 0.0004, |
| "loss": 0.4046, |
| "step": 100 |
| }, |
| { |
| "epoch": 7.096774193548387, |
| "grad_norm": 0.9510289430618286, |
| "learning_rate": 0.00032, |
| "loss": 0.3788, |
| "step": 110 |
| }, |
| { |
| "epoch": 7.741935483870968, |
| "grad_norm": 1.0126413106918335, |
| "learning_rate": 0.00024, |
| "loss": 0.3673, |
| "step": 120 |
| }, |
| { |
| "epoch": 7.741935483870968, |
| "eval_loss": 0.4241188168525696, |
| "eval_runtime": 2.195, |
| "eval_samples_per_second": 0.456, |
| "eval_steps_per_second": 0.456, |
| "step": 120 |
| }, |
| { |
| "epoch": 8.387096774193548, |
| "grad_norm": 1.116309642791748, |
| "learning_rate": 0.00016, |
| "loss": 0.3449, |
| "step": 130 |
| }, |
| { |
| "epoch": 9.03225806451613, |
| "grad_norm": 0.9966815710067749, |
| "learning_rate": 8e-05, |
| "loss": 0.3338, |
| "step": 140 |
| }, |
| { |
| "epoch": 9.67741935483871, |
| "grad_norm": 1.0189791917800903, |
| "learning_rate": 0.0, |
| "loss": 0.3202, |
| "step": 150 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 150, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 40, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.946319309952778e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|