| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9957081545064378, |
| "eval_steps": 1000, |
| "global_step": 174, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.05722460658082976, |
| "grad_norm": 2.8843040386286187, |
| "learning_rate": 1e-05, |
| "loss": 1.2427, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.11444921316165951, |
| "grad_norm": 0.9017664305111632, |
| "learning_rate": 9.909643486313533e-06, |
| "loss": 0.5328, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.17167381974248927, |
| "grad_norm": 0.797068637259448, |
| "learning_rate": 9.641839665080363e-06, |
| "loss": 0.4447, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.22889842632331903, |
| "grad_norm": 0.6941745476559829, |
| "learning_rate": 9.206267664155906e-06, |
| "loss": 0.4017, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.2861230329041488, |
| "grad_norm": 0.6250591769825486, |
| "learning_rate": 8.61867019052535e-06, |
| "loss": 0.3776, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.34334763948497854, |
| "grad_norm": 0.6177949791797019, |
| "learning_rate": 7.900284547855992e-06, |
| "loss": 0.3662, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.4005722460658083, |
| "grad_norm": 0.6776944915753278, |
| "learning_rate": 7.0770750650094335e-06, |
| "loss": 0.3658, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.45779685264663805, |
| "grad_norm": 0.7027875485112575, |
| "learning_rate": 6.178794677547138e-06, |
| "loss": 0.3553, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.5150214592274678, |
| "grad_norm": 0.6301404025031555, |
| "learning_rate": 5.237909579118713e-06, |
| "loss": 0.3317, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.5722460658082976, |
| "grad_norm": 0.6840316408079424, |
| "learning_rate": 4.2884258086335755e-06, |
| "loss": 0.3428, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6294706723891274, |
| "grad_norm": 0.6028809547153681, |
| "learning_rate": 3.3646601834128924e-06, |
| "loss": 0.331, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.6866952789699571, |
| "grad_norm": 0.6463003155720976, |
| "learning_rate": 2.5000000000000015e-06, |
| "loss": 0.3252, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.7439198855507868, |
| "grad_norm": 0.5511586120399353, |
| "learning_rate": 1.7256963302735752e-06, |
| "loss": 0.3167, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.8011444921316166, |
| "grad_norm": 0.6770595845177203, |
| "learning_rate": 1.0697345262860638e-06, |
| "loss": 0.326, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.8583690987124464, |
| "grad_norm": 0.5474842641924246, |
| "learning_rate": 5.558227567253832e-07, |
| "loss": 0.3133, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.9155937052932761, |
| "grad_norm": 0.5872523545390601, |
| "learning_rate": 2.0253513192751374e-07, |
| "loss": 0.3195, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.9728183118741058, |
| "grad_norm": 0.5924289357218945, |
| "learning_rate": 2.264038713457706e-08, |
| "loss": 0.3216, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.9957081545064378, |
| "step": 174, |
| "total_flos": 599683779002368.0, |
| "train_loss": 0.40995535357245083, |
| "train_runtime": 10019.3178, |
| "train_samples_per_second": 2.232, |
| "train_steps_per_second": 0.017 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 174, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 599683779002368.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|