| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.32, |
| "eval_steps": 500, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.016, |
| "grad_norm": 0.987720251083374, |
| "learning_rate": 3.997524171965045e-05, |
| "loss": 4.506, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 1.1270077228546143, |
| "learning_rate": 3.990002677172515e-05, |
| "loss": 4.1703, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 0.9228858351707458, |
| "learning_rate": 3.9774542629091646e-05, |
| "loss": 3.9996, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 0.9696429371833801, |
| "learning_rate": 3.9599106275584746e-05, |
| "loss": 3.9142, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 0.9501475691795349, |
| "learning_rate": 3.937416087865917e-05, |
| "loss": 3.8441, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 0.9516975283622742, |
| "learning_rate": 3.91002746699101e-05, |
| "loss": 3.7967, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 0.8569780588150024, |
| "learning_rate": 3.877813950967087e-05, |
| "loss": 3.7618, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 0.940584659576416, |
| "learning_rate": 3.8408569139313696e-05, |
| "loss": 3.6315, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 0.9224157333374023, |
| "learning_rate": 3.799249712566837e-05, |
| "loss": 3.6187, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 0.8730221390724182, |
| "learning_rate": 3.753097450275138e-05, |
| "loss": 3.5771, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 0.8778759837150574, |
| "learning_rate": 3.7025167116762844e-05, |
| "loss": 3.5414, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 1.054726004600525, |
| "learning_rate": 3.647635268105776e-05, |
| "loss": 3.5168, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 0.9288316965103149, |
| "learning_rate": 3.5885917548531206e-05, |
| "loss": 3.4751, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 1.053566813468933, |
| "learning_rate": 3.52553532095706e-05, |
| "loss": 3.487, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 0.9158502817153931, |
| "learning_rate": 3.458625252442156e-05, |
| "loss": 3.4344, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 1.1394481658935547, |
| "learning_rate": 3.388030569948466e-05, |
| "loss": 3.4369, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 0.8391066789627075, |
| "learning_rate": 3.313929601770737e-05, |
| "loss": 3.3938, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 0.8551831841468811, |
| "learning_rate": 3.23650953338566e-05, |
| "loss": 3.3531, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 0.8429365754127502, |
| "learning_rate": 3.155965934605104e-05, |
| "loss": 3.3752, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 0.998236358165741, |
| "learning_rate": 3.072502265549804e-05, |
| "loss": 3.3798, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 100, |
| "max_steps": 6250, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.4483807633408e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|