{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.999829030603522,
  "eval_steps": 250,
  "global_step": 1462,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05060694135749701,
      "grad_norm": 27.736337661743164,
      "learning_rate": 4.965986394557823e-07,
      "loss": 2.027,
      "step": 74
    },
    {
      "epoch": 0.10121388271499401,
      "grad_norm": 10.546476364135742,
      "learning_rate": 1e-06,
      "loss": 1.3623,
      "step": 148
    },
    {
      "epoch": 0.15182082407249103,
      "grad_norm": 2.5816051959991455,
      "learning_rate": 9.437262357414448e-07,
      "loss": 0.3715,
      "step": 222
    },
    {
      "epoch": 0.17096939647803044,
      "eval_loss": 0.2821245491504669,
      "eval_runtime": 29.3471,
      "eval_samples_per_second": 17.037,
      "eval_steps_per_second": 8.519,
      "step": 250
    },
    {
      "epoch": 0.20242776542998803,
      "grad_norm": 3.7410600185394287,
      "learning_rate": 8.874524714828897e-07,
      "loss": 0.2479,
      "step": 296
    },
    {
      "epoch": 0.253034706787485,
      "grad_norm": 2.1482808589935303,
      "learning_rate": 8.311787072243346e-07,
      "loss": 0.2056,
      "step": 370
    },
    {
      "epoch": 0.30364164814498207,
      "grad_norm": 1.6402554512023926,
      "learning_rate": 7.749049429657795e-07,
      "loss": 0.1642,
      "step": 444
    },
    {
      "epoch": 0.3419387929560609,
      "eval_loss": 0.17338356375694275,
      "eval_runtime": 29.3658,
      "eval_samples_per_second": 17.027,
      "eval_steps_per_second": 8.513,
      "step": 500
    },
    {
      "epoch": 0.35424858950247906,
      "grad_norm": 2.480368137359619,
      "learning_rate": 7.186311787072243e-07,
      "loss": 0.1423,
      "step": 518
    },
    {
      "epoch": 0.40485553085997605,
      "grad_norm": 2.045980453491211,
      "learning_rate": 6.623574144486692e-07,
      "loss": 0.1511,
      "step": 592
    },
    {
      "epoch": 0.45546247221747305,
      "grad_norm": 2.9521405696868896,
      "learning_rate": 6.060836501901141e-07,
      "loss": 0.1454,
      "step": 666
    },
    {
      "epoch": 0.50606941357497,
      "grad_norm": 1.8296291828155518,
      "learning_rate": 5.498098859315589e-07,
      "loss": 0.1431,
      "step": 740
    },
    {
      "epoch": 0.5129081894340913,
      "eval_loss": 0.15121176838874817,
      "eval_runtime": 29.3541,
      "eval_samples_per_second": 17.033,
      "eval_steps_per_second": 8.517,
      "step": 750
    },
    {
      "epoch": 0.5566763549324671,
      "grad_norm": 1.909769892692566,
      "learning_rate": 4.935361216730038e-07,
      "loss": 0.1339,
      "step": 814
    },
    {
      "epoch": 0.6072832962899641,
      "grad_norm": 2.23007869720459,
      "learning_rate": 4.372623574144487e-07,
      "loss": 0.1281,
      "step": 888
    },
    {
      "epoch": 0.6578902376474611,
      "grad_norm": 2.860929250717163,
      "learning_rate": 3.8098859315589356e-07,
      "loss": 0.1257,
      "step": 962
    },
    {
      "epoch": 0.6838775859121218,
      "eval_loss": 0.14153704047203064,
      "eval_runtime": 29.3594,
      "eval_samples_per_second": 17.03,
      "eval_steps_per_second": 8.515,
      "step": 1000
    },
    {
      "epoch": 0.7084971790049581,
      "grad_norm": 3.5633513927459717,
      "learning_rate": 3.247148288973384e-07,
      "loss": 0.1332,
      "step": 1036
    },
    {
      "epoch": 0.7591041203624551,
      "grad_norm": 3.4660592079162598,
      "learning_rate": 2.6844106463878326e-07,
      "loss": 0.1174,
      "step": 1110
    },
    {
      "epoch": 0.8097110617199521,
      "grad_norm": 2.523714065551758,
      "learning_rate": 2.1216730038022811e-07,
      "loss": 0.1281,
      "step": 1184
    },
    {
      "epoch": 0.8548469823901521,
      "eval_loss": 0.13589061796665192,
      "eval_runtime": 29.3628,
      "eval_samples_per_second": 17.028,
      "eval_steps_per_second": 8.514,
      "step": 1250
    },
    {
      "epoch": 0.8603180030774491,
      "grad_norm": 5.009237289428711,
      "learning_rate": 1.55893536121673e-07,
      "loss": 0.1097,
      "step": 1258
    },
    {
      "epoch": 0.9109249444349461,
      "grad_norm": 1.92766535282135,
      "learning_rate": 9.961977186311786e-08,
      "loss": 0.1246,
      "step": 1332
    },
    {
      "epoch": 0.9615318857924432,
      "grad_norm": 2.294616222381592,
      "learning_rate": 4.3346007604562734e-08,
      "loss": 0.1079,
      "step": 1406
    },
    {
      "epoch": 0.999829030603522,
      "step": 1462,
      "total_flos": 1.0635211481481216e+17,
      "train_loss": 0.3117764619078421,
      "train_runtime": 2245.6979,
      "train_samples_per_second": 5.209,
      "train_steps_per_second": 0.651
    }
  ],
  "logging_steps": 74,
  "max_steps": 1462,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0635211481481216e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}