{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8032128514056225, "eval_steps": 5, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01606425702811245, "eval_loss": 6.64418363571167, "eval_runtime": 0.6161, "eval_samples_per_second": 170.42, "eval_steps_per_second": 22.723, "step": 1 }, { "epoch": 0.04819277108433735, "grad_norm": 6.182103633880615, "learning_rate": 3e-05, "loss": 6.1901, "step": 3 }, { "epoch": 0.08032128514056225, "eval_loss": 6.533019065856934, "eval_runtime": 0.6137, "eval_samples_per_second": 171.087, "eval_steps_per_second": 22.812, "step": 5 }, { "epoch": 0.0963855421686747, "grad_norm": 5.5103912353515625, "learning_rate": 6e-05, "loss": 6.1677, "step": 6 }, { "epoch": 0.14457831325301204, "grad_norm": 5.758279323577881, "learning_rate": 9e-05, "loss": 6.1421, "step": 9 }, { "epoch": 0.1606425702811245, "eval_loss": 6.087296009063721, "eval_runtime": 0.612, "eval_samples_per_second": 171.564, "eval_steps_per_second": 22.875, "step": 10 }, { "epoch": 0.1927710843373494, "grad_norm": 6.739719390869141, "learning_rate": 9.938441702975689e-05, "loss": 5.7648, "step": 12 }, { "epoch": 0.24096385542168675, "grad_norm": 4.701174259185791, "learning_rate": 9.619397662556435e-05, "loss": 5.9243, "step": 15 }, { "epoch": 0.24096385542168675, "eval_loss": 5.733823299407959, "eval_runtime": 0.6162, "eval_samples_per_second": 170.403, "eval_steps_per_second": 22.72, "step": 15 }, { "epoch": 0.2891566265060241, "grad_norm": 5.849489688873291, "learning_rate": 9.045084971874738e-05, "loss": 5.448, "step": 18 }, { "epoch": 0.321285140562249, "eval_loss": 5.51096248626709, "eval_runtime": 0.6135, "eval_samples_per_second": 171.16, "eval_steps_per_second": 22.821, "step": 20 }, { "epoch": 0.3373493975903614, "grad_norm": 5.229000091552734, "learning_rate": 8.247240241650918e-05, "loss": 5.6213, "step": 21 }, { "epoch": 0.3855421686746988, "grad_norm": 5.692105770111084, "learning_rate": 7.269952498697734e-05, "loss": 5.3913, "step": 24 }, { "epoch": 0.40160642570281124, "eval_loss": 5.345283508300781, "eval_runtime": 0.6101, "eval_samples_per_second": 172.09, "eval_steps_per_second": 22.945, "step": 25 }, { "epoch": 0.43373493975903615, "grad_norm": 5.2464704513549805, "learning_rate": 6.167226819279528e-05, "loss": 5.5785, "step": 27 }, { "epoch": 0.4819277108433735, "grad_norm": 5.426384925842285, "learning_rate": 5e-05, "loss": 5.2095, "step": 30 }, { "epoch": 0.4819277108433735, "eval_loss": 5.21764612197876, "eval_runtime": 0.6151, "eval_samples_per_second": 170.698, "eval_steps_per_second": 22.76, "step": 30 }, { "epoch": 0.5301204819277109, "grad_norm": 4.646877765655518, "learning_rate": 3.832773180720475e-05, "loss": 5.1446, "step": 33 }, { "epoch": 0.5622489959839357, "eval_loss": 5.134397506713867, "eval_runtime": 0.6146, "eval_samples_per_second": 170.839, "eval_steps_per_second": 22.778, "step": 35 }, { "epoch": 0.5783132530120482, "grad_norm": 4.9829936027526855, "learning_rate": 2.7300475013022663e-05, "loss": 5.1313, "step": 36 }, { "epoch": 0.6265060240963856, "grad_norm": 6.772069454193115, "learning_rate": 1.7527597583490822e-05, "loss": 5.0577, "step": 39 }, { "epoch": 0.642570281124498, "eval_loss": 5.084665298461914, "eval_runtime": 0.6128, "eval_samples_per_second": 171.356, "eval_steps_per_second": 22.848, "step": 40 }, { "epoch": 0.6746987951807228, "grad_norm": 6.180466651916504, "learning_rate": 9.549150281252633e-06, "loss": 5.132, "step": 42 }, { "epoch": 0.7228915662650602, "grad_norm": 4.479893207550049, "learning_rate": 3.8060233744356633e-06, "loss": 5.2261, "step": 45 }, { "epoch": 0.7228915662650602, "eval_loss": 5.069843292236328, "eval_runtime": 0.6128, "eval_samples_per_second": 171.356, "eval_steps_per_second": 22.847, "step": 45 }, { "epoch": 0.7710843373493976, "grad_norm": 5.148656845092773, "learning_rate": 6.15582970243117e-07, "loss": 4.825, "step": 48 }, { "epoch": 0.8032128514056225, "eval_loss": 5.06525182723999, "eval_runtime": 0.6142, "eval_samples_per_second": 170.963, "eval_steps_per_second": 22.795, "step": 50 } ], "logging_steps": 3, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 32706134016000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }