{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.373134328358209, "eval_steps": 500, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.018656716417910446, "grad_norm": 2.403158187866211, "learning_rate": 4.9998282347929784e-05, "loss": 3.3875, "step": 5 }, { "epoch": 0.03731343283582089, "grad_norm": 2.301710367202759, "learning_rate": 4.99931296277454e-05, "loss": 2.9015, "step": 10 }, { "epoch": 0.055970149253731345, "grad_norm": 1.271048665046692, "learning_rate": 4.998454254749331e-05, "loss": 2.6229, "step": 15 }, { "epoch": 0.07462686567164178, "grad_norm": 1.069893717765808, "learning_rate": 4.997252228714279e-05, "loss": 2.3704, "step": 20 }, { "epoch": 0.09328358208955224, "grad_norm": 0.9044906497001648, "learning_rate": 4.9957070498423854e-05, "loss": 2.3782, "step": 25 }, { "epoch": 0.11194029850746269, "grad_norm": 0.9635376334190369, "learning_rate": 4.993818930460026e-05, "loss": 2.3576, "step": 30 }, { "epoch": 0.13059701492537312, "grad_norm": 0.8513979315757751, "learning_rate": 4.9915881300177725e-05, "loss": 2.4603, "step": 35 }, { "epoch": 0.14925373134328357, "grad_norm": 0.845267117023468, "learning_rate": 4.9890149550547454e-05, "loss": 2.2033, "step": 40 }, { "epoch": 0.16791044776119404, "grad_norm": 0.6632418036460876, "learning_rate": 4.98609975915649e-05, "loss": 2.1851, "step": 45 }, { "epoch": 0.1865671641791045, "grad_norm": 0.6857479810714722, "learning_rate": 4.982842942906386e-05, "loss": 2.3592, "step": 50 }, { "epoch": 0.20522388059701493, "grad_norm": 0.7204287648200989, "learning_rate": 4.979244953830608e-05, "loss": 2.1323, "step": 55 }, { "epoch": 0.22388059701492538, "grad_norm": 0.6864420175552368, "learning_rate": 4.9753062863366276e-05, "loss": 2.2138, "step": 60 }, { "epoch": 0.24253731343283583, "grad_norm": 0.7536088228225708, "learning_rate": 4.971027481645274e-05, "loss": 2.2584, "step": 65 }, { "epoch": 0.26119402985074625, "grad_norm": 0.9708526134490967, "learning_rate": 4.966409127716367e-05, "loss": 2.2669, "step": 70 }, { "epoch": 0.2798507462686567, "grad_norm": 0.7516190409660339, "learning_rate": 4.96145185916792e-05, "loss": 2.2133, "step": 75 }, { "epoch": 0.29850746268656714, "grad_norm": 0.7864778637886047, "learning_rate": 4.95615635718894e-05, "loss": 2.1683, "step": 80 }, { "epoch": 0.31716417910447764, "grad_norm": 0.7846741080284119, "learning_rate": 4.950523349445824e-05, "loss": 2.1274, "step": 85 }, { "epoch": 0.3358208955223881, "grad_norm": 0.816838800907135, "learning_rate": 4.944553609982363e-05, "loss": 2.2033, "step": 90 }, { "epoch": 0.35447761194029853, "grad_norm": 0.7661916017532349, "learning_rate": 4.938247959113386e-05, "loss": 2.1492, "step": 95 }, { "epoch": 0.373134328358209, "grad_norm": 0.8964986205101013, "learning_rate": 4.931607263312032e-05, "loss": 2.0862, "step": 100 } ], "logging_steps": 5, "max_steps": 1340, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.0457337537390182e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }