{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.991869918699187, "eval_steps": 500, "global_step": 276, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10840108401084012, "grad_norm": 1.2238898092954031, "learning_rate": 5e-06, "loss": 1.1139, "step": 10 }, { "epoch": 0.21680216802168023, "grad_norm": 0.7661987078288646, "learning_rate": 5e-06, "loss": 1.0234, "step": 20 }, { "epoch": 0.3252032520325203, "grad_norm": 0.6931478916835131, "learning_rate": 5e-06, "loss": 1.0067, "step": 30 }, { "epoch": 0.43360433604336046, "grad_norm": 0.6288766129904495, "learning_rate": 5e-06, "loss": 1.0004, "step": 40 }, { "epoch": 0.5420054200542005, "grad_norm": 0.6511705997239466, "learning_rate": 5e-06, "loss": 0.9872, "step": 50 }, { "epoch": 0.6504065040650406, "grad_norm": 0.6237606284279241, "learning_rate": 5e-06, "loss": 0.983, "step": 60 }, { "epoch": 0.7588075880758808, "grad_norm": 0.6061811934000912, "learning_rate": 5e-06, "loss": 0.977, "step": 70 }, { "epoch": 0.8672086720867209, "grad_norm": 0.5691567098461193, "learning_rate": 5e-06, "loss": 0.9609, "step": 80 }, { "epoch": 0.975609756097561, "grad_norm": 0.579566994994573, "learning_rate": 5e-06, "loss": 0.967, "step": 90 }, { "epoch": 0.997289972899729, "eval_loss": 0.9647226333618164, "eval_runtime": 97.7674, "eval_samples_per_second": 25.417, "eval_steps_per_second": 0.399, "step": 92 }, { "epoch": 1.084010840108401, "grad_norm": 0.7893915919770154, "learning_rate": 5e-06, "loss": 1.0085, "step": 100 }, { "epoch": 1.1924119241192412, "grad_norm": 0.6881452344372211, "learning_rate": 5e-06, "loss": 0.9266, "step": 110 }, { "epoch": 1.3008130081300813, "grad_norm": 0.7094638595829068, "learning_rate": 5e-06, "loss": 0.9176, "step": 120 }, { "epoch": 1.4092140921409215, "grad_norm": 0.757737014933452, "learning_rate": 5e-06, "loss": 0.9165, "step": 130 }, { "epoch": 1.5176151761517616, "grad_norm": 0.7420578994185969, "learning_rate": 5e-06, "loss": 0.9178, "step": 140 }, { "epoch": 1.6260162601626016, "grad_norm": 0.6872431636486115, "learning_rate": 5e-06, "loss": 0.9095, "step": 150 }, { "epoch": 1.7344173441734418, "grad_norm": 0.6839268163247622, "learning_rate": 5e-06, "loss": 0.922, "step": 160 }, { "epoch": 1.8428184281842819, "grad_norm": 0.6646443132529639, "learning_rate": 5e-06, "loss": 0.9161, "step": 170 }, { "epoch": 1.951219512195122, "grad_norm": 0.7695800503499497, "learning_rate": 5e-06, "loss": 0.917, "step": 180 }, { "epoch": 1.994579945799458, "eval_loss": 0.9558340907096863, "eval_runtime": 98.5336, "eval_samples_per_second": 25.22, "eval_steps_per_second": 0.396, "step": 184 }, { "epoch": 2.059620596205962, "grad_norm": 0.8673539202787472, "learning_rate": 5e-06, "loss": 0.9576, "step": 190 }, { "epoch": 2.168021680216802, "grad_norm": 0.6595504985017855, "learning_rate": 5e-06, "loss": 0.8711, "step": 200 }, { "epoch": 2.2764227642276422, "grad_norm": 0.7339078950029295, "learning_rate": 5e-06, "loss": 0.8692, "step": 210 }, { "epoch": 2.3848238482384825, "grad_norm": 0.7590090712265437, "learning_rate": 5e-06, "loss": 0.8727, "step": 220 }, { "epoch": 2.4932249322493227, "grad_norm": 0.6567177012645716, "learning_rate": 5e-06, "loss": 0.8714, "step": 230 }, { "epoch": 2.6016260162601625, "grad_norm": 0.7927473560858851, "learning_rate": 5e-06, "loss": 0.8675, "step": 240 }, { "epoch": 2.710027100271003, "grad_norm": 0.6928632841645139, "learning_rate": 5e-06, "loss": 0.8694, "step": 250 }, { "epoch": 2.818428184281843, "grad_norm": 0.7142607867077723, "learning_rate": 5e-06, "loss": 0.8728, "step": 260 }, { "epoch": 2.926829268292683, "grad_norm": 0.7218062765913728, "learning_rate": 5e-06, "loss": 0.8735, "step": 270 }, { "epoch": 2.991869918699187, "eval_loss": 0.9591670632362366, "eval_runtime": 96.4736, "eval_samples_per_second": 25.758, "eval_steps_per_second": 0.404, "step": 276 }, { "epoch": 2.991869918699187, "step": 276, "total_flos": 462100900085760.0, "train_loss": 0.935515574786974, "train_runtime": 16221.6629, "train_samples_per_second": 8.73, "train_steps_per_second": 0.017 } ], "logging_steps": 10, "max_steps": 276, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 462100900085760.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }