{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9813084112149533, "eval_steps": 500, "global_step": 106, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09345794392523364, "grad_norm": 0.37823450565338135, "learning_rate": 1.9890401873221642e-05, "loss": 2.943, "step": 5 }, { "epoch": 0.18691588785046728, "grad_norm": 0.3274012804031372, "learning_rate": 1.9564009842765225e-05, "loss": 3.0203, "step": 10 }, { "epoch": 0.2803738317757009, "grad_norm": 0.2663849890232086, "learning_rate": 1.9027978299657436e-05, "loss": 2.8306, "step": 15 }, { "epoch": 0.37383177570093457, "grad_norm": 0.2998671233654022, "learning_rate": 1.829405685450202e-05, "loss": 2.9983, "step": 20 }, { "epoch": 0.4672897196261682, "grad_norm": 0.30643677711486816, "learning_rate": 1.7378332790417275e-05, "loss": 3.0059, "step": 25 }, { "epoch": 0.5607476635514018, "grad_norm": 0.3167150318622589, "learning_rate": 1.6300878435817115e-05, "loss": 2.845, "step": 30 }, { "epoch": 0.6542056074766355, "grad_norm": 0.29322898387908936, "learning_rate": 1.5085311186492206e-05, "loss": 2.923, "step": 35 }, { "epoch": 0.7476635514018691, "grad_norm": 0.3546823263168335, "learning_rate": 1.3758275821142382e-05, "loss": 2.9725, "step": 40 }, { "epoch": 0.8411214953271028, "grad_norm": 0.35268881916999817, "learning_rate": 1.234886045780984e-05, "loss": 2.9861, "step": 45 }, { "epoch": 0.9345794392523364, "grad_norm": 0.3079037666320801, "learning_rate": 1.0887958953229349e-05, "loss": 2.9897, "step": 50 }, { "epoch": 1.02803738317757, "grad_norm": 0.33415308594703674, "learning_rate": 9.407593721062858e-06, "loss": 2.9677, "step": 55 }, { "epoch": 1.1214953271028036, "grad_norm": 0.3114282190799713, "learning_rate": 7.940213812589018e-06, "loss": 2.962, "step": 60 }, { "epoch": 1.2149532710280373, "grad_norm": 0.3043849766254425, "learning_rate": 6.517983645656014e-06, "loss": 2.9335, "step": 65 }, { "epoch": 1.308411214953271, "grad_norm": 0.35792258381843567, "learning_rate": 5.172077972692553e-06, "loss": 2.9682, "step": 70 }, { "epoch": 1.4018691588785046, "grad_norm": 0.30202534794807434, "learning_rate": 3.931998541814069e-06, "loss": 2.9475, "step": 75 }, { "epoch": 1.4953271028037383, "grad_norm": 0.40600359439849854, "learning_rate": 2.8249274295566863e-06, "loss": 2.9373, "step": 80 }, { "epoch": 1.588785046728972, "grad_norm": 0.31094491481781006, "learning_rate": 1.875131219943187e-06, "loss": 2.9115, "step": 85 }, { "epoch": 1.6822429906542056, "grad_norm": 0.3808990716934204, "learning_rate": 1.1034290900525279e-06, "loss": 2.9237, "step": 90 }, { "epoch": 1.7757009345794392, "grad_norm": 0.33799558877944946, "learning_rate": 5.267364614580861e-07, "loss": 2.8901, "step": 95 }, { "epoch": 1.8691588785046729, "grad_norm": 0.3236595690250397, "learning_rate": 1.5769422052403172e-07, "loss": 2.8905, "step": 100 }, { "epoch": 1.9626168224299065, "grad_norm": 0.3427414000034332, "learning_rate": 4.39163491205652e-09, "loss": 2.9548, "step": 105 }, { "epoch": 1.9813084112149533, "step": 106, "total_flos": 1.078522514767872e+16, "train_loss": 2.9435044122192093, "train_runtime": 1048.2079, "train_samples_per_second": 1.631, "train_steps_per_second": 0.101 } ], "logging_steps": 5, "max_steps": 106, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, "total_flos": 1.078522514767872e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }