{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 325, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15384615384615385, "grad_norm": 1.6880494356155396, "learning_rate": 1.9384615384615386e-05, "loss": 1.0254, "step": 10 }, { "epoch": 0.3076923076923077, "grad_norm": 1.6863218545913696, "learning_rate": 1.876923076923077e-05, "loss": 0.875, "step": 20 }, { "epoch": 0.46153846153846156, "grad_norm": 1.5608391761779785, "learning_rate": 1.8153846153846155e-05, "loss": 0.7825, "step": 30 }, { "epoch": 0.6153846153846154, "grad_norm": 1.4615881443023682, "learning_rate": 1.753846153846154e-05, "loss": 0.6326, "step": 40 }, { "epoch": 0.7692307692307693, "grad_norm": 2.1352627277374268, "learning_rate": 1.6923076923076924e-05, "loss": 0.5693, "step": 50 }, { "epoch": 0.9230769230769231, "grad_norm": 1.560471773147583, "learning_rate": 1.630769230769231e-05, "loss": 0.4709, "step": 60 }, { "epoch": 1.0769230769230769, "grad_norm": 1.3950369358062744, "learning_rate": 1.5692307692307693e-05, "loss": 0.4044, "step": 70 }, { "epoch": 1.2307692307692308, "grad_norm": 1.1524418592453003, "learning_rate": 1.5076923076923078e-05, "loss": 0.3491, "step": 80 }, { "epoch": 1.3846153846153846, "grad_norm": 1.3851335048675537, "learning_rate": 1.4461538461538462e-05, "loss": 0.3191, "step": 90 }, { "epoch": 1.5384615384615383, "grad_norm": 0.7817617654800415, "learning_rate": 1.3846153846153847e-05, "loss": 0.2975, "step": 100 }, { "epoch": 1.6923076923076923, "grad_norm": 2.4296486377716064, "learning_rate": 1.3230769230769231e-05, "loss": 0.2719, "step": 110 }, { "epoch": 1.8461538461538463, "grad_norm": 3.495382308959961, "learning_rate": 1.2615384615384616e-05, "loss": 0.2863, "step": 120 }, { "epoch": 2.0, "grad_norm": 0.9178031086921692, "learning_rate": 1.2e-05, "loss": 0.2066, "step": 130 }, { "epoch": 2.1538461538461537, "grad_norm": 1.042531967163086, "learning_rate": 1.1384615384615385e-05, "loss": 0.1934, "step": 140 }, { "epoch": 2.3076923076923075, "grad_norm": 2.2865443229675293, "learning_rate": 1.076923076923077e-05, "loss": 0.1905, "step": 150 }, { "epoch": 2.4615384615384617, "grad_norm": 2.3291144371032715, "learning_rate": 1.0153846153846154e-05, "loss": 0.2134, "step": 160 }, { "epoch": 2.6153846153846154, "grad_norm": 1.420284628868103, "learning_rate": 9.53846153846154e-06, "loss": 0.1895, "step": 170 }, { "epoch": 2.769230769230769, "grad_norm": 2.7554662227630615, "learning_rate": 8.923076923076925e-06, "loss": 0.1897, "step": 180 }, { "epoch": 2.9230769230769234, "grad_norm": 1.4687916040420532, "learning_rate": 8.307692307692309e-06, "loss": 0.1761, "step": 190 }, { "epoch": 3.076923076923077, "grad_norm": 3.535647392272949, "learning_rate": 7.692307692307694e-06, "loss": 0.1862, "step": 200 }, { "epoch": 3.230769230769231, "grad_norm": 3.035341739654541, "learning_rate": 7.076923076923078e-06, "loss": 0.1937, "step": 210 }, { "epoch": 3.3846153846153846, "grad_norm": 2.828181028366089, "learning_rate": 6.461538461538463e-06, "loss": 0.1407, "step": 220 }, { "epoch": 3.5384615384615383, "grad_norm": 2.125542163848877, "learning_rate": 5.846153846153847e-06, "loss": 0.1338, "step": 230 }, { "epoch": 3.6923076923076925, "grad_norm": 1.5795857906341553, "learning_rate": 5.230769230769232e-06, "loss": 0.131, "step": 240 }, { "epoch": 3.8461538461538463, "grad_norm": 0.5753270387649536, "learning_rate": 4.615384615384616e-06, "loss": 0.1493, "step": 250 }, { "epoch": 4.0, "grad_norm": 0.9189938306808472, "learning_rate": 4.000000000000001e-06, "loss": 0.1489, "step": 260 }, { "epoch": 4.153846153846154, "grad_norm": 0.939368724822998, "learning_rate": 3.384615384615385e-06, "loss": 0.1615, "step": 270 }, { "epoch": 4.3076923076923075, "grad_norm": 1.0938255786895752, "learning_rate": 2.7692307692307697e-06, "loss": 0.1433, "step": 280 }, { "epoch": 4.461538461538462, "grad_norm": 0.398496150970459, "learning_rate": 2.153846153846154e-06, "loss": 0.1485, "step": 290 }, { "epoch": 4.615384615384615, "grad_norm": 1.6919806003570557, "learning_rate": 1.5384615384615387e-06, "loss": 0.131, "step": 300 }, { "epoch": 4.769230769230769, "grad_norm": 0.9723582863807678, "learning_rate": 9.230769230769232e-07, "loss": 0.1288, "step": 310 }, { "epoch": 4.923076923076923, "grad_norm": 0.5153496265411377, "learning_rate": 3.0769230769230774e-07, "loss": 0.109, "step": 320 }, { "epoch": 5.0, "step": 325, "total_flos": 4.006371770595533e+17, "train_loss": 0.29627929100623496, "train_runtime": 105.6493, "train_samples_per_second": 48.935, "train_steps_per_second": 3.076 } ], "logging_steps": 10, "max_steps": 325, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.006371770595533e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }