{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 2801, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03570153516601214, "grad_norm": 0.2294982522726059, "learning_rate": 2.0000000000000003e-06, "loss": 2.4571, "step": 100 }, { "epoch": 0.07140307033202428, "grad_norm": 0.17154090106487274, "learning_rate": 4.000000000000001e-06, "loss": 2.4541, "step": 200 }, { "epoch": 0.10710460549803641, "grad_norm": 0.19298459589481354, "learning_rate": 6e-06, "loss": 2.4134, "step": 300 }, { "epoch": 0.14280614066404856, "grad_norm": 0.32980746030807495, "learning_rate": 8.000000000000001e-06, "loss": 2.3925, "step": 400 }, { "epoch": 0.1785076758300607, "grad_norm": 0.4573824107646942, "learning_rate": 1e-05, "loss": 2.309, "step": 500 }, { "epoch": 0.21420921099607282, "grad_norm": 0.438753604888916, "learning_rate": 1.2e-05, "loss": 2.3336, "step": 600 }, { "epoch": 0.24991074616208497, "grad_norm": 0.5054428577423096, "learning_rate": 1.4e-05, "loss": 2.2911, "step": 700 }, { "epoch": 0.28561228132809713, "grad_norm": 0.5664726495742798, "learning_rate": 1.6000000000000003e-05, "loss": 2.234, "step": 800 }, { "epoch": 0.32131381649410923, "grad_norm": 0.6149884462356567, "learning_rate": 1.8e-05, "loss": 2.2339, "step": 900 }, { "epoch": 0.3570153516601214, "grad_norm": 0.7202581763267517, "learning_rate": 2e-05, "loss": 2.198, "step": 1000 }, { "epoch": 0.39271688682613354, "grad_norm": 0.6320284008979797, "learning_rate": 1.98482457644138e-05, "loss": 2.1665, "step": 1100 }, { "epoch": 0.42841842199214564, "grad_norm": 0.6476149559020996, "learning_rate": 1.9397588927258876e-05, "loss": 2.2035, "step": 1200 }, { "epoch": 0.4641199571581578, "grad_norm": 0.6560292840003967, "learning_rate": 1.8661707305302052e-05, "loss": 2.1686, "step": 1300 }, { "epoch": 0.49982149232416995, "grad_norm": 0.6389040350914001, "learning_rate": 1.7662935529147725e-05, "loss": 2.1021, "step": 1400 }, { "epoch": 0.5355230274901821, "grad_norm": 0.8602333068847656, "learning_rate": 1.643158716827897e-05, "loss": 2.1302, "step": 1500 }, { "epoch": 0.5712245626561943, "grad_norm": 0.764371395111084, "learning_rate": 1.500503468854458e-05, "loss": 2.147, "step": 1600 }, { "epoch": 0.6069260978222063, "grad_norm": 0.809414267539978, "learning_rate": 1.342657516616169e-05, "loss": 2.1208, "step": 1700 }, { "epoch": 0.6426276329882185, "grad_norm": 0.8461398482322693, "learning_rate": 1.1744116184774898e-05, "loss": 2.067, "step": 1800 }, { "epoch": 0.6783291681542306, "grad_norm": 0.7336994409561157, "learning_rate": 1.00087217997093e-05, "loss": 2.1056, "step": 1900 }, { "epoch": 0.7140307033202428, "grad_norm": 0.8887193202972412, "learning_rate": 8.273062700634137e-06, "loss": 2.0957, "step": 2000 }, { "epoch": 0.7497322384862549, "grad_norm": 0.7335702776908875, "learning_rate": 6.589817611513086e-06, "loss": 2.0847, "step": 2100 }, { "epoch": 0.7854337736522671, "grad_norm": 0.7930261492729187, "learning_rate": 5.010074446706905e-06, "loss": 2.1111, "step": 2200 }, { "epoch": 0.8211353088182792, "grad_norm": 0.754761278629303, "learning_rate": 3.5817797494951313e-06, "loss": 2.1029, "step": 2300 }, { "epoch": 0.8568368439842913, "grad_norm": 0.7991265058517456, "learning_rate": 2.3482834738712023e-06, "loss": 2.0618, "step": 2400 }, { "epoch": 0.8925383791503034, "grad_norm": 1.1803473234176636, "learning_rate": 1.347023276716265e-06, "loss": 2.0831, "step": 2500 }, { "epoch": 0.9282399143163156, "grad_norm": 0.8793602585792542, "learning_rate": 6.083882531987495e-07, "loss": 2.1029, "step": 2600 }, { "epoch": 0.9639414494823277, "grad_norm": 1.1215691566467285, "learning_rate": 1.5479660199286927e-07, "loss": 2.0779, "step": 2700 }, { "epoch": 0.9996429846483399, "grad_norm": 0.8478527665138245, "learning_rate": 1.5213958033388766e-11, "loss": 2.0917, "step": 2800 }, { "epoch": 1.0, "step": 2801, "total_flos": 5.10776634114048e+16, "train_loss": 2.1906768205718286, "train_runtime": 755.3629, "train_samples_per_second": 7.416, "train_steps_per_second": 3.708 } ], "logging_steps": 100, "max_steps": 2801, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.10776634114048e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }