{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5, "eval_steps": 3, "global_step": 60, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008333333333333333, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.1097, "step": 1 }, { "epoch": 0.025, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 1.1424, "step": 3 }, { "epoch": 0.05, "grad_norm": 1.4957225914582386, "learning_rate": 6.000000000000001e-07, "loss": 1.1714, "step": 6 }, { "epoch": 0.075, "grad_norm": 1.0203233450225075, "learning_rate": 2.4000000000000003e-06, "loss": 1.1459, "step": 9 }, { "epoch": 0.1, "grad_norm": 1.1952212091375145, "learning_rate": 3e-06, "loss": 1.1465, "step": 12 }, { "epoch": 0.125, "grad_norm": 1.832578954318156, "learning_rate": 2.839090909090909e-06, "loss": 1.1517, "step": 15 }, { "epoch": 0.15, "grad_norm": 1.0877339332216878, "learning_rate": 2.678181818181818e-06, "loss": 1.0991, "step": 18 }, { "epoch": 0.175, "grad_norm": 1.4203314600042332, "learning_rate": 2.5172727272727275e-06, "loss": 1.1481, "step": 21 }, { "epoch": 0.2, "grad_norm": 2.543863444651869, "learning_rate": 2.3563636363636366e-06, "loss": 1.151, "step": 24 }, { "epoch": 0.225, "grad_norm": 2.2672694357791805, "learning_rate": 2.1954545454545456e-06, "loss": 1.1132, "step": 27 }, { "epoch": 0.25, "grad_norm": 0.7570282713584158, "learning_rate": 2.0345454545454546e-06, "loss": 1.1043, "step": 30 }, { "epoch": 0.275, "grad_norm": 0.7556695763204282, "learning_rate": 1.9272727272727273e-06, "loss": 1.0492, "step": 33 }, { "epoch": 0.3, "grad_norm": 2.5250919282965105, "learning_rate": 1.8199999999999997e-06, "loss": 1.1206, "step": 36 }, { "epoch": 0.325, "grad_norm": 2.402860972914852, "learning_rate": 1.659090909090909e-06, "loss": 1.0908, "step": 39 }, { "epoch": 0.35, "grad_norm": 0.9250227812759156, "learning_rate": 1.4981818181818184e-06, "loss": 1.0982, "step": 42 }, { "epoch": 0.375, "grad_norm": 1.0119045287556745, "learning_rate": 1.3372727272727274e-06, "loss": 1.1147, "step": 45 }, { "epoch": 0.4, "grad_norm": 2.7972164809866773, "learning_rate": 1.1763636363636364e-06, "loss": 1.1004, "step": 48 }, { "epoch": 0.425, "grad_norm": 1.4489209555210352, "learning_rate": 1.0154545454545454e-06, "loss": 1.0982, "step": 51 }, { "epoch": 0.45, "grad_norm": 0.7531358827177462, "learning_rate": 8.545454545454544e-07, "loss": 1.0794, "step": 54 }, { "epoch": 0.475, "grad_norm": 0.7368058065522421, "learning_rate": 6.936363636363635e-07, "loss": 1.0948, "step": 57 }, { "epoch": 0.5, "grad_norm": 1.2350079863263905, "learning_rate": 5.327272727272729e-07, "loss": 1.1091, "step": 60 }, { "epoch": 0.5, "step": 60, "total_flos": 238563958456320.0, "train_loss": 1.1158998648325602, "train_runtime": 19061.2793, "train_samples_per_second": 0.402, "train_steps_per_second": 0.003 } ], "logging_steps": 3, "max_steps": 60, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 12, "total_flos": 238563958456320.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }