{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.503370786516854, "eval_steps": 3, "global_step": 56, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.008988764044943821, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.3561, "step": 1 }, { "epoch": 0.02696629213483146, "grad_norm": 0.0, "learning_rate": 0.0, "loss": 2.3742, "step": 3 }, { "epoch": 0.05393258426966292, "grad_norm": 1.6691291411784266, "learning_rate": 1.2000000000000002e-06, "loss": 2.378, "step": 6 }, { "epoch": 0.08089887640449438, "grad_norm": 1.3100423824193625, "learning_rate": 2.4000000000000003e-06, "loss": 2.3941, "step": 9 }, { "epoch": 0.10786516853932585, "grad_norm": 1.5134581208383007, "learning_rate": 2.942156862745098e-06, "loss": 2.3612, "step": 12 }, { "epoch": 0.1348314606741573, "grad_norm": 1.0141381931565556, "learning_rate": 2.7686274509803922e-06, "loss": 2.357, "step": 15 }, { "epoch": 0.16179775280898875, "grad_norm": 0.8466314668182882, "learning_rate": 2.652941176470588e-06, "loss": 2.3297, "step": 18 }, { "epoch": 0.18876404494382024, "grad_norm": 1.0826426333604904, "learning_rate": 2.4794117647058824e-06, "loss": 2.3321, "step": 21 }, { "epoch": 0.2157303370786517, "grad_norm": 1.6280064420318552, "learning_rate": 2.305882352941176e-06, "loss": 2.307, "step": 24 }, { "epoch": 0.24269662921348314, "grad_norm": 0.6741353244640793, "learning_rate": 2.1323529411764704e-06, "loss": 2.3129, "step": 27 }, { "epoch": 0.2696629213483146, "grad_norm": 0.6817491063656745, "learning_rate": 1.9588235294117646e-06, "loss": 2.3311, "step": 30 }, { "epoch": 0.2966292134831461, "grad_norm": 2.1277430036014007, "learning_rate": 1.7852941176470589e-06, "loss": 2.3254, "step": 33 }, { "epoch": 0.3235955056179775, "grad_norm": 0.638893880509862, "learning_rate": 1.6117647058823529e-06, "loss": 2.2827, "step": 36 }, { "epoch": 0.350561797752809, "grad_norm": 0.7151303076075821, "learning_rate": 1.4382352941176471e-06, "loss": 2.3282, "step": 39 }, { "epoch": 0.3775280898876405, "grad_norm": 0.7055970837174532, "learning_rate": 1.3225490196078432e-06, "loss": 2.2962, "step": 42 }, { "epoch": 0.4044943820224719, "grad_norm": 0.7744109790448125, "learning_rate": 1.1490196078431372e-06, "loss": 2.3371, "step": 45 }, { "epoch": 0.4314606741573034, "grad_norm": 0.733295577409483, "learning_rate": 9.754901960784315e-07, "loss": 2.2953, "step": 48 }, { "epoch": 0.4584269662921348, "grad_norm": 1.4483568999325545, "learning_rate": 8.019607843137255e-07, "loss": 2.2863, "step": 51 }, { "epoch": 0.4853932584269663, "grad_norm": 1.0119535871876866, "learning_rate": 6.284313725490195e-07, "loss": 2.2993, "step": 54 }, { "epoch": 0.503370786516854, "step": 56, "total_flos": 223005439426560.0, "train_loss": 2.3332885844366893, "train_runtime": 17499.5122, "train_samples_per_second": 0.407, "train_steps_per_second": 0.003 } ], "logging_steps": 3, "max_steps": 56, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 12, "total_flos": 223005439426560.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }