{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9453781512605042, "eval_steps": 500, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.026260504201680673, "grad_norm": 2.606417417526245, "learning_rate": 0.0038949579831932773, "loss": 1.2269, "step": 50 }, { "epoch": 0.052521008403361345, "grad_norm": 2.463599920272827, "learning_rate": 0.0037899159663865546, "loss": 1.1055, "step": 100 }, { "epoch": 0.07878151260504201, "grad_norm": 3.1962060928344727, "learning_rate": 0.003684873949579832, "loss": 1.0706, "step": 150 }, { "epoch": 0.10504201680672269, "grad_norm": 3.148374557495117, "learning_rate": 0.0035798319327731095, "loss": 1.1056, "step": 200 }, { "epoch": 0.13130252100840337, "grad_norm": 2.5041284561157227, "learning_rate": 0.0034747899159663868, "loss": 1.0834, "step": 250 }, { "epoch": 0.15756302521008403, "grad_norm": 2.5208585262298584, "learning_rate": 0.003369747899159664, "loss": 1.1076, "step": 300 }, { "epoch": 0.18382352941176472, "grad_norm": 2.758544445037842, "learning_rate": 0.0032647058823529413, "loss": 1.0285, "step": 350 }, { "epoch": 0.21008403361344538, "grad_norm": 3.854949951171875, "learning_rate": 0.0031596638655462185, "loss": 1.0204, "step": 400 }, { "epoch": 0.23634453781512604, "grad_norm": 3.600606918334961, "learning_rate": 0.0030546218487394958, "loss": 1.0311, "step": 450 }, { "epoch": 0.26260504201680673, "grad_norm": 3.2583634853363037, "learning_rate": 0.0029495798319327735, "loss": 0.9868, "step": 500 }, { "epoch": 0.28886554621848737, "grad_norm": 3.2210769653320312, "learning_rate": 0.0028445378151260507, "loss": 1.0129, "step": 550 }, { "epoch": 0.31512605042016806, "grad_norm": 3.139937162399292, "learning_rate": 0.002739495798319328, "loss": 1.0096, "step": 600 }, { "epoch": 0.34138655462184875, "grad_norm": 2.95237398147583, "learning_rate": 0.002634453781512605, "loss": 0.9322, "step": 650 }, { "epoch": 0.36764705882352944, "grad_norm": 3.39207124710083, "learning_rate": 0.0025294117647058825, "loss": 0.939, "step": 700 }, { "epoch": 0.3939075630252101, "grad_norm": 3.5582635402679443, "learning_rate": 0.0024243697478991597, "loss": 1.0108, "step": 750 }, { "epoch": 0.42016806722689076, "grad_norm": 3.2852275371551514, "learning_rate": 0.0023193277310924374, "loss": 0.884, "step": 800 }, { "epoch": 0.44642857142857145, "grad_norm": 3.8662281036376953, "learning_rate": 0.0022142857142857146, "loss": 0.8889, "step": 850 }, { "epoch": 0.4726890756302521, "grad_norm": 3.1788485050201416, "learning_rate": 0.002109243697478992, "loss": 0.8649, "step": 900 }, { "epoch": 0.4989495798319328, "grad_norm": 3.658193588256836, "learning_rate": 0.002004201680672269, "loss": 0.821, "step": 950 }, { "epoch": 0.5252100840336135, "grad_norm": 3.557441234588623, "learning_rate": 0.0018991596638655462, "loss": 0.8908, "step": 1000 }, { "epoch": 0.5514705882352942, "grad_norm": 3.6631717681884766, "learning_rate": 0.0017941176470588236, "loss": 0.8805, "step": 1050 }, { "epoch": 0.5777310924369747, "grad_norm": 3.9701390266418457, "learning_rate": 0.001689075630252101, "loss": 0.8352, "step": 1100 }, { "epoch": 0.6039915966386554, "grad_norm": 3.4215235710144043, "learning_rate": 0.0015840336134453781, "loss": 0.8129, "step": 1150 }, { "epoch": 0.6302521008403361, "grad_norm": 3.4287657737731934, "learning_rate": 0.0014789915966386556, "loss": 0.8299, "step": 1200 }, { "epoch": 0.6565126050420168, "grad_norm": 3.0316977500915527, "learning_rate": 
0.0013739495798319329, "loss": 0.8371, "step": 1250 }, { "epoch": 0.6827731092436975, "grad_norm": 3.983886957168579, "learning_rate": 0.0012689075630252101, "loss": 0.816, "step": 1300 }, { "epoch": 0.7090336134453782, "grad_norm": 4.039212226867676, "learning_rate": 0.0011638655462184876, "loss": 0.791, "step": 1350 }, { "epoch": 0.7352941176470589, "grad_norm": 3.5772385597229004, "learning_rate": 0.0010588235294117648, "loss": 0.792, "step": 1400 }, { "epoch": 0.7615546218487395, "grad_norm": 3.9995005130767822, "learning_rate": 0.0009537815126050421, "loss": 0.8089, "step": 1450 }, { "epoch": 0.7878151260504201, "grad_norm": 3.690575122833252, "learning_rate": 0.0008487394957983193, "loss": 0.733, "step": 1500 }, { "epoch": 0.8140756302521008, "grad_norm": 3.8193883895874023, "learning_rate": 0.0007436974789915967, "loss": 0.7532, "step": 1550 }, { "epoch": 0.8403361344537815, "grad_norm": 3.2196240425109863, "learning_rate": 0.0006386554621848739, "loss": 0.7209, "step": 1600 }, { "epoch": 0.8665966386554622, "grad_norm": 3.2536444664001465, "learning_rate": 0.0005336134453781513, "loss": 0.7259, "step": 1650 }, { "epoch": 0.8928571428571429, "grad_norm": 4.125887870788574, "learning_rate": 0.00042857142857142855, "loss": 0.7055, "step": 1700 }, { "epoch": 0.9191176470588235, "grad_norm": 3.512568950653076, "learning_rate": 0.0003235294117647059, "loss": 0.7074, "step": 1750 }, { "epoch": 0.9453781512605042, "grad_norm": 4.172778129577637, "learning_rate": 0.00021848739495798319, "loss": 0.7267, "step": 1800 } ], "logging_steps": 50, "max_steps": 1904, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }