{ "best_metric": null, "best_model_checkpoint": null, "epoch": 200.0, "eval_steps": 500, "global_step": 6800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 14.705882352941176, "grad_norm": 0.9709584712982178, "learning_rate": 0.0009264705882352942, "loss": 6.89, "step": 500 }, { "epoch": 29.41176470588235, "grad_norm": 0.5043423771858215, "learning_rate": 0.0008529411764705882, "loss": 1.6253, "step": 1000 }, { "epoch": 44.11764705882353, "grad_norm": 0.24679997563362122, "learning_rate": 0.0007794117647058824, "loss": 0.0784, "step": 1500 }, { "epoch": 58.8235294117647, "grad_norm": 0.13042904436588287, "learning_rate": 0.0007058823529411765, "loss": 0.011, "step": 2000 }, { "epoch": 73.52941176470588, "grad_norm": 0.1388075053691864, "learning_rate": 0.0006323529411764706, "loss": 0.0133, "step": 2500 }, { "epoch": 88.23529411764706, "grad_norm": 0.21229323744773865, "learning_rate": 0.0005588235294117647, "loss": 0.0072, "step": 3000 }, { "epoch": 102.94117647058823, "grad_norm": 0.055483490228652954, "learning_rate": 0.0004852941176470588, "loss": 0.0052, "step": 3500 }, { "epoch": 117.6470588235294, "grad_norm": 0.06371592730283737, "learning_rate": 0.0004117647058823529, "loss": 0.0032, "step": 4000 }, { "epoch": 132.35294117647058, "grad_norm": 0.019501326605677605, "learning_rate": 0.0003382352941176471, "loss": 0.0028, "step": 4500 }, { "epoch": 147.05882352941177, "grad_norm": 0.18367668986320496, "learning_rate": 0.0002647058823529412, "loss": 0.0034, "step": 5000 }, { "epoch": 161.76470588235293, "grad_norm": 0.026915088295936584, "learning_rate": 0.00019117647058823528, "loss": 0.0029, "step": 5500 }, { "epoch": 176.47058823529412, "grad_norm": 0.014995796605944633, "learning_rate": 0.00011764705882352942, "loss": 0.0018, "step": 6000 }, { "epoch": 191.1764705882353, "grad_norm": 0.04031387344002724, "learning_rate": 4.411764705882353e-05, "loss": 0.0015, "step": 6500 }, { "epoch": 200.0, "step": 6800, "total_flos": 1.1838393679872e+17, "train_loss": 0.6357850695752045, "train_runtime": 11166.7529, "train_samples_per_second": 2.436, "train_steps_per_second": 0.609 } ], "logging_steps": 500, "max_steps": 6800, "num_input_tokens_seen": 0, "num_train_epochs": 200, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1838393679872e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }