{ "best_metric": 0.747850775718689, "best_model_checkpoint": "Action_model/checkpoint-300", "epoch": 1.0, "eval_steps": 100, "global_step": 314, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06, "grad_norm": 1.3472708463668823, "learning_rate": 9.363057324840766e-05, "loss": 2.1856, "step": 20 }, { "epoch": 0.13, "grad_norm": 1.6540354490280151, "learning_rate": 8.726114649681529e-05, "loss": 1.9121, "step": 40 }, { "epoch": 0.19, "grad_norm": 1.9076273441314697, "learning_rate": 8.089171974522294e-05, "loss": 1.5695, "step": 60 }, { "epoch": 0.25, "grad_norm": 2.171828508377075, "learning_rate": 7.452229299363057e-05, "loss": 1.3273, "step": 80 }, { "epoch": 0.32, "grad_norm": 3.170124053955078, "learning_rate": 6.815286624203822e-05, "loss": 1.1778, "step": 100 }, { "epoch": 0.32, "eval_accuracy": 0.78, "eval_loss": 1.0651038885116577, "eval_runtime": 20.8448, "eval_samples_per_second": 50.372, "eval_steps_per_second": 6.333, "step": 100 }, { "epoch": 0.38, "grad_norm": 2.705270767211914, "learning_rate": 6.178343949044585e-05, "loss": 1.0757, "step": 120 }, { "epoch": 0.45, "grad_norm": 2.980999231338501, "learning_rate": 5.5414012738853505e-05, "loss": 1.0115, "step": 140 }, { "epoch": 0.51, "grad_norm": 4.483489990234375, "learning_rate": 4.904458598726115e-05, "loss": 0.9806, "step": 160 }, { "epoch": 0.57, "grad_norm": 3.3504586219787598, "learning_rate": 4.267515923566879e-05, "loss": 0.8805, "step": 180 }, { "epoch": 0.64, "grad_norm": 3.248554229736328, "learning_rate": 3.630573248407643e-05, "loss": 0.8527, "step": 200 }, { "epoch": 0.64, "eval_accuracy": 0.8133333333333334, "eval_loss": 0.7873561978340149, "eval_runtime": 14.6757, "eval_samples_per_second": 71.547, "eval_steps_per_second": 8.994, "step": 200 }, { "epoch": 0.7, "grad_norm": 2.6971187591552734, "learning_rate": 2.9936305732484078e-05, "loss": 0.7773, "step": 220 }, { "epoch": 0.76, "grad_norm": 2.3084142208099365, "learning_rate": 2.356687898089172e-05, "loss": 0.6965, "step": 240 }, { "epoch": 0.83, "grad_norm": 3.3731772899627686, "learning_rate": 1.7197452229299362e-05, "loss": 0.77, "step": 260 }, { "epoch": 0.89, "grad_norm": 1.8178930282592773, "learning_rate": 1.0828025477707008e-05, "loss": 0.7106, "step": 280 }, { "epoch": 0.96, "grad_norm": 2.819934368133545, "learning_rate": 4.45859872611465e-06, "loss": 0.7114, "step": 300 }, { "epoch": 0.96, "eval_accuracy": 0.8095238095238095, "eval_loss": 0.747850775718689, "eval_runtime": 14.738, "eval_samples_per_second": 71.244, "eval_steps_per_second": 8.956, "step": 300 }, { "epoch": 1.0, "step": 314, "total_flos": 7.776878731479245e+17, "train_loss": 1.0910153146002703, "train_runtime": 357.1216, "train_samples_per_second": 28.1, "train_steps_per_second": 0.879 } ], "logging_steps": 20, "max_steps": 314, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 7.776878731479245e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }