{ "best_metric": null, "best_model_checkpoint": null, "epoch": 18.0, "eval_steps": 500, "global_step": 18, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.5107996463775635, "learning_rate": 2.5e-05, "loss": 0.3646, "step": 1 }, { "epoch": 1.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.4555206298828125, "eval_runtime": 7.0509, "eval_samples_per_second": 0.425, "eval_steps_per_second": 0.142, "step": 1 }, { "epoch": 2.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.4605119228363037, "eval_runtime": 5.0672, "eval_samples_per_second": 0.592, "eval_steps_per_second": 0.197, "step": 2 }, { "epoch": 3.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.600870370864868, "eval_runtime": 5.1285, "eval_samples_per_second": 0.585, "eval_steps_per_second": 0.195, "step": 3 }, { "epoch": 4.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.7373969554901123, "eval_runtime": 5.1975, "eval_samples_per_second": 0.577, "eval_steps_per_second": 0.192, "step": 4 }, { "epoch": 5.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.764043092727661, "eval_runtime": 5.6381, "eval_samples_per_second": 0.532, "eval_steps_per_second": 0.177, "step": 5 }, { "epoch": 6.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.7440683841705322, "eval_runtime": 5.2121, "eval_samples_per_second": 0.576, "eval_steps_per_second": 0.192, "step": 6 }, { "epoch": 7.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.7819652557373047, "eval_runtime": 5.5977, "eval_samples_per_second": 0.536, "eval_steps_per_second": 0.179, "step": 7 }, { "epoch": 8.0, "grad_norm": 0.9609292149543762, "learning_rate": 3.125e-05, "loss": 0.3442, "step": 8 }, { "epoch": 8.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.8066508769989014, "eval_runtime": 5.2493, "eval_samples_per_second": 0.572, "eval_steps_per_second": 0.191, "step": 8 }, { "epoch": 9.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.8143389225006104, "eval_runtime": 5.448, "eval_samples_per_second": 0.551, "eval_steps_per_second": 0.184, "step": 9 }, { "epoch": 10.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.796605348587036, "eval_runtime": 5.2784, "eval_samples_per_second": 0.568, "eval_steps_per_second": 0.189, "step": 10 }, { "epoch": 11.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.783594846725464, "eval_runtime": 5.0956, "eval_samples_per_second": 0.589, "eval_steps_per_second": 0.196, "step": 11 }, { "epoch": 12.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.7537152767181396, "eval_runtime": 5.2933, "eval_samples_per_second": 0.567, "eval_steps_per_second": 0.189, "step": 12 }, { "epoch": 13.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.72334885597229, "eval_runtime": 5.2117, "eval_samples_per_second": 0.576, "eval_steps_per_second": 0.192, "step": 13 }, { "epoch": 14.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.6946322917938232, "eval_runtime": 5.2603, "eval_samples_per_second": 0.57, "eval_steps_per_second": 0.19, "step": 14 }, { "epoch": 15.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.6637771129608154, "eval_runtime": 5.1635, "eval_samples_per_second": 0.581, "eval_steps_per_second": 0.194, "step": 15 }, { "epoch": 16.0, "grad_norm": 1.235273838043213, "learning_rate": 6.25e-06, "loss": 0.3003, "step": 16 }, { "epoch": 16.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.6449084281921387, "eval_runtime": 5.3638, "eval_samples_per_second": 0.559, "eval_steps_per_second": 0.186, "step": 16 }, { "epoch": 17.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.631160020828247, "eval_runtime": 5.4042, "eval_samples_per_second": 0.555, "eval_steps_per_second": 0.185, "step": 17 }, { "epoch": 18.0, "eval_Macro F1": 0.0, "eval_Macro Precision": 0.0, "eval_Macro Recall": 0.0, "eval_Micro F1": 0.0, "eval_Micro Precision": 0.0, "eval_Micro Recall": 0.0, "eval_Weighted F1": 0.0, "eval_Weighted Precision": 0.0, "eval_Weighted Recall": 0.0, "eval_accuracy": 0.0, "eval_loss": 2.6238577365875244, "eval_runtime": 5.1807, "eval_samples_per_second": 0.579, "eval_steps_per_second": 0.193, "step": 18 }, { "epoch": 18.0, "step": 18, "total_flos": 1.6740517517918208e+16, "train_loss": 0.32068005369769204, "train_runtime": 242.8786, "train_samples_per_second": 0.889, "train_steps_per_second": 0.074 } ], "logging_steps": 8, "max_steps": 18, "num_input_tokens_seen": 0, "num_train_epochs": 18, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6740517517918208e+16, "train_batch_size": 32, "trial_name": null, "trial_params": null }