{ "best_metric": 0.9361702127659575, "best_model_checkpoint": "portrait_cosu_exp3/checkpoint-13", "epoch": 3.6923076923076925, "eval_steps": 500, "global_step": 24, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.9230769230769231, "eval_accuracy": 0.851063829787234, "eval_confusion_matrix": [ [ 17, 3 ], [ 4, 23 ] ], "eval_f1": 0.8514750663363719, "eval_loss": 0.2920527458190918, "eval_precision": 0.8526615228742889, "eval_recall": 0.851063829787234, "eval_runtime": 3.0689, "eval_samples_per_second": 15.315, "eval_steps_per_second": 0.978, "step": 6 }, { "epoch": 1.5384615384615383, "grad_norm": 7.153378486633301, "learning_rate": 3.3333333333333335e-05, "loss": 0.5415, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.9361702127659575, "eval_confusion_matrix": [ [ 17, 3 ], [ 0, 27 ] ], "eval_f1": 0.9352622499319029, "eval_loss": 0.25635045766830444, "eval_precision": 0.9425531914893617, "eval_recall": 0.9361702127659575, "eval_runtime": 3.2937, "eval_samples_per_second": 14.27, "eval_steps_per_second": 0.911, "step": 13 }, { "epoch": 2.9230769230769234, "eval_accuracy": 0.8723404255319149, "eval_confusion_matrix": [ [ 19, 1 ], [ 5, 22 ] ], "eval_f1": 0.873036750483559, "eval_loss": 0.3604692220687866, "eval_precision": 0.8863706444650015, "eval_recall": 0.8723404255319149, "eval_runtime": 3.1924, "eval_samples_per_second": 14.722, "eval_steps_per_second": 0.94, "step": 19 }, { "epoch": 3.076923076923077, "grad_norm": 9.893902778625488, "learning_rate": 9.523809523809523e-06, "loss": 0.378, "step": 20 }, { "epoch": 3.6923076923076925, "eval_accuracy": 0.9148936170212766, "eval_confusion_matrix": [ [ 19, 1 ], [ 3, 24 ] ], "eval_f1": 0.9152832982620216, "eval_loss": 0.25239235162734985, "eval_precision": 0.9189941972920695, "eval_recall": 0.9148936170212766, "eval_runtime": 2.9295, "eval_samples_per_second": 16.044, "eval_steps_per_second": 1.024, "step": 24 }, { "epoch": 3.6923076923076925, "step": 24, "total_flos": 3.505150325906473e+17, "train_loss": 0.428266316652298, "train_runtime": 301.1045, "train_samples_per_second": 5.526, "train_steps_per_second": 0.08 } ], "logging_steps": 10, "max_steps": 24, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 3.505150325906473e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }