{ "best_metric": 0.6791029572486877, "best_model_checkpoint": "./results/checkpoint-342", "epoch": 4.0, "eval_steps": 500, "global_step": 684, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.7781569965870307, "eval_confusion_matrix": [ [ 58, 0, 1, 1, 10, 9, 1 ], [ 2, 81, 0, 0, 4, 2, 0 ], [ 0, 0, 87, 0, 2, 6, 0 ], [ 1, 0, 0, 67, 5, 2, 0 ], [ 1, 0, 3, 4, 69, 6, 4 ], [ 1, 0, 6, 1, 12, 56, 0 ], [ 0, 0, 0, 4, 31, 11, 38 ] ], "eval_f1": 0.7777605036882297, "eval_loss": 0.6863528490066528, "eval_precision": 0.8141265117353299, "eval_recall": 0.7752230960697254, "eval_runtime": 414.2099, "eval_samples_per_second": 1.415, "eval_steps_per_second": 0.179, "step": 171 }, { "epoch": 2.0, "eval_accuracy": 0.8003412969283277, "eval_confusion_matrix": [ [ 72, 0, 0, 1, 4, 2, 1 ], [ 2, 84, 0, 0, 0, 0, 3 ], [ 2, 0, 86, 0, 3, 4, 0 ], [ 7, 0, 0, 65, 2, 1, 0 ], [ 10, 1, 2, 2, 53, 8, 11 ], [ 5, 0, 10, 1, 9, 51, 0 ], [ 1, 0, 0, 3, 17, 5, 58 ] ], "eval_f1": 0.7977110437689846, "eval_loss": 0.6791029572486877, "eval_precision": 0.8015628505641682, "eval_recall": 0.7980677533763562, "eval_runtime": 417.5215, "eval_samples_per_second": 1.404, "eval_steps_per_second": 0.177, "step": 342 }, { "epoch": 2.92, "grad_norm": 0.1340806484222412, "learning_rate": 5.3801169590643275e-05, "loss": 0.3724, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.8225255972696246, "eval_confusion_matrix": [ [ 72, 0, 1, 0, 3, 3, 1 ], [ 1, 81, 1, 0, 2, 0, 4 ], [ 1, 0, 86, 0, 0, 8, 0 ], [ 5, 0, 0, 67, 1, 2, 0 ], [ 10, 0, 3, 4, 47, 5, 18 ], [ 1, 0, 5, 1, 1, 67, 1 ], [ 2, 0, 1, 3, 11, 5, 62 ] ], "eval_f1": 0.819196982637564, "eval_loss": 0.805719256401062, "eval_precision": 0.821570223887049, "eval_recall": 0.8240875601856804, "eval_runtime": 415.3032, "eval_samples_per_second": 1.411, "eval_steps_per_second": 0.178, "step": 513 }, { "epoch": 4.0, "eval_accuracy": 0.8242320819112628, "eval_confusion_matrix": [ [ 71, 0, 0, 1, 3, 4, 1 ], [ 1, 82, 1, 0, 2, 1, 2 ], [ 1, 0, 85, 0, 1, 8, 0 ], [ 4, 0, 0, 67, 3, 0, 1 ], [ 5, 0, 2, 6, 50, 4, 20 ], [ 1, 0, 5, 1, 2, 66, 1 ], [ 1, 0, 0, 3, 15, 3, 62 ] ], "eval_f1": 0.8226354635422165, "eval_loss": 0.7115229368209839, "eval_precision": 0.8223101749006074, "eval_recall": 0.8254496320643309, "eval_runtime": 409.1206, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.181, "step": 684 } ], "logging_steps": 500, "max_steps": 684, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "total_flos": 4.234352059828961e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }