{ "best_metric": 0.55, "best_model_checkpoint": "test\\checkpoint-1000", "epoch": 500.0, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 50.0, "eval_accuracy": 0.6, "eval_f1": 0.55, "eval_loss": 1.2207863330841064, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.8902, "eval_samples_per_second": 0.409, "eval_steps_per_second": 0.204, "step": 100 }, { "epoch": 100.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.5365853658536585, "eval_loss": 0.9717425107955933, "eval_precision": 0.5238095238095238, "eval_recall": 0.55, "eval_runtime": 4.525, "eval_samples_per_second": 0.442, "eval_steps_per_second": 0.221, "step": 200 }, { "epoch": 150.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.5365853658536585, "eval_loss": 1.0064021348953247, "eval_precision": 0.5238095238095238, "eval_recall": 0.55, "eval_runtime": 4.277, "eval_samples_per_second": 0.468, "eval_steps_per_second": 0.234, "step": 300 }, { "epoch": 200.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.55, "eval_loss": 1.0413910150527954, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.4579, "eval_samples_per_second": 0.449, "eval_steps_per_second": 0.224, "step": 400 }, { "epoch": 250.0, "grad_norm": 0.17069780826568604, "learning_rate": 5e-06, "loss": 0.326, "step": 500 }, { "epoch": 250.0, "eval_accuracy": 0.6, "eval_f1": 0.4878048780487805, "eval_loss": 1.0729484558105469, "eval_precision": 0.47619047619047616, "eval_recall": 0.5, "eval_runtime": 4.4009, "eval_samples_per_second": 0.454, "eval_steps_per_second": 0.227, "step": 500 }, { "epoch": 300.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.55, "eval_loss": 1.066214919090271, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.6788, "eval_samples_per_second": 0.427, "eval_steps_per_second": 0.214, "step": 600 }, { "epoch": 350.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.55, "eval_loss": 1.075073480606079, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.6425, "eval_samples_per_second": 0.431, "eval_steps_per_second": 0.215, "step": 700 }, { "epoch": 400.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.55, "eval_loss": 1.0892070531845093, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.3673, "eval_samples_per_second": 0.458, "eval_steps_per_second": 0.229, "step": 800 }, { "epoch": 450.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.55, "eval_loss": 1.0904639959335327, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.3499, "eval_samples_per_second": 0.46, "eval_steps_per_second": 0.23, "step": 900 }, { "epoch": 500.0, "grad_norm": 0.07940377295017242, "learning_rate": 0.0, "loss": 0.0113, "step": 1000 }, { "epoch": 500.0, "eval_accuracy": 0.6333333333333333, "eval_f1": 0.55, "eval_loss": 1.093883991241455, "eval_precision": 0.55, "eval_recall": 0.55, "eval_runtime": 4.4538, "eval_samples_per_second": 0.449, "eval_steps_per_second": 0.225, "step": 1000 } ], "logging_steps": 500, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 500, "total_flos": 398157811200000.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }