{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 10560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.2974101305007935, "learning_rate": 4.75e-05, "loss": 0.7682, "step": 528 }, { "epoch": 1.0, "eval_accuracy": 0.8662923581436948, "eval_f1": 0.18337078651685396, "eval_loss": 0.43944087624549866, "eval_precision": 0.40476190476190477, "eval_recall": 0.11853573503776874, "eval_runtime": 4.4831, "eval_samples_per_second": 208.562, "eval_steps_per_second": 3.346, "step": 528 }, { "epoch": 2.0, "grad_norm": 0.8939604759216309, "learning_rate": 4.5e-05, "loss": 0.3466, "step": 1056 }, { "epoch": 2.0, "eval_accuracy": 0.9326741528371261, "eval_f1": 0.6631853785900783, "eval_loss": 0.22170819342136383, "eval_precision": 0.602181128496918, "eval_recall": 0.7379430563625798, "eval_runtime": 4.5771, "eval_samples_per_second": 204.278, "eval_steps_per_second": 3.277, "step": 1056 }, { "epoch": 3.0, "grad_norm": 1.3257710933685303, "learning_rate": 4.25e-05, "loss": 0.2131, "step": 1584 }, { "epoch": 3.0, "eval_accuracy": 0.9428102951406141, "eval_f1": 0.7492870106300232, "eval_loss": 0.172767773270607, "eval_precision": 0.6764981273408239, "eval_recall": 0.8396281231841952, "eval_runtime": 4.55, "eval_samples_per_second": 205.495, "eval_steps_per_second": 3.297, "step": 1584 }, { "epoch": 4.0, "grad_norm": 1.8121825456619263, "learning_rate": 4e-05, "loss": 0.1759, "step": 2112 }, { "epoch": 4.0, "eval_accuracy": 0.9515552022259763, "eval_f1": 0.7833023132145706, "eval_loss": 0.15085110068321228, "eval_precision": 0.7220588235294118, "eval_recall": 0.8558977338756537, "eval_runtime": 4.5248, "eval_samples_per_second": 206.638, "eval_steps_per_second": 3.315, "step": 2112 }, { "epoch": 5.0, "grad_norm": 1.214239478111267, "learning_rate": 3.7500000000000003e-05, "loss": 0.1563, "step": 2640 }, { "epoch": 5.0, "eval_accuracy": 0.9533439332207095, "eval_f1": 0.7900773539610563, "eval_loss": 0.14215560257434845, "eval_precision": 0.730276134122288, "eval_recall": 0.8605461940732132, "eval_runtime": 4.5421, "eval_samples_per_second": 205.851, "eval_steps_per_second": 3.302, "step": 2640 }, { "epoch": 6.0, "grad_norm": 1.626373291015625, "learning_rate": 3.5e-05, "loss": 0.1464, "step": 3168 }, { "epoch": 6.0, "eval_accuracy": 0.9541389247739243, "eval_f1": 0.7889618922470434, "eval_loss": 0.14294518530368805, "eval_precision": 0.7202495201535508, "eval_recall": 0.8721673445671121, "eval_runtime": 4.5002, "eval_samples_per_second": 207.766, "eval_steps_per_second": 3.333, "step": 3168 }, { "epoch": 7.0, "grad_norm": 3.158684015274048, "learning_rate": 3.2500000000000004e-05, "loss": 0.1394, "step": 3696 }, { "epoch": 7.0, "eval_accuracy": 0.9525489416674948, "eval_f1": 0.7869281045751634, "eval_loss": 0.14403149485588074, "eval_precision": 0.7153041825095057, "eval_recall": 0.8744915746658919, "eval_runtime": 4.5154, "eval_samples_per_second": 207.067, "eval_steps_per_second": 3.322, "step": 3696 }, { "epoch": 8.0, "grad_norm": 0.7790516018867493, "learning_rate": 3e-05, "loss": 0.1325, "step": 4224 }, { "epoch": 8.0, "eval_accuracy": 0.9553314121037464, "eval_f1": 0.796106287818995, "eval_loss": 0.1397974044084549, "eval_precision": 0.7274038461538461, "eval_recall": 0.8791400348634515, "eval_runtime": 4.5264, "eval_samples_per_second": 206.567, "eval_steps_per_second": 3.314, "step": 4224 }, { "epoch": 9.0, "grad_norm": 1.278489589691162, "learning_rate": 2.7500000000000004e-05, "loss": 0.1269, "step": 4752 }, { "epoch": 9.0, "eval_accuracy": 0.9579151346516943, "eval_f1": 0.7998928475756765, "eval_loss": 0.13412921130657196, "eval_precision": 0.7420477137176938, "eval_recall": 0.8675188843695526, "eval_runtime": 4.5121, "eval_samples_per_second": 207.219, "eval_steps_per_second": 3.324, "step": 4752 }, { "epoch": 10.0, "grad_norm": 1.4628835916519165, "learning_rate": 2.5e-05, "loss": 0.124, "step": 5280 }, { "epoch": 10.0, "eval_accuracy": 0.9564742124614926, "eval_f1": 0.8013807753584705, "eval_loss": 0.13306674361228943, "eval_precision": 0.7378973105134474, "eval_recall": 0.8768158047646717, "eval_runtime": 4.5165, "eval_samples_per_second": 207.02, "eval_steps_per_second": 3.321, "step": 5280 }, { "epoch": 11.0, "grad_norm": 2.4508931636810303, "learning_rate": 2.25e-05, "loss": 0.1194, "step": 5808 }, { "epoch": 11.0, "eval_accuracy": 0.9568717082381, "eval_f1": 0.803921568627451, "eval_loss": 0.13287858664989471, "eval_precision": 0.7389186556259133, "eval_recall": 0.8814642649622313, "eval_runtime": 4.525, "eval_samples_per_second": 206.629, "eval_steps_per_second": 3.315, "step": 5808 }, { "epoch": 12.0, "grad_norm": 1.7830716371536255, "learning_rate": 2e-05, "loss": 0.1171, "step": 6336 }, { "epoch": 12.0, "eval_accuracy": 0.9566729603497963, "eval_f1": 0.8026525198938992, "eval_loss": 0.13365061581134796, "eval_precision": 0.7384089799902391, "eval_recall": 0.8791400348634515, "eval_runtime": 4.5462, "eval_samples_per_second": 205.667, "eval_steps_per_second": 3.299, "step": 6336 }, { "epoch": 13.0, "grad_norm": 2.1622517108917236, "learning_rate": 1.75e-05, "loss": 0.1153, "step": 6864 }, { "epoch": 13.0, "eval_accuracy": 0.9586604392328332, "eval_f1": 0.8043826830571886, "eval_loss": 0.129431813955307, "eval_precision": 0.7446808510638298, "eval_recall": 0.8744915746658919, "eval_runtime": 4.5078, "eval_samples_per_second": 207.416, "eval_steps_per_second": 3.328, "step": 6864 }, { "epoch": 14.0, "grad_norm": 1.1562857627868652, "learning_rate": 1.5e-05, "loss": 0.1119, "step": 7392 }, { "epoch": 14.0, "eval_accuracy": 0.9572692040147074, "eval_f1": 0.807794981313401, "eval_loss": 0.13104024529457092, "eval_precision": 0.7471604938271605, "eval_recall": 0.8791400348634515, "eval_runtime": 4.5437, "eval_samples_per_second": 205.78, "eval_steps_per_second": 3.301, "step": 7392 }, { "epoch": 15.0, "grad_norm": 1.6378480195999146, "learning_rate": 1.25e-05, "loss": 0.1109, "step": 7920 }, { "epoch": 15.0, "eval_accuracy": 0.9578654476796185, "eval_f1": 0.803963577932512, "eval_loss": 0.1311807483434677, "eval_precision": 0.7456532538499752, "eval_recall": 0.8721673445671121, "eval_runtime": 4.5022, "eval_samples_per_second": 207.676, "eval_steps_per_second": 3.332, "step": 7920 }, { "epoch": 16.0, "grad_norm": 2.3020808696746826, "learning_rate": 1e-05, "loss": 0.1102, "step": 8448 }, { "epoch": 16.0, "eval_accuracy": 0.9580641955679221, "eval_f1": 0.8060735215769846, "eval_loss": 0.13092459738254547, "eval_precision": 0.7442203639940974, "eval_recall": 0.8791400348634515, "eval_runtime": 4.5251, "eval_samples_per_second": 206.624, "eval_steps_per_second": 3.315, "step": 8448 }, { "epoch": 17.0, "grad_norm": 1.444940209388733, "learning_rate": 7.5e-06, "loss": 0.1095, "step": 8976 }, { "epoch": 17.0, "eval_accuracy": 0.9586604392328332, "eval_f1": 0.8073443320915381, "eval_loss": 0.13140013813972473, "eval_precision": 0.7447226313205695, "eval_recall": 0.8814642649622313, "eval_runtime": 4.5218, "eval_samples_per_second": 206.775, "eval_steps_per_second": 3.317, "step": 8976 }, { "epoch": 18.0, "grad_norm": 1.5871490240097046, "learning_rate": 5e-06, "loss": 0.1073, "step": 9504 }, { "epoch": 18.0, "eval_accuracy": 0.9576666997913147, "eval_f1": 0.8018114011720832, "eval_loss": 0.1323314905166626, "eval_precision": 0.7402852926709297, "eval_recall": 0.8744915746658919, "eval_runtime": 4.5193, "eval_samples_per_second": 206.892, "eval_steps_per_second": 3.319, "step": 9504 }, { "epoch": 19.0, "grad_norm": 0.673528254032135, "learning_rate": 2.5e-06, "loss": 0.107, "step": 10032 }, { "epoch": 19.0, "eval_accuracy": 0.9580641955679221, "eval_f1": 0.800106894708712, "eval_loss": 0.1300082504749298, "eval_precision": 0.7407224146462148, "eval_recall": 0.8698431144683324, "eval_runtime": 4.5363, "eval_samples_per_second": 206.115, "eval_steps_per_second": 3.307, "step": 10032 }, { "epoch": 20.0, "grad_norm": 2.0137035846710205, "learning_rate": 0.0, "loss": 0.1073, "step": 10560 }, { "epoch": 20.0, "eval_accuracy": 0.9578654476796185, "eval_f1": 0.800106894708712, "eval_loss": 0.1304602026939392, "eval_precision": 0.7407224146462148, "eval_recall": 0.8698431144683324, "eval_runtime": 4.5054, "eval_samples_per_second": 207.53, "eval_steps_per_second": 3.329, "step": 10560 }, { "epoch": 20.0, "step": 10560, "total_flos": 4545096690358590.0, "train_loss": 0.17226147796168473, "train_runtime": 1232.3096, "train_samples_per_second": 136.93, "train_steps_per_second": 8.569 } ], "logging_steps": 500, "max_steps": 10560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4545096690358590.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }