{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 10560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.8086087107658386, "learning_rate": 4.75e-05, "loss": 0.4434, "step": 528 }, { "epoch": 1.0, "eval_accuracy": 0.9455927655768658, "eval_f1": 0.7605633802816901, "eval_loss": 0.16302905976772308, "eval_precision": 0.679945054945055, "eval_recall": 0.862870424171993, "eval_runtime": 4.5797, "eval_samples_per_second": 204.163, "eval_steps_per_second": 3.275, "step": 528 }, { "epoch": 2.0, "grad_norm": 0.7114465832710266, "learning_rate": 4.5e-05, "loss": 0.1462, "step": 1056 }, { "epoch": 2.0, "eval_accuracy": 0.9566729603497963, "eval_f1": 0.8073836276083467, "eval_loss": 0.12938551604747772, "eval_precision": 0.7481408031730292, "eval_recall": 0.8768158047646717, "eval_runtime": 4.6537, "eval_samples_per_second": 200.915, "eval_steps_per_second": 3.223, "step": 1056 }, { "epoch": 3.0, "grad_norm": 1.316323161125183, "learning_rate": 4.25e-05, "loss": 0.1183, "step": 1584 }, { "epoch": 3.0, "eval_accuracy": 0.9568717082381, "eval_f1": 0.8116639914392724, "eval_loss": 0.13784636557102203, "eval_precision": 0.7521070897372335, "eval_recall": 0.8814642649622313, "eval_runtime": 4.903, "eval_samples_per_second": 190.701, "eval_steps_per_second": 3.059, "step": 1584 }, { "epoch": 4.0, "grad_norm": 1.4418916702270508, "learning_rate": 4e-05, "loss": 0.1012, "step": 2112 }, { "epoch": 4.0, "eval_accuracy": 0.9596541786743515, "eval_f1": 0.8231144872490505, "eval_loss": 0.1358918398618698, "eval_precision": 0.7720101781170483, "eval_recall": 0.8814642649622313, "eval_runtime": 4.7436, "eval_samples_per_second": 197.109, "eval_steps_per_second": 3.162, "step": 2112 }, { "epoch": 5.0, "grad_norm": 1.0950379371643066, "learning_rate": 3.7500000000000003e-05, "loss": 0.0884, "step": 2640 }, { "epoch": 5.0, "eval_accuracy": 0.9622379012222995, "eval_f1": 0.8348926802421575, "eval_loss": 0.12661471962928772, "eval_precision": 0.7929952953476216, "eval_recall": 0.8814642649622313, "eval_runtime": 4.6673, "eval_samples_per_second": 200.331, "eval_steps_per_second": 3.214, "step": 2640 }, { "epoch": 6.0, "grad_norm": 1.270456314086914, "learning_rate": 3.5e-05, "loss": 0.0793, "step": 3168 }, { "epoch": 6.0, "eval_accuracy": 0.9610454138924774, "eval_f1": 0.8404432132963989, "eval_loss": 0.1408655196428299, "eval_precision": 0.803070407623081, "eval_recall": 0.8814642649622313, "eval_runtime": 4.6946, "eval_samples_per_second": 199.164, "eval_steps_per_second": 3.195, "step": 3168 }, { "epoch": 7.0, "grad_norm": 1.6711246967315674, "learning_rate": 3.2500000000000004e-05, "loss": 0.072, "step": 3696 }, { "epoch": 7.0, "eval_accuracy": 0.9588591871211368, "eval_f1": 0.8222222222222223, "eval_loss": 0.1545909196138382, "eval_precision": 0.7704418486541391, "eval_recall": 0.8814642649622313, "eval_runtime": 4.6637, "eval_samples_per_second": 200.485, "eval_steps_per_second": 3.216, "step": 3696 }, { "epoch": 8.0, "grad_norm": 0.5584707260131836, "learning_rate": 3e-05, "loss": 0.067, "step": 4224 }, { "epoch": 8.0, "eval_accuracy": 0.9608466660041737, "eval_f1": 0.8334258745141587, "eval_loss": 0.14326535165309906, "eval_precision": 0.797979797979798, "eval_recall": 0.8721673445671121, "eval_runtime": 4.6647, "eval_samples_per_second": 200.444, "eval_steps_per_second": 3.216, "step": 4224 }, { "epoch": 9.0, "grad_norm": 0.5288811326026917, "learning_rate": 2.7500000000000004e-05, "loss": 0.0607, "step": 4752 }, { "epoch": 9.0, "eval_accuracy": 0.9598529265626553, "eval_f1": 0.8312328767123287, "eval_loss": 0.1468406319618225, "eval_precision": 0.7864178330741317, "eval_recall": 0.8814642649622313, "eval_runtime": 4.6583, "eval_samples_per_second": 200.719, "eval_steps_per_second": 3.22, "step": 4752 }, { "epoch": 10.0, "grad_norm": 1.041274070739746, "learning_rate": 2.5e-05, "loss": 0.0562, "step": 5280 }, { "epoch": 10.0, "eval_accuracy": 0.9612441617807811, "eval_f1": 0.8267029972752045, "eval_loss": 0.14973483979701996, "eval_precision": 0.7783478707029245, "eval_recall": 0.8814642649622313, "eval_runtime": 4.6532, "eval_samples_per_second": 200.937, "eval_steps_per_second": 3.224, "step": 5280 }, { "epoch": 11.0, "grad_norm": 1.7796343564987183, "learning_rate": 2.25e-05, "loss": 0.0506, "step": 5808 }, { "epoch": 11.0, "eval_accuracy": 0.9594554307860479, "eval_f1": 0.8332413031474324, "eval_loss": 0.1600087583065033, "eval_precision": 0.793792740662809, "eval_recall": 0.8768158047646717, "eval_runtime": 4.6756, "eval_samples_per_second": 199.975, "eval_steps_per_second": 3.208, "step": 5808 }, { "epoch": 12.0, "grad_norm": 1.6123548746109009, "learning_rate": 2e-05, "loss": 0.0483, "step": 6336 }, { "epoch": 12.0, "eval_accuracy": 0.9608466660041737, "eval_f1": 0.8328721638074155, "eval_loss": 0.15964019298553467, "eval_precision": 0.7950343370311674, "eval_recall": 0.8744915746658919, "eval_runtime": 4.6719, "eval_samples_per_second": 200.134, "eval_steps_per_second": 3.211, "step": 6336 }, { "epoch": 13.0, "grad_norm": 0.9525193572044373, "learning_rate": 1.75e-05, "loss": 0.0443, "step": 6864 }, { "epoch": 13.0, "eval_accuracy": 0.96064791811587, "eval_f1": 0.8237547892720306, "eval_loss": 0.15955425798892975, "eval_precision": 0.7785825142265907, "eval_recall": 0.8744915746658919, "eval_runtime": 4.6803, "eval_samples_per_second": 199.774, "eval_steps_per_second": 3.205, "step": 6864 }, { "epoch": 14.0, "grad_norm": 0.5356388688087463, "learning_rate": 1.5e-05, "loss": 0.0421, "step": 7392 }, { "epoch": 14.0, "eval_accuracy": 0.9612441617807811, "eval_f1": 0.8350857775318208, "eval_loss": 0.16503094136714935, "eval_precision": 0.7971473851030111, "eval_recall": 0.8768158047646717, "eval_runtime": 4.671, "eval_samples_per_second": 200.172, "eval_steps_per_second": 3.211, "step": 7392 }, { "epoch": 15.0, "grad_norm": 0.6785407662391663, "learning_rate": 1.25e-05, "loss": 0.0395, "step": 7920 }, { "epoch": 15.0, "eval_accuracy": 0.9602504223392626, "eval_f1": 0.8284449363586054, "eval_loss": 0.16934077441692352, "eval_precision": 0.7908082408874801, "eval_recall": 0.8698431144683324, "eval_runtime": 4.6607, "eval_samples_per_second": 200.612, "eval_steps_per_second": 3.218, "step": 7920 }, { "epoch": 16.0, "grad_norm": 1.843337893486023, "learning_rate": 1e-05, "loss": 0.0375, "step": 8448 }, { "epoch": 16.0, "eval_accuracy": 0.9594554307860479, "eval_f1": 0.8336088154269972, "eval_loss": 0.17250221967697144, "eval_precision": 0.7925615505500262, "eval_recall": 0.8791400348634515, "eval_runtime": 4.6872, "eval_samples_per_second": 199.478, "eval_steps_per_second": 3.2, "step": 8448 }, { "epoch": 17.0, "grad_norm": 0.6304071545600891, "learning_rate": 7.5e-06, "loss": 0.0358, "step": 8976 }, { "epoch": 17.0, "eval_accuracy": 0.9612441617807811, "eval_f1": 0.8321289605336298, "eval_loss": 0.17892615497112274, "eval_precision": 0.7975492807671817, "eval_recall": 0.8698431144683324, "eval_runtime": 4.6497, "eval_samples_per_second": 201.086, "eval_steps_per_second": 3.226, "step": 8976 }, { "epoch": 18.0, "grad_norm": 1.1330559253692627, "learning_rate": 5e-06, "loss": 0.0339, "step": 9504 }, { "epoch": 18.0, "eval_accuracy": 0.960051674450959, "eval_f1": 0.8225895316804408, "eval_loss": 0.17817425727844238, "eval_precision": 0.7820848611838659, "eval_recall": 0.8675188843695526, "eval_runtime": 4.6927, "eval_samples_per_second": 199.247, "eval_steps_per_second": 3.196, "step": 9504 }, { "epoch": 19.0, "grad_norm": 0.08901867270469666, "learning_rate": 2.5e-06, "loss": 0.0327, "step": 10032 }, { "epoch": 19.0, "eval_accuracy": 0.9620391533339958, "eval_f1": 0.833983286908078, "eval_loss": 0.17433172464370728, "eval_precision": 0.8009630818619583, "eval_recall": 0.8698431144683324, "eval_runtime": 4.6614, "eval_samples_per_second": 200.583, "eval_steps_per_second": 3.218, "step": 10032 }, { "epoch": 20.0, "grad_norm": 1.3878382444381714, "learning_rate": 0.0, "loss": 0.0327, "step": 10560 }, { "epoch": 20.0, "eval_accuracy": 0.9608466660041737, "eval_f1": 0.8321289605336298, "eval_loss": 0.17778323590755463, "eval_precision": 0.7975492807671817, "eval_recall": 0.8698431144683324, "eval_runtime": 4.9618, "eval_samples_per_second": 188.439, "eval_steps_per_second": 3.023, "step": 10560 }, { "epoch": 20.0, "step": 10560, "total_flos": 5062583230111038.0, "train_loss": 0.08150525255636736, "train_runtime": 1343.1683, "train_samples_per_second": 125.628, "train_steps_per_second": 7.862 } ], "logging_steps": 500, "max_steps": 10560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 5062583230111038.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }