{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.196406364440918, "learning_rate": 4.75e-05, "loss": 0.5417, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.6730602006688964, "eval_loss": 0.47316503524780273, "eval_precision": 0.7027985359158151, "eval_recall": 0.6612111292962357, "eval_runtime": 1.7797, "eval_samples_per_second": 224.194, "eval_steps_per_second": 28.094, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.380220651626587, "learning_rate": 4.5e-05, "loss": 0.4395, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.7705259948585406, "eval_loss": 0.4128379225730896, "eval_precision": 0.7612839958158996, "eval_recall": 0.8028277868703401, "eval_runtime": 1.7767, "eval_samples_per_second": 224.574, "eval_steps_per_second": 28.142, "step": 244 }, { "epoch": 3.0, "grad_norm": 2.2583518028259277, "learning_rate": 4.25e-05, "loss": 0.3319, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8315033783783784, "eval_loss": 0.32298392057418823, "eval_precision": 0.8438775510204082, "eval_recall": 0.8217403164211674, "eval_runtime": 1.7811, "eval_samples_per_second": 224.023, "eval_steps_per_second": 28.073, "step": 366 }, { "epoch": 4.0, "grad_norm": 0.8704787492752075, "learning_rate": 4e-05, "loss": 0.2873, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8237962290701417, "eval_loss": 0.322201669216156, "eval_precision": 0.8201159969225307, "eval_recall": 0.8278777959629023, "eval_runtime": 1.7743, "eval_samples_per_second": 224.873, "eval_steps_per_second": 28.18, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.8079779148101807, "learning_rate": 3.7500000000000003e-05, "loss": 0.2571, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8484099018899409, "eval_loss": 0.29681602120399475, "eval_precision": 0.8430645161290322, "eval_recall": 0.8545644662665939, "eval_runtime": 1.7796, "eval_samples_per_second": 224.208, "eval_steps_per_second": 28.096, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.8642373085021973, "learning_rate": 3.5e-05, "loss": 0.2443, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8466330637850383, "eval_loss": 0.29177311062812805, "eval_precision": 0.8353276671885485, "eval_recall": 0.8635206401163849, "eval_runtime": 1.7765, "eval_samples_per_second": 224.594, "eval_steps_per_second": 28.145, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.3272399306297302, "learning_rate": 3.2500000000000004e-05, "loss": 0.2256, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8447157518450185, "eval_loss": 0.2981509566307068, "eval_precision": 0.8325401217487549, "eval_recall": 0.864248045099109, "eval_runtime": 1.7799, "eval_samples_per_second": 224.168, "eval_steps_per_second": 28.091, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.9290239810943604, "learning_rate": 3e-05, "loss": 0.2172, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8646934961080748, "eval_loss": 0.2722471356391907, "eval_precision": 0.882551000198059, "eval_recall": 0.8512456810329151, "eval_runtime": 1.7818, "eval_samples_per_second": 223.935, "eval_steps_per_second": 28.062, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.677186012268066, "learning_rate": 2.7500000000000004e-05, "loss": 0.2049, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8748655913978494, "eval_loss": 0.264840692281723, "eval_precision": 0.86983032873807, "eval_recall": 0.8805237315875614, "eval_runtime": 1.7819, "eval_samples_per_second": 223.922, "eval_steps_per_second": 28.06, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.770735502243042, "learning_rate": 2.5e-05, "loss": 0.1914, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8848664457009163, "eval_loss": 0.2680298984050751, "eval_precision": 0.8977236138837015, "eval_recall": 0.8743862520458265, "eval_runtime": 1.7909, "eval_samples_per_second": 222.787, "eval_steps_per_second": 27.918, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.3618270754814148, "learning_rate": 2.25e-05, "loss": 0.1724, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8808243727598566, "eval_loss": 0.264539897441864, "eval_precision": 0.875706963591375, "eval_recall": 0.8865702855064557, "eval_runtime": 1.7802, "eval_samples_per_second": 224.138, "eval_steps_per_second": 28.087, "step": 1342 }, { "epoch": 12.0, "grad_norm": 2.054783582687378, "learning_rate": 2e-05, "loss": 0.1689, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.881931703852755, "eval_loss": 0.27462852001190186, "eval_precision": 0.8740012737378415, "eval_recall": 0.8915711947626841, "eval_runtime": 1.778, "eval_samples_per_second": 224.405, "eval_steps_per_second": 28.121, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.6106524467468262, "learning_rate": 1.75e-05, "loss": 0.1473, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8800854160075926, "eval_loss": 0.28370755910873413, "eval_precision": 0.9002425410326267, "eval_recall": 0.8651118385160939, "eval_runtime": 1.785, "eval_samples_per_second": 223.535, "eval_steps_per_second": 28.012, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.3967108726501465, "learning_rate": 1.5e-05, "loss": 0.1577, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.884617951284618, "eval_loss": 0.2892190217971802, "eval_precision": 0.8772893772893773, "eval_recall": 0.8933442444080741, "eval_runtime": 1.7818, "eval_samples_per_second": 223.936, "eval_steps_per_second": 28.062, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.48143357038497925, "learning_rate": 1.25e-05, "loss": 0.1468, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8829621606985718, "eval_loss": 0.27894169092178345, "eval_precision": 0.8802419354838709, "eval_recall": 0.8858428805237315, "eval_runtime": 1.7862, "eval_samples_per_second": 223.376, "eval_steps_per_second": 27.992, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.6658376455307007, "learning_rate": 1e-05, "loss": 0.1473, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8775533117267087, "eval_loss": 0.28521186113357544, "eval_precision": 0.873246730188791, "eval_recall": 0.8822967812329514, "eval_runtime": 1.778, "eval_samples_per_second": 224.41, "eval_steps_per_second": 28.121, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.4181182384490967, "learning_rate": 7.5e-06, "loss": 0.1274, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8856836962422341, "eval_loss": 0.28584179282188416, "eval_precision": 0.8838235294117647, "eval_recall": 0.8876159301691217, "eval_runtime": 1.7792, "eval_samples_per_second": 224.253, "eval_steps_per_second": 28.102, "step": 2074 }, { "epoch": 18.0, "grad_norm": 5.91610050201416, "learning_rate": 5e-06, "loss": 0.1318, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8802521008403361, "eval_loss": 0.29269006848335266, "eval_precision": 0.8767168083714847, "eval_recall": 0.8840698308783415, "eval_runtime": 1.7862, "eval_samples_per_second": 223.383, "eval_steps_per_second": 27.993, "step": 2196 }, { "epoch": 19.0, "grad_norm": 6.210901737213135, "learning_rate": 2.5e-06, "loss": 0.1355, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8884169154604891, "eval_loss": 0.2884277403354645, "eval_precision": 0.8874630556728391, "eval_recall": 0.8893889798145117, "eval_runtime": 1.7806, "eval_samples_per_second": 224.08, "eval_steps_per_second": 28.08, "step": 2318 }, { "epoch": 20.0, "grad_norm": 1.5748217105865479, "learning_rate": 0.0, "loss": 0.1367, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8884169154604891, "eval_loss": 0.2881532311439514, "eval_precision": 0.8874630556728391, "eval_recall": 0.8893889798145117, "eval_runtime": 1.7811, "eval_samples_per_second": 224.016, "eval_steps_per_second": 28.072, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2206378909408069, "train_runtime": 624.8021, "train_samples_per_second": 116.453, "train_steps_per_second": 3.905 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }