{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.928847551345825, "learning_rate": 4.75e-05, "loss": 0.5514, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6545993371027491, "eval_loss": 0.5084339380264282, "eval_precision": 0.6600553802562947, "eval_recall": 0.6506637570467357, "eval_runtime": 1.7877, "eval_samples_per_second": 223.186, "eval_steps_per_second": 27.968, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.589020013809204, "learning_rate": 4.5e-05, "loss": 0.4753, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.7662453352594198, "eval_loss": 0.40073099732398987, "eval_precision": 0.7870255775577557, "eval_recall": 0.7530460083651573, "eval_runtime": 1.8277, "eval_samples_per_second": 218.302, "eval_steps_per_second": 27.356, "step": 244 }, { "epoch": 3.0, "grad_norm": 7.217240810394287, "learning_rate": 4.25e-05, "loss": 0.3834, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.7805280528052805, "eval_loss": 0.35415172576904297, "eval_precision": 0.8448765432098766, "eval_recall": 0.7540007274049827, "eval_runtime": 1.8275, "eval_samples_per_second": 218.333, "eval_steps_per_second": 27.36, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.0342143774032593, "learning_rate": 4e-05, "loss": 0.3188, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8333016825553572, "eval_loss": 0.32140952348709106, "eval_precision": 0.8341507249908615, "eval_recall": 0.8324695399163484, "eval_runtime": 1.8265, "eval_samples_per_second": 218.445, "eval_steps_per_second": 27.374, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.17711341381073, "learning_rate": 3.7500000000000003e-05, "loss": 0.2981, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.855319904024935, "eval_loss": 0.2984267771244049, "eval_precision": 0.862378106322743, "eval_recall": 0.8491543917075832, "eval_runtime": 1.8288, "eval_samples_per_second": 218.18, "eval_steps_per_second": 27.341, "step": 610 }, { "epoch": 6.0, "grad_norm": 4.153679370880127, "learning_rate": 3.5e-05, "loss": 0.2835, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8368354828562441, "eval_loss": 0.28104665875434875, "eval_precision": 0.8520237470480189, "eval_recall": 0.8252864157119476, "eval_runtime": 1.8275, "eval_samples_per_second": 218.335, "eval_steps_per_second": 27.36, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.0657362937927246, "learning_rate": 3.2500000000000004e-05, "loss": 0.2517, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8622036668943447, "eval_loss": 0.28660351037979126, "eval_precision": 0.8671602787456446, "eval_recall": 0.8577014002545917, "eval_runtime": 1.7903, "eval_samples_per_second": 222.862, "eval_steps_per_second": 27.928, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.21960711479187, "learning_rate": 3e-05, "loss": 0.2374, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8485289465359063, "eval_loss": 0.2996794879436493, "eval_precision": 0.8671008040401356, "eval_recall": 0.8348790689216221, "eval_runtime": 1.7958, "eval_samples_per_second": 222.185, "eval_steps_per_second": 27.843, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.313467025756836, "learning_rate": 2.7500000000000004e-05, "loss": 0.2293, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8518472677764712, "eval_loss": 0.29089975357055664, "eval_precision": 0.8599810186649794, "eval_recall": 0.844880887434079, "eval_runtime": 1.7949, "eval_samples_per_second": 222.299, "eval_steps_per_second": 27.857, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.5997090339660645, "learning_rate": 2.5e-05, "loss": 0.2091, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8589543987905864, "eval_loss": 0.29276683926582336, "eval_precision": 0.8564068100358423, "eval_recall": 0.8616566648481543, "eval_runtime": 1.7944, "eval_samples_per_second": 222.357, "eval_steps_per_second": 27.864, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.3290369510650635, "learning_rate": 2.25e-05, "loss": 0.198, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8569892473118279, "eval_loss": 0.28468698263168335, "eval_precision": 0.8522004241781549, "eval_recall": 0.8623840698308783, "eval_runtime": 1.8002, "eval_samples_per_second": 221.645, "eval_steps_per_second": 27.775, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.568458557128906, "learning_rate": 2e-05, "loss": 0.1906, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.843111041207927, "eval_loss": 0.31200090050697327, "eval_precision": 0.8585673051692468, "eval_recall": 0.831332969630842, "eval_runtime": 1.7936, "eval_samples_per_second": 222.458, "eval_steps_per_second": 27.877, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.5475130081176758, "learning_rate": 1.75e-05, "loss": 0.1818, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.2906310558319092, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 1.795, "eval_samples_per_second": 222.279, "eval_steps_per_second": 27.854, "step": 1586 }, { "epoch": 14.0, "grad_norm": 5.56436014175415, "learning_rate": 1.5e-05, "loss": 0.1756, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8514869535493182, "eval_loss": 0.28101998567581177, "eval_precision": 0.8523821128305106, "eval_recall": 0.8506092016730314, "eval_runtime": 1.7948, "eval_samples_per_second": 222.311, "eval_steps_per_second": 27.859, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.7677656412124634, "learning_rate": 1.25e-05, "loss": 0.174, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8595070422535211, "eval_loss": 0.2828814685344696, "eval_precision": 0.8633733523114054, "eval_recall": 0.8559283506092017, "eval_runtime": 1.7987, "eval_samples_per_second": 221.825, "eval_steps_per_second": 27.798, "step": 1830 }, { "epoch": 16.0, "grad_norm": 2.1199889183044434, "learning_rate": 1e-05, "loss": 0.1705, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.855319904024935, "eval_loss": 0.292202889919281, "eval_precision": 0.862378106322743, "eval_recall": 0.8491543917075832, "eval_runtime": 1.7972, "eval_samples_per_second": 222.011, "eval_steps_per_second": 27.821, "step": 1952 }, { "epoch": 17.0, "grad_norm": 2.521127223968506, "learning_rate": 7.5e-06, "loss": 0.1509, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8568221901555235, "eval_loss": 0.2991296947002411, "eval_precision": 0.8596491228070176, "eval_recall": 0.8541553009638116, "eval_runtime": 1.7965, "eval_samples_per_second": 222.104, "eval_steps_per_second": 27.833, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.4539577960968018, "learning_rate": 5e-06, "loss": 0.1549, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.855319904024935, "eval_loss": 0.3000391125679016, "eval_precision": 0.862378106322743, "eval_recall": 0.8491543917075832, "eval_runtime": 1.7991, "eval_samples_per_second": 221.772, "eval_steps_per_second": 27.791, "step": 2196 }, { "epoch": 19.0, "grad_norm": 1.3508776426315308, "learning_rate": 2.5e-06, "loss": 0.1469, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.2942558228969574, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 1.8067, "eval_samples_per_second": 220.841, "eval_steps_per_second": 27.674, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.1654489040374756, "learning_rate": 0.0, "loss": 0.1493, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.855319904024935, "eval_loss": 0.3026413023471832, "eval_precision": 0.862378106322743, "eval_recall": 0.8491543917075832, "eval_runtime": 1.7968, "eval_samples_per_second": 222.067, "eval_steps_per_second": 27.828, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8460375995160000.0, "train_loss": 0.24652244458433056, "train_runtime": 623.7969, "train_samples_per_second": 116.865, "train_steps_per_second": 3.912 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8460375995160000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }