{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 3.569611072540283, "learning_rate": 4.75e-05, "loss": 0.5438, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7218045112781954, "eval_f1": 0.6545993371027491, "eval_loss": 0.49882158637046814, "eval_precision": 0.6600553802562947, "eval_recall": 0.6506637570467357, "eval_runtime": 5.2428, "eval_samples_per_second": 76.104, "eval_steps_per_second": 9.537, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.2452878952026367, "learning_rate": 4.5e-05, "loss": 0.4428, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8161454307628278, "eval_loss": 0.3788329064846039, "eval_precision": 0.8107299270072992, "eval_recall": 0.8225586470267321, "eval_runtime": 5.1661, "eval_samples_per_second": 77.234, "eval_steps_per_second": 9.679, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.797173500061035, "learning_rate": 4.25e-05, "loss": 0.3441, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8179269882659713, "eval_loss": 0.3289283514022827, "eval_precision": 0.8510239760239761, "eval_recall": 0.7981905801054737, "eval_runtime": 5.1072, "eval_samples_per_second": 78.125, "eval_steps_per_second": 9.79, "step": 366 }, { "epoch": 4.0, "grad_norm": 1.9981327056884766, "learning_rate": 4e-05, "loss": 0.2986, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.28838610649108887, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 5.1209, "eval_samples_per_second": 77.916, "eval_steps_per_second": 9.764, "step": 488 }, { "epoch": 5.0, "grad_norm": 1.763756513595581, "learning_rate": 3.7500000000000003e-05, "loss": 0.2667, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.850729517396184, "eval_loss": 0.26981133222579956, "eval_precision": 0.8535087719298247, "eval_recall": 0.8481087470449173, "eval_runtime": 5.1232, "eval_samples_per_second": 77.881, "eval_steps_per_second": 9.759, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.7370102405548096, "learning_rate": 3.5e-05, "loss": 0.2524, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8609292598654301, "eval_loss": 0.27233511209487915, "eval_precision": 0.8609292598654301, "eval_recall": 0.8609292598654301, "eval_runtime": 5.1586, "eval_samples_per_second": 77.347, "eval_steps_per_second": 9.693, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.2413272857666016, "learning_rate": 3.2500000000000004e-05, "loss": 0.2343, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8265664241097301, "eval_loss": 0.3179858326911926, "eval_precision": 0.8532894736842105, "eval_recall": 0.8092380432805966, "eval_runtime": 5.1498, "eval_samples_per_second": 77.479, "eval_steps_per_second": 9.709, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.469871997833252, "learning_rate": 3e-05, "loss": 0.2212, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.852937255424767, "eval_loss": 0.29489362239837646, "eval_precision": 0.8674217731421121, "eval_recall": 0.8416530278232406, "eval_runtime": 5.155, "eval_samples_per_second": 77.4, "eval_steps_per_second": 9.699, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.085997581481934, "learning_rate": 2.7500000000000004e-05, "loss": 0.2142, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8564658408408408, "eval_loss": 0.2828481197357178, "eval_precision": 0.8697278911564625, "eval_recall": 0.8459265320967448, "eval_runtime": 5.1873, "eval_samples_per_second": 76.919, "eval_steps_per_second": 9.639, "step": 1098 }, { "epoch": 10.0, "grad_norm": 3.2397539615631104, "learning_rate": 2.5e-05, "loss": 0.1958, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8457993935430168, "eval_loss": 0.28871509432792664, "eval_precision": 0.8398540145985401, "eval_recall": 0.8527914166212038, "eval_runtime": 5.1373, "eval_samples_per_second": 77.667, "eval_steps_per_second": 9.733, "step": 1220 }, { "epoch": 11.0, "grad_norm": 2.4002835750579834, "learning_rate": 2.25e-05, "loss": 0.1855, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8602993213495533, "eval_loss": 0.2867955267429352, "eval_precision": 0.8547653958944281, "eval_recall": 0.8666575741043827, "eval_runtime": 5.1317, "eval_samples_per_second": 77.752, "eval_steps_per_second": 9.743, "step": 1342 }, { "epoch": 12.0, "grad_norm": 1.633034348487854, "learning_rate": 2e-05, "loss": 0.1742, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8448388501742161, "eval_loss": 0.29811105132102966, "eval_precision": 0.8551721930610677, "eval_recall": 0.8363338788870704, "eval_runtime": 5.1494, "eval_samples_per_second": 77.484, "eval_steps_per_second": 9.71, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.338294506072998, "learning_rate": 1.75e-05, "loss": 0.1601, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8556004584112431, "eval_loss": 0.29304954409599304, "eval_precision": 0.8538865546218487, "eval_recall": 0.85738316057465, "eval_runtime": 5.0982, "eval_samples_per_second": 78.263, "eval_steps_per_second": 9.807, "step": 1586 }, { "epoch": 14.0, "grad_norm": 1.3382197618484497, "learning_rate": 1.5e-05, "loss": 0.1602, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8589607635206786, "eval_loss": 0.29793980717658997, "eval_precision": 0.8503875968992248, "eval_recall": 0.8698854337152209, "eval_runtime": 5.1508, "eval_samples_per_second": 77.464, "eval_steps_per_second": 9.707, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.5800030827522278, "learning_rate": 1.25e-05, "loss": 0.1497, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8662440310793597, "eval_loss": 0.29690659046173096, "eval_precision": 0.8606158357771261, "eval_recall": 0.872704128023277, "eval_runtime": 5.1461, "eval_samples_per_second": 77.534, "eval_steps_per_second": 9.716, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.755856037139893, "learning_rate": 1e-05, "loss": 0.1447, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8616171059774413, "eval_loss": 0.29627636075019836, "eval_precision": 0.859873949579832, "eval_recall": 0.8634297144935443, "eval_runtime": 5.1639, "eval_samples_per_second": 77.267, "eval_steps_per_second": 9.683, "step": 1952 }, { "epoch": 17.0, "grad_norm": 2.1538662910461426, "learning_rate": 7.5e-06, "loss": 0.1394, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8589543987905864, "eval_loss": 0.30176377296447754, "eval_precision": 0.8564068100358423, "eval_recall": 0.8616566648481543, "eval_runtime": 5.17, "eval_samples_per_second": 77.176, "eval_steps_per_second": 9.671, "step": 2074 }, { "epoch": 18.0, "grad_norm": 2.54630708694458, "learning_rate": 5e-06, "loss": 0.1333, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.30650317668914795, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 5.128, "eval_samples_per_second": 77.807, "eval_steps_per_second": 9.75, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.7696042656898499, "learning_rate": 2.5e-06, "loss": 0.1406, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.30623340606689453, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 5.1122, "eval_samples_per_second": 78.049, "eval_steps_per_second": 9.781, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.025254487991333, "learning_rate": 0.0, "loss": 0.1243, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.858259325044405, "eval_loss": 0.30716511607170105, "eval_precision": 0.8573798178418481, "eval_recall": 0.8591562102200401, "eval_runtime": 5.0928, "eval_samples_per_second": 78.347, "eval_steps_per_second": 9.818, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8460375995160000.0, "train_loss": 0.22629446436147221, "train_runtime": 1956.0503, "train_samples_per_second": 37.269, "train_steps_per_second": 1.247 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8460375995160000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }