|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.232791900634766, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5568, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6144317942230656, |
|
"eval_loss": 0.48217353224754333, |
|
"eval_precision": 0.65566534914361, |
|
"eval_recall": 0.6074286233860702, |
|
"eval_runtime": 1.8124, |
|
"eval_samples_per_second": 220.155, |
|
"eval_steps_per_second": 27.588, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.6621267795562744, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4661, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7543859649122807, |
|
"eval_f1": 0.7304054054054054, |
|
"eval_loss": 0.44529902935028076, |
|
"eval_precision": 0.7240563585317666, |
|
"eval_recall": 0.7612293144208038, |
|
"eval_runtime": 1.7908, |
|
"eval_samples_per_second": 222.81, |
|
"eval_steps_per_second": 27.921, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.223342180252075, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3875, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8238834717707957, |
|
"eval_loss": 0.3446956276893616, |
|
"eval_precision": 0.8487520627062706, |
|
"eval_recall": 0.8074649936352064, |
|
"eval_runtime": 1.7891, |
|
"eval_samples_per_second": 223.02, |
|
"eval_steps_per_second": 27.947, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 3.7967684268951416, |
|
"learning_rate": 4e-05, |
|
"loss": 0.318, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8267427049559981, |
|
"eval_loss": 0.34423500299453735, |
|
"eval_precision": 0.8158466596088483, |
|
"eval_recall": 0.8436079287143117, |
|
"eval_runtime": 1.786, |
|
"eval_samples_per_second": 223.404, |
|
"eval_steps_per_second": 27.996, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.4756224453449249, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2855, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.826007326007326, |
|
"eval_loss": 0.3348763883113861, |
|
"eval_precision": 0.8157828282828283, |
|
"eval_recall": 0.8411074740861975, |
|
"eval_runtime": 1.7834, |
|
"eval_samples_per_second": 223.733, |
|
"eval_steps_per_second": 28.037, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.1933702230453491, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2638, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8177454831659652, |
|
"eval_loss": 0.35479673743247986, |
|
"eval_precision": 0.805161943319838, |
|
"eval_recall": 0.8472449536279323, |
|
"eval_runtime": 1.7837, |
|
"eval_samples_per_second": 223.688, |
|
"eval_steps_per_second": 28.031, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.3131154775619507, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.2397, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8434065934065934, |
|
"eval_loss": 0.3253527581691742, |
|
"eval_precision": 0.8325757575757575, |
|
"eval_recall": 0.8592471358428806, |
|
"eval_runtime": 1.7938, |
|
"eval_samples_per_second": 222.438, |
|
"eval_steps_per_second": 27.874, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 5.750446319580078, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2428, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8654532336864889, |
|
"eval_loss": 0.2798740565776825, |
|
"eval_precision": 0.8804269882659713, |
|
"eval_recall": 0.8537461356610292, |
|
"eval_runtime": 1.7849, |
|
"eval_samples_per_second": 223.546, |
|
"eval_steps_per_second": 28.013, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.672217845916748, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2229, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8484099018899409, |
|
"eval_loss": 0.29030779004096985, |
|
"eval_precision": 0.8430645161290322, |
|
"eval_recall": 0.8545644662665939, |
|
"eval_runtime": 1.7837, |
|
"eval_samples_per_second": 223.689, |
|
"eval_steps_per_second": 28.031, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 2.1360011100769043, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2144, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8769602202215754, |
|
"eval_loss": 0.2583388686180115, |
|
"eval_precision": 0.8742831541218639, |
|
"eval_recall": 0.8797963266048372, |
|
"eval_runtime": 1.7944, |
|
"eval_samples_per_second": 222.362, |
|
"eval_steps_per_second": 27.865, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.677872657775879, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1967, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8622085718274466, |
|
"eval_loss": 0.27431806921958923, |
|
"eval_precision": 0.8530168716042322, |
|
"eval_recall": 0.8741589379887251, |
|
"eval_runtime": 1.7823, |
|
"eval_samples_per_second": 223.874, |
|
"eval_steps_per_second": 28.054, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.564518451690674, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1855, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8563451067988272, |
|
"eval_loss": 0.29132312536239624, |
|
"eval_precision": 0.8473119816985988, |
|
"eval_recall": 0.8681123840698308, |
|
"eval_runtime": 1.7831, |
|
"eval_samples_per_second": 223.773, |
|
"eval_steps_per_second": 28.042, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 2.314499855041504, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.1761, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8786430103333984, |
|
"eval_loss": 0.26596176624298096, |
|
"eval_precision": 0.8913001481099878, |
|
"eval_recall": 0.8683396981269322, |
|
"eval_runtime": 1.7906, |
|
"eval_samples_per_second": 222.829, |
|
"eval_steps_per_second": 27.923, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.584296226501465, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1733, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8822055137844611, |
|
"eval_f1": 0.8622085718274466, |
|
"eval_loss": 0.28683483600616455, |
|
"eval_precision": 0.8530168716042322, |
|
"eval_recall": 0.8741589379887251, |
|
"eval_runtime": 1.7857, |
|
"eval_samples_per_second": 223.438, |
|
"eval_steps_per_second": 28.0, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.435178518295288, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1582, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8847117794486216, |
|
"eval_f1": 0.8648373983739837, |
|
"eval_loss": 0.28010857105255127, |
|
"eval_precision": 0.8561154177433248, |
|
"eval_recall": 0.8759319876341153, |
|
"eval_runtime": 1.788, |
|
"eval_samples_per_second": 223.159, |
|
"eval_steps_per_second": 27.965, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 1.7755215167999268, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1537, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8550061050061051, |
|
"eval_loss": 0.30731528997421265, |
|
"eval_precision": 0.8437710437710437, |
|
"eval_recall": 0.8713402436806692, |
|
"eval_runtime": 1.7883, |
|
"eval_samples_per_second": 223.118, |
|
"eval_steps_per_second": 27.96, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.1951849460601807, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1537, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8781334505389722, |
|
"eval_loss": 0.2702249586582184, |
|
"eval_precision": 0.872316715542522, |
|
"eval_recall": 0.8847972358610656, |
|
"eval_runtime": 1.7845, |
|
"eval_samples_per_second": 223.597, |
|
"eval_steps_per_second": 28.02, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.639573097229004, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1461, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8760282890453928, |
|
"eval_loss": 0.29228049516677856, |
|
"eval_precision": 0.8682260305697083, |
|
"eval_recall": 0.8855246408437898, |
|
"eval_runtime": 1.8077, |
|
"eval_samples_per_second": 220.726, |
|
"eval_steps_per_second": 27.66, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.177137613296509, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1449, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8947368421052632, |
|
"eval_f1": 0.8754533563232059, |
|
"eval_loss": 0.27906104922294617, |
|
"eval_precision": 0.8689781021897811, |
|
"eval_recall": 0.8830241862156756, |
|
"eval_runtime": 1.7931, |
|
"eval_samples_per_second": 222.52, |
|
"eval_steps_per_second": 27.885, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.6333515048027039, |
|
"learning_rate": 0.0, |
|
"loss": 0.1502, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8781334505389722, |
|
"eval_loss": 0.2797949016094208, |
|
"eval_precision": 0.872316715542522, |
|
"eval_recall": 0.8847972358610656, |
|
"eval_runtime": 1.7912, |
|
"eval_samples_per_second": 222.75, |
|
"eval_steps_per_second": 27.914, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.24180538537072355, |
|
"train_runtime": 620.8783, |
|
"train_samples_per_second": 117.189, |
|
"train_steps_per_second": 3.93 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|