|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.144454002380371, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5655, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6548442906574394, |
|
"eval_loss": 0.5178584456443787, |
|
"eval_precision": 0.6622605615324062, |
|
"eval_recall": 0.6499363520640116, |
|
"eval_runtime": 5.0893, |
|
"eval_samples_per_second": 78.4, |
|
"eval_steps_per_second": 9.825, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.57472562789917, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5048, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7518796992481203, |
|
"eval_f1": 0.7147221319467014, |
|
"eval_loss": 0.49262356758117676, |
|
"eval_precision": 0.707896051974013, |
|
"eval_recall": 0.7269503546099291, |
|
"eval_runtime": 5.089, |
|
"eval_samples_per_second": 78.404, |
|
"eval_steps_per_second": 9.825, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.7757699489593506, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4529, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7994987468671679, |
|
"eval_f1": 0.7593340973520719, |
|
"eval_loss": 0.430127888917923, |
|
"eval_precision": 0.7580882352941176, |
|
"eval_recall": 0.7606382978723404, |
|
"eval_runtime": 5.0577, |
|
"eval_samples_per_second": 78.889, |
|
"eval_steps_per_second": 9.886, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.7683963775634766, |
|
"learning_rate": 4e-05, |
|
"loss": 0.393, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8220551378446115, |
|
"eval_f1": 0.781440706080221, |
|
"eval_loss": 0.3863331973552704, |
|
"eval_precision": 0.787071406102548, |
|
"eval_recall": 0.776595744680851, |
|
"eval_runtime": 5.0509, |
|
"eval_samples_per_second": 78.996, |
|
"eval_steps_per_second": 9.899, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.220449924468994, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.3754, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.800328853302831, |
|
"eval_loss": 0.38681742548942566, |
|
"eval_precision": 0.7891990821678322, |
|
"eval_recall": 0.8208765230041826, |
|
"eval_runtime": 5.0595, |
|
"eval_samples_per_second": 78.862, |
|
"eval_steps_per_second": 9.882, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.7665016651153564, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3455, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8446115288220551, |
|
"eval_f1": 0.8125568285142754, |
|
"eval_loss": 0.36048319935798645, |
|
"eval_precision": 0.8125568285142754, |
|
"eval_recall": 0.8125568285142754, |
|
"eval_runtime": 5.0485, |
|
"eval_samples_per_second": 79.034, |
|
"eval_steps_per_second": 9.904, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.0729649066925049, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3344, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8228567054500919, |
|
"eval_loss": 0.33959081768989563, |
|
"eval_precision": 0.8263351692555232, |
|
"eval_recall": 0.8196490270958356, |
|
"eval_runtime": 5.0547, |
|
"eval_samples_per_second": 78.936, |
|
"eval_steps_per_second": 9.892, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.983180522918701, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3157, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8368501045387564, |
|
"eval_loss": 0.3319353461265564, |
|
"eval_precision": 0.8435514312676942, |
|
"eval_recall": 0.8310147299509002, |
|
"eval_runtime": 5.0701, |
|
"eval_samples_per_second": 78.696, |
|
"eval_steps_per_second": 9.862, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 8.283629417419434, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3076, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8209821152299028, |
|
"eval_loss": 0.32728075981140137, |
|
"eval_precision": 0.8284245491932933, |
|
"eval_recall": 0.8146481178396072, |
|
"eval_runtime": 5.0743, |
|
"eval_samples_per_second": 78.631, |
|
"eval_steps_per_second": 9.854, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.000823020935059, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2948, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8448388501742161, |
|
"eval_loss": 0.3238257169723511, |
|
"eval_precision": 0.8551721930610677, |
|
"eval_recall": 0.8363338788870704, |
|
"eval_runtime": 5.0959, |
|
"eval_samples_per_second": 78.299, |
|
"eval_steps_per_second": 9.812, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.291066884994507, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.2737, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8395012067578439, |
|
"eval_loss": 0.31985798478126526, |
|
"eval_precision": 0.8473584308763049, |
|
"eval_recall": 0.8327877795962902, |
|
"eval_runtime": 5.0445, |
|
"eval_samples_per_second": 79.096, |
|
"eval_steps_per_second": 9.912, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.703360557556152, |
|
"learning_rate": 2e-05, |
|
"loss": 0.2741, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8315338681464504, |
|
"eval_loss": 0.31903842091560364, |
|
"eval_precision": 0.8299369747899159, |
|
"eval_recall": 0.8331969448990726, |
|
"eval_runtime": 5.0933, |
|
"eval_samples_per_second": 78.338, |
|
"eval_steps_per_second": 9.817, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.6231120824813843, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.275, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8771929824561403, |
|
"eval_f1": 0.8458135188208294, |
|
"eval_loss": 0.3146449625492096, |
|
"eval_precision": 0.8627946127946129, |
|
"eval_recall": 0.8331060192762321, |
|
"eval_runtime": 5.0519, |
|
"eval_samples_per_second": 78.98, |
|
"eval_steps_per_second": 9.897, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 7.953589916229248, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2736, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8403508771929824, |
|
"eval_loss": 0.3103943169116974, |
|
"eval_precision": 0.8460491741741742, |
|
"eval_recall": 0.8352882342244045, |
|
"eval_runtime": 5.0653, |
|
"eval_samples_per_second": 78.771, |
|
"eval_steps_per_second": 9.871, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1334174871444702, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.263, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8401647707947546, |
|
"eval_loss": 0.3111773729324341, |
|
"eval_precision": 0.8393298751432535, |
|
"eval_recall": 0.8410165484633569, |
|
"eval_runtime": 5.0778, |
|
"eval_samples_per_second": 78.577, |
|
"eval_steps_per_second": 9.847, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.466490745544434, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2583, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8461962888779714, |
|
"eval_loss": 0.3085690140724182, |
|
"eval_precision": 0.8453465227094517, |
|
"eval_recall": 0.8470631023822512, |
|
"eval_runtime": 5.0785, |
|
"eval_samples_per_second": 78.567, |
|
"eval_steps_per_second": 9.846, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 7.930349826812744, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2544, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8421640488656195, |
|
"eval_loss": 0.30654484033584595, |
|
"eval_precision": 0.8512313860252005, |
|
"eval_recall": 0.8345608292416803, |
|
"eval_runtime": 5.0605, |
|
"eval_samples_per_second": 78.845, |
|
"eval_steps_per_second": 9.88, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 6.3030524253845215, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2594, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8696741854636592, |
|
"eval_f1": 0.8411818738518064, |
|
"eval_loss": 0.30556628108024597, |
|
"eval_precision": 0.8448542607834644, |
|
"eval_recall": 0.8377886888525186, |
|
"eval_runtime": 5.1023, |
|
"eval_samples_per_second": 78.2, |
|
"eval_steps_per_second": 9.799, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.6230645179748535, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.256, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8721804511278195, |
|
"eval_f1": 0.8421640488656195, |
|
"eval_loss": 0.3042590618133545, |
|
"eval_precision": 0.8512313860252005, |
|
"eval_recall": 0.8345608292416803, |
|
"eval_runtime": 5.0513, |
|
"eval_samples_per_second": 78.99, |
|
"eval_steps_per_second": 9.898, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 8.38916301727295, |
|
"learning_rate": 0.0, |
|
"loss": 0.2515, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.87468671679198, |
|
"eval_f1": 0.8464912280701754, |
|
"eval_loss": 0.3035424053668976, |
|
"eval_precision": 0.8522897897897899, |
|
"eval_recall": 0.8413347881432988, |
|
"eval_runtime": 5.0496, |
|
"eval_samples_per_second": 79.017, |
|
"eval_steps_per_second": 9.902, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7609911792720000.0, |
|
"train_loss": 0.32642708059217107, |
|
"train_runtime": 1953.293, |
|
"train_samples_per_second": 37.25, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7609911792720000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|