|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.8857197761535645, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.563, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6586454703832753, |
|
"eval_loss": 0.5137906670570374, |
|
"eval_precision": 0.6636154141595185, |
|
"eval_recall": 0.65493726132024, |
|
"eval_runtime": 5.1569, |
|
"eval_samples_per_second": 77.372, |
|
"eval_steps_per_second": 9.696, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.8023645877838135, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.509, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7167919799498746, |
|
"eval_f1": 0.68198649992594, |
|
"eval_loss": 0.5057324767112732, |
|
"eval_precision": 0.6763453815261045, |
|
"eval_recall": 0.6996272049463539, |
|
"eval_runtime": 5.0504, |
|
"eval_samples_per_second": 79.004, |
|
"eval_steps_per_second": 9.9, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 4.055728912353516, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4924, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7393483709273183, |
|
"eval_f1": 0.6901433691756272, |
|
"eval_loss": 0.4707716703414917, |
|
"eval_precision": 0.6876546482856133, |
|
"eval_recall": 0.6930805601018367, |
|
"eval_runtime": 5.0475, |
|
"eval_samples_per_second": 79.049, |
|
"eval_steps_per_second": 9.906, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.744323253631592, |
|
"learning_rate": 4e-05, |
|
"loss": 0.468, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.7286143625434989, |
|
"eval_loss": 0.4378769099712372, |
|
"eval_precision": 0.741162203468669, |
|
"eval_recall": 0.7199945444626296, |
|
"eval_runtime": 5.0868, |
|
"eval_samples_per_second": 78.438, |
|
"eval_steps_per_second": 9.829, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 1.431583285331726, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4495, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7593984962406015, |
|
"eval_f1": 0.7313131313131314, |
|
"eval_loss": 0.44655734300613403, |
|
"eval_precision": 0.7233381157340986, |
|
"eval_recall": 0.7547735951991271, |
|
"eval_runtime": 5.1176, |
|
"eval_samples_per_second": 77.966, |
|
"eval_steps_per_second": 9.77, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.092684745788574, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4334, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7887122892379951, |
|
"eval_loss": 0.40413278341293335, |
|
"eval_precision": 0.7926829268292683, |
|
"eval_recall": 0.7851427532278596, |
|
"eval_runtime": 5.0433, |
|
"eval_samples_per_second": 79.114, |
|
"eval_steps_per_second": 9.914, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 2.259237289428711, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.415, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7994987468671679, |
|
"eval_f1": 0.765982404692082, |
|
"eval_loss": 0.4057486653327942, |
|
"eval_precision": 0.7590435228437963, |
|
"eval_recall": 0.7756410256410257, |
|
"eval_runtime": 5.0592, |
|
"eval_samples_per_second": 78.867, |
|
"eval_steps_per_second": 9.883, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.503615379333496, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3974, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7958954625621293, |
|
"eval_loss": 0.3851749002933502, |
|
"eval_precision": 0.7982456140350878, |
|
"eval_recall": 0.7936897617748682, |
|
"eval_runtime": 5.1017, |
|
"eval_samples_per_second": 78.209, |
|
"eval_steps_per_second": 9.801, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.705313205718994, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3849, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7894173351830629, |
|
"eval_loss": 0.3829491138458252, |
|
"eval_precision": 0.7880252100840336, |
|
"eval_recall": 0.7908710674668122, |
|
"eval_runtime": 5.058, |
|
"eval_samples_per_second": 78.884, |
|
"eval_steps_per_second": 9.885, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.997393608093262, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3771, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.8065102745953809, |
|
"eval_loss": 0.3785531520843506, |
|
"eval_precision": 0.8065102745953809, |
|
"eval_recall": 0.8065102745953809, |
|
"eval_runtime": 5.0614, |
|
"eval_samples_per_second": 78.831, |
|
"eval_steps_per_second": 9.879, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 3.873344659805298, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3633, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7992838965496833, |
|
"eval_loss": 0.3843457102775574, |
|
"eval_precision": 0.7931478693839741, |
|
"eval_recall": 0.8069194398981633, |
|
"eval_runtime": 5.0605, |
|
"eval_samples_per_second": 78.846, |
|
"eval_steps_per_second": 9.88, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 10.848413467407227, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3591, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.7992838965496833, |
|
"eval_loss": 0.3832751214504242, |
|
"eval_precision": 0.7931478693839741, |
|
"eval_recall": 0.8069194398981633, |
|
"eval_runtime": 5.0639, |
|
"eval_samples_per_second": 78.793, |
|
"eval_steps_per_second": 9.874, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.711672306060791, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.354, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.8065102745953809, |
|
"eval_loss": 0.3705191910266876, |
|
"eval_precision": 0.8065102745953809, |
|
"eval_recall": 0.8065102745953809, |
|
"eval_runtime": 5.0892, |
|
"eval_samples_per_second": 78.401, |
|
"eval_steps_per_second": 9.825, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 8.37232494354248, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3451, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.8049369344976196, |
|
"eval_loss": 0.37085026502609253, |
|
"eval_precision": 0.8027777777777778, |
|
"eval_recall": 0.8072376795781051, |
|
"eval_runtime": 5.0497, |
|
"eval_samples_per_second": 79.014, |
|
"eval_steps_per_second": 9.902, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.1326128244400024, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.3403, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.8026866442779643, |
|
"eval_loss": 0.3732873201370239, |
|
"eval_precision": 0.795995733394834, |
|
"eval_recall": 0.8111929441716675, |
|
"eval_runtime": 5.0568, |
|
"eval_samples_per_second": 78.903, |
|
"eval_steps_per_second": 9.888, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 6.462644100189209, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3282, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.8060710498409331, |
|
"eval_loss": 0.3714647889137268, |
|
"eval_precision": 0.7988372093023256, |
|
"eval_recall": 0.8154664484451719, |
|
"eval_runtime": 5.1224, |
|
"eval_samples_per_second": 77.894, |
|
"eval_steps_per_second": 9.761, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.0335631370544434, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.3286, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7999041923338897, |
|
"eval_loss": 0.36644819378852844, |
|
"eval_precision": 0.7965023376930815, |
|
"eval_recall": 0.803691580287325, |
|
"eval_runtime": 5.0626, |
|
"eval_samples_per_second": 78.813, |
|
"eval_steps_per_second": 9.876, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.8270199298858643, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3348, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7949075143216848, |
|
"eval_loss": 0.3669916093349457, |
|
"eval_precision": 0.7904105571847508, |
|
"eval_recall": 0.8001454809965449, |
|
"eval_runtime": 5.0702, |
|
"eval_samples_per_second": 78.695, |
|
"eval_steps_per_second": 9.861, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.275764465332031, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.325, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.801779557335113, |
|
"eval_loss": 0.36691346764564514, |
|
"eval_precision": 0.7960927960927962, |
|
"eval_recall": 0.8086924895435534, |
|
"eval_runtime": 5.0526, |
|
"eval_samples_per_second": 78.969, |
|
"eval_steps_per_second": 9.896, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 6.228915691375732, |
|
"learning_rate": 0.0, |
|
"loss": 0.3266, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.801779557335113, |
|
"eval_loss": 0.3671587407588959, |
|
"eval_precision": 0.7960927960927962, |
|
"eval_recall": 0.8086924895435534, |
|
"eval_runtime": 5.0972, |
|
"eval_samples_per_second": 78.279, |
|
"eval_steps_per_second": 9.809, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7590599775312000.0, |
|
"train_loss": 0.3947384412171411, |
|
"train_runtime": 1956.7085, |
|
"train_samples_per_second": 37.185, |
|
"train_steps_per_second": 1.247 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7590599775312000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|