|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.044961452484131, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5657, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6488125720138267, |
|
"eval_loss": 0.518221914768219, |
|
"eval_precision": 0.660425343073667, |
|
"eval_recall": 0.642434988179669, |
|
"eval_runtime": 5.1435, |
|
"eval_samples_per_second": 77.573, |
|
"eval_steps_per_second": 9.721, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.9827260971069336, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5109, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.6796350364963504, |
|
"eval_loss": 0.5051248073577881, |
|
"eval_precision": 0.674812030075188, |
|
"eval_recall": 0.6874431714857246, |
|
"eval_runtime": 5.053, |
|
"eval_samples_per_second": 78.963, |
|
"eval_steps_per_second": 9.895, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.8286046981811523, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.48, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7568922305764411, |
|
"eval_f1": 0.6947737005228665, |
|
"eval_loss": 0.4642585515975952, |
|
"eval_precision": 0.704743513567043, |
|
"eval_recall": 0.6879887252227678, |
|
"eval_runtime": 5.0487, |
|
"eval_samples_per_second": 79.031, |
|
"eval_steps_per_second": 9.904, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.501376152038574, |
|
"learning_rate": 4e-05, |
|
"loss": 0.434, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.7431297265852239, |
|
"eval_loss": 0.4281364977359772, |
|
"eval_precision": 0.7496659030164186, |
|
"eval_recall": 0.7378159665393708, |
|
"eval_runtime": 5.1077, |
|
"eval_samples_per_second": 78.117, |
|
"eval_steps_per_second": 9.789, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.9612770080566406, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4106, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7919799498746867, |
|
"eval_f1": 0.761811604105382, |
|
"eval_loss": 0.4194311499595642, |
|
"eval_precision": 0.7527992277992278, |
|
"eval_recall": 0.777823240589198, |
|
"eval_runtime": 5.0979, |
|
"eval_samples_per_second": 78.267, |
|
"eval_steps_per_second": 9.808, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.141845941543579, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.3812, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.8295739348370927, |
|
"eval_f1": 0.785416007592534, |
|
"eval_loss": 0.3935754895210266, |
|
"eval_precision": 0.8008173300551531, |
|
"eval_recall": 0.7744135297326786, |
|
"eval_runtime": 5.0723, |
|
"eval_samples_per_second": 78.662, |
|
"eval_steps_per_second": 9.857, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.9107286930084229, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.3689, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8211781685593832, |
|
"eval_loss": 0.37001386284828186, |
|
"eval_precision": 0.8219964664310955, |
|
"eval_recall": 0.8203764320785598, |
|
"eval_runtime": 5.0582, |
|
"eval_samples_per_second": 78.882, |
|
"eval_steps_per_second": 9.885, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.303086280822754, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3489, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.7905211912943871, |
|
"eval_loss": 0.3656045198440552, |
|
"eval_precision": 0.8087878787878788, |
|
"eval_recall": 0.7779596290234588, |
|
"eval_runtime": 5.1093, |
|
"eval_samples_per_second": 78.093, |
|
"eval_steps_per_second": 9.786, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.8105268478393555, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3502, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.7954669127215085, |
|
"eval_loss": 0.3640279769897461, |
|
"eval_precision": 0.8101109130520895, |
|
"eval_recall": 0.7847335879250773, |
|
"eval_runtime": 5.0578, |
|
"eval_samples_per_second": 78.888, |
|
"eval_steps_per_second": 9.886, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 7.1581597328186035, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3349, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8345864661654135, |
|
"eval_f1": 0.7917273014868713, |
|
"eval_loss": 0.3607986867427826, |
|
"eval_precision": 0.8074456774536514, |
|
"eval_recall": 0.780460083651573, |
|
"eval_runtime": 5.0779, |
|
"eval_samples_per_second": 78.576, |
|
"eval_steps_per_second": 9.847, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.5321431159973145, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3189, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.7991821327461466, |
|
"eval_loss": 0.3574356436729431, |
|
"eval_precision": 0.8127623983206507, |
|
"eval_recall": 0.7890070921985816, |
|
"eval_runtime": 5.0826, |
|
"eval_samples_per_second": 78.504, |
|
"eval_steps_per_second": 9.838, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 10.805797576904297, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3121, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8471177944862155, |
|
"eval_f1": 0.813209415123445, |
|
"eval_loss": 0.3547250032424927, |
|
"eval_precision": 0.8175087108013936, |
|
"eval_recall": 0.809328968903437, |
|
"eval_runtime": 5.0982, |
|
"eval_samples_per_second": 78.263, |
|
"eval_steps_per_second": 9.807, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.36875057220459, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.3181, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8521303258145363, |
|
"eval_f1": 0.8121903546212454, |
|
"eval_loss": 0.347785085439682, |
|
"eval_precision": 0.8331751305173232, |
|
"eval_recall": 0.7978723404255319, |
|
"eval_runtime": 5.0654, |
|
"eval_samples_per_second": 78.769, |
|
"eval_steps_per_second": 9.871, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 10.049259185791016, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3092, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8252627627627628, |
|
"eval_loss": 0.34348130226135254, |
|
"eval_precision": 0.8374149659863945, |
|
"eval_recall": 0.8156937625022731, |
|
"eval_runtime": 5.0614, |
|
"eval_samples_per_second": 78.833, |
|
"eval_steps_per_second": 9.879, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.0126718282699585, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.3018, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8200130662020906, |
|
"eval_loss": 0.34661754965782166, |
|
"eval_precision": 0.8296312892075278, |
|
"eval_recall": 0.812147663211493, |
|
"eval_runtime": 5.0618, |
|
"eval_samples_per_second": 78.825, |
|
"eval_steps_per_second": 9.878, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 7.444075584411621, |
|
"learning_rate": 1e-05, |
|
"loss": 0.2955, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8271551457392166, |
|
"eval_loss": 0.33646759390830994, |
|
"eval_precision": 0.8347358430876305, |
|
"eval_recall": 0.8206946717585015, |
|
"eval_runtime": 5.2151, |
|
"eval_samples_per_second": 76.508, |
|
"eval_steps_per_second": 9.588, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 4.367713451385498, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.2917, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8252627627627628, |
|
"eval_loss": 0.33527326583862305, |
|
"eval_precision": 0.8374149659863945, |
|
"eval_recall": 0.8156937625022731, |
|
"eval_runtime": 5.0724, |
|
"eval_samples_per_second": 78.661, |
|
"eval_steps_per_second": 9.857, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 3.2525553703308105, |
|
"learning_rate": 5e-06, |
|
"loss": 0.2956, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8596491228070176, |
|
"eval_f1": 0.8262195121951219, |
|
"eval_loss": 0.3378874957561493, |
|
"eval_precision": 0.8360165151709128, |
|
"eval_recall": 0.8181942171303873, |
|
"eval_runtime": 5.0484, |
|
"eval_samples_per_second": 79.035, |
|
"eval_steps_per_second": 9.904, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.5347208976745605, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.2899, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8305599245045612, |
|
"eval_loss": 0.3353268504142761, |
|
"eval_precision": 0.8454801889267909, |
|
"eval_recall": 0.8192398617930533, |
|
"eval_runtime": 5.0542, |
|
"eval_samples_per_second": 78.945, |
|
"eval_steps_per_second": 9.893, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 8.835315704345703, |
|
"learning_rate": 0.0, |
|
"loss": 0.2885, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8288555273932541, |
|
"eval_loss": 0.3355979323387146, |
|
"eval_precision": 0.8399124219202783, |
|
"eval_recall": 0.8199672667757774, |
|
"eval_runtime": 5.1038, |
|
"eval_samples_per_second": 78.178, |
|
"eval_steps_per_second": 9.797, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7597037114448000.0, |
|
"train_loss": 0.3603187435963115, |
|
"train_runtime": 1953.0761, |
|
"train_samples_per_second": 37.254, |
|
"train_steps_per_second": 1.249 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7597037114448000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|