{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.044961452484131, "learning_rate": 4.75e-05, "loss": 0.5657, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6488125720138267, "eval_loss": 0.518221914768219, "eval_precision": 0.660425343073667, "eval_recall": 0.642434988179669, "eval_runtime": 5.1317, "eval_samples_per_second": 77.752, "eval_steps_per_second": 9.743, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.9827260971069336, "learning_rate": 4.5e-05, "loss": 0.5109, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6796350364963504, "eval_loss": 0.5051248073577881, "eval_precision": 0.674812030075188, "eval_recall": 0.6874431714857246, "eval_runtime": 5.0559, "eval_samples_per_second": 78.917, "eval_steps_per_second": 9.889, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.8286046981811523, "learning_rate": 4.25e-05, "loss": 0.48, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7568922305764411, "eval_f1": 0.6947737005228665, "eval_loss": 0.4642585515975952, "eval_precision": 0.704743513567043, "eval_recall": 0.6879887252227678, "eval_runtime": 5.0508, "eval_samples_per_second": 78.997, "eval_steps_per_second": 9.899, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.501376152038574, "learning_rate": 4e-05, "loss": 0.434, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.7431297265852239, "eval_loss": 0.4281364977359772, "eval_precision": 0.7496659030164186, "eval_recall": 0.7378159665393708, "eval_runtime": 5.1744, "eval_samples_per_second": 77.111, "eval_steps_per_second": 9.663, "step": 488 }, { "epoch": 5.0, "grad_norm": 2.9612770080566406, "learning_rate": 3.7500000000000003e-05, "loss": 0.4106, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.761811604105382, "eval_loss": 0.4194311499595642, "eval_precision": 0.7527992277992278, "eval_recall": 0.777823240589198, "eval_runtime": 5.0693, "eval_samples_per_second": 78.709, "eval_steps_per_second": 9.863, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.141845941543579, "learning_rate": 3.5e-05, "loss": 0.3812, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8295739348370927, "eval_f1": 0.785416007592534, "eval_loss": 0.3935754895210266, "eval_precision": 0.8008173300551531, "eval_recall": 0.7744135297326786, "eval_runtime": 5.0513, "eval_samples_per_second": 78.989, "eval_steps_per_second": 9.898, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.9107286930084229, "learning_rate": 3.2500000000000004e-05, "loss": 0.3689, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8211781685593832, "eval_loss": 0.37001386284828186, "eval_precision": 0.8219964664310955, "eval_recall": 0.8203764320785598, "eval_runtime": 5.0579, "eval_samples_per_second": 78.886, "eval_steps_per_second": 9.885, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.303086280822754, "learning_rate": 3e-05, "loss": 0.3489, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.7905211912943871, "eval_loss": 0.3656045198440552, "eval_precision": 0.8087878787878788, "eval_recall": 0.7779596290234588, "eval_runtime": 5.1317, "eval_samples_per_second": 77.752, "eval_steps_per_second": 9.743, "step": 976 }, { "epoch": 9.0, "grad_norm": 4.8105268478393555, "learning_rate": 2.7500000000000004e-05, "loss": 0.3502, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.7954669127215085, "eval_loss": 0.3640279769897461, "eval_precision": 0.8101109130520895, "eval_recall": 0.7847335879250773, "eval_runtime": 5.0641, "eval_samples_per_second": 78.79, "eval_steps_per_second": 9.873, "step": 1098 }, { "epoch": 10.0, "grad_norm": 7.1581597328186035, "learning_rate": 2.5e-05, "loss": 0.3349, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.7917273014868713, "eval_loss": 0.3607986867427826, "eval_precision": 0.8074456774536514, "eval_recall": 0.780460083651573, "eval_runtime": 5.0646, "eval_samples_per_second": 78.782, "eval_steps_per_second": 9.872, "step": 1220 }, { "epoch": 11.0, "grad_norm": 4.5321431159973145, "learning_rate": 2.25e-05, "loss": 0.3189, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.7991821327461466, "eval_loss": 0.3574356436729431, "eval_precision": 0.8127623983206507, "eval_recall": 0.7890070921985816, "eval_runtime": 5.0619, "eval_samples_per_second": 78.824, "eval_steps_per_second": 9.878, "step": 1342 }, { "epoch": 12.0, "grad_norm": 10.805797576904297, "learning_rate": 2e-05, "loss": 0.3121, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.813209415123445, "eval_loss": 0.3547250032424927, "eval_precision": 0.8175087108013936, "eval_recall": 0.809328968903437, "eval_runtime": 5.0575, "eval_samples_per_second": 78.893, "eval_steps_per_second": 9.886, "step": 1464 }, { "epoch": 13.0, "grad_norm": 4.36875057220459, "learning_rate": 1.75e-05, "loss": 0.3181, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8121903546212454, "eval_loss": 0.347785085439682, "eval_precision": 0.8331751305173232, "eval_recall": 0.7978723404255319, "eval_runtime": 5.0847, "eval_samples_per_second": 78.471, "eval_steps_per_second": 9.833, "step": 1586 }, { "epoch": 14.0, "grad_norm": 10.049259185791016, "learning_rate": 1.5e-05, "loss": 0.3092, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8252627627627628, "eval_loss": 0.34348130226135254, "eval_precision": 0.8374149659863945, "eval_recall": 0.8156937625022731, "eval_runtime": 5.0603, "eval_samples_per_second": 78.849, "eval_steps_per_second": 9.881, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.0126718282699585, "learning_rate": 1.25e-05, "loss": 0.3018, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8200130662020906, "eval_loss": 0.34661754965782166, "eval_precision": 0.8296312892075278, "eval_recall": 0.812147663211493, "eval_runtime": 5.0762, "eval_samples_per_second": 78.603, "eval_steps_per_second": 9.85, "step": 1830 }, { "epoch": 16.0, "grad_norm": 7.444075584411621, "learning_rate": 1e-05, "loss": 0.2955, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8271551457392166, "eval_loss": 0.33646759390830994, "eval_precision": 0.8347358430876305, "eval_recall": 0.8206946717585015, "eval_runtime": 5.138, "eval_samples_per_second": 77.657, "eval_steps_per_second": 9.731, "step": 1952 }, { "epoch": 17.0, "grad_norm": 4.367713451385498, "learning_rate": 7.5e-06, "loss": 0.2917, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8252627627627628, "eval_loss": 0.33527326583862305, "eval_precision": 0.8374149659863945, "eval_recall": 0.8156937625022731, "eval_runtime": 5.05, "eval_samples_per_second": 79.01, "eval_steps_per_second": 9.901, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.2525553703308105, "learning_rate": 5e-06, "loss": 0.2956, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8262195121951219, "eval_loss": 0.3378874957561493, "eval_precision": 0.8360165151709128, "eval_recall": 0.8181942171303873, "eval_runtime": 5.0525, "eval_samples_per_second": 78.971, "eval_steps_per_second": 9.896, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.5347208976745605, "learning_rate": 2.5e-06, "loss": 0.2899, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8305599245045612, "eval_loss": 0.3353268504142761, "eval_precision": 0.8454801889267909, "eval_recall": 0.8192398617930533, "eval_runtime": 5.0467, "eval_samples_per_second": 79.061, "eval_steps_per_second": 9.907, "step": 2318 }, { "epoch": 20.0, "grad_norm": 8.835315704345703, "learning_rate": 0.0, "loss": 0.2885, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8288555273932541, "eval_loss": 0.3355979323387146, "eval_precision": 0.8399124219202783, "eval_recall": 0.8199672667757774, "eval_runtime": 5.0576, "eval_samples_per_second": 78.891, "eval_steps_per_second": 9.886, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7597037114448000.0, "train_loss": 0.3603187435963115, "train_runtime": 1953.3721, "train_samples_per_second": 37.248, "train_steps_per_second": 1.249 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7597037114448000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }