adapters-opt-gptq-QLORA-super_glue-copa / trainer_state-opt-gptq-QLORA-super_glue-copa-sequence_classification.json
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 10.0,
  "eval_steps": 1,
  "global_step": 50,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "grad_norm": 23.256885528564453,
      "learning_rate": 2.5e-05,
      "loss": 0.9107,
      "step": 1
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.57,
      "eval_loss": 0.8262696862220764,
      "eval_runtime": 0.8414,
      "eval_samples_per_second": 118.852,
      "eval_steps_per_second": 3.566,
      "step": 1
    },
    {
      "epoch": 0.4,
      "grad_norm": 23.631181716918945,
      "learning_rate": 5e-05,
      "loss": 0.9117,
      "step": 2
    },
    {
      "epoch": 0.4,
      "eval_accuracy": 0.57,
      "eval_loss": 0.8167089819908142,
      "eval_runtime": 0.8385,
      "eval_samples_per_second": 119.261,
      "eval_steps_per_second": 3.578,
      "step": 2
    },
    {
      "epoch": 0.6,
      "grad_norm": 28.508174896240234,
      "learning_rate": 4.8958333333333335e-05,
      "loss": 0.9991,
      "step": 3
    },
    {
      "epoch": 0.6,
      "eval_accuracy": 0.57,
      "eval_loss": 0.7878709435462952,
      "eval_runtime": 0.8413,
      "eval_samples_per_second": 118.87,
      "eval_steps_per_second": 3.566,
      "step": 3
    },
    {
      "epoch": 0.8,
      "grad_norm": 19.008262634277344,
      "learning_rate": 4.791666666666667e-05,
      "loss": 0.8288,
      "step": 4
    },
    {
      "epoch": 0.8,
      "eval_accuracy": 0.56,
      "eval_loss": 0.7578710913658142,
      "eval_runtime": 0.8391,
      "eval_samples_per_second": 119.176,
      "eval_steps_per_second": 3.575,
      "step": 4
    },
    {
      "epoch": 1.0,
      "grad_norm": 23.889429092407227,
      "learning_rate": 4.6875e-05,
      "loss": 0.8763,
      "step": 5
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.53,
      "eval_loss": 0.7321679592132568,
      "eval_runtime": 0.8386,
      "eval_samples_per_second": 119.253,
      "eval_steps_per_second": 3.578,
      "step": 5
    },
    {
      "epoch": 1.2,
      "grad_norm": 9.946539878845215,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.7244,
      "step": 6
    },
    {
      "epoch": 1.2,
      "eval_accuracy": 0.55,
      "eval_loss": 0.7117968797683716,
      "eval_runtime": 0.8351,
      "eval_samples_per_second": 119.747,
      "eval_steps_per_second": 3.592,
      "step": 6
    },
    {
      "epoch": 1.4,
      "grad_norm": 9.386951446533203,
      "learning_rate": 4.4791666666666673e-05,
      "loss": 0.7601,
      "step": 7
    },
    {
      "epoch": 1.4,
      "eval_accuracy": 0.54,
      "eval_loss": 0.6952930688858032,
      "eval_runtime": 0.8393,
      "eval_samples_per_second": 119.145,
      "eval_steps_per_second": 3.574,
      "step": 7
    },
    {
      "epoch": 1.6,
      "grad_norm": 20.157466888427734,
      "learning_rate": 4.375e-05,
      "loss": 0.8343,
      "step": 8
    },
    {
      "epoch": 1.6,
      "eval_accuracy": 0.54,
      "eval_loss": 0.6882422566413879,
      "eval_runtime": 0.8343,
      "eval_samples_per_second": 119.865,
      "eval_steps_per_second": 3.596,
      "step": 8
    },
    {
      "epoch": 1.8,
      "grad_norm": 6.170377731323242,
      "learning_rate": 4.270833333333333e-05,
      "loss": 0.6825,
      "step": 9
    },
    {
      "epoch": 1.8,
      "eval_accuracy": 0.53,
      "eval_loss": 0.6919335126876831,
      "eval_runtime": 0.8402,
      "eval_samples_per_second": 119.014,
      "eval_steps_per_second": 3.57,
      "step": 9
    },
    {
      "epoch": 2.0,
      "grad_norm": 3.945521831512451,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.7545,
      "step": 10
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.45,
      "eval_loss": 0.6997266411781311,
      "eval_runtime": 0.8398,
      "eval_samples_per_second": 119.072,
      "eval_steps_per_second": 3.572,
      "step": 10
    },
    {
      "epoch": 2.2,
      "grad_norm": 8.659540176391602,
      "learning_rate": 4.0625000000000005e-05,
      "loss": 0.709,
      "step": 11
    },
    {
      "epoch": 2.2,
      "eval_accuracy": 0.45,
      "eval_loss": 0.7026562690734863,
      "eval_runtime": 0.8388,
      "eval_samples_per_second": 119.217,
      "eval_steps_per_second": 3.577,
      "step": 11
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.4722447395324707,
      "learning_rate": 3.958333333333333e-05,
      "loss": 0.6914,
      "step": 12
    },
    {
      "epoch": 2.4,
      "eval_accuracy": 0.46,
      "eval_loss": 0.704160213470459,
      "eval_runtime": 0.839,
      "eval_samples_per_second": 119.192,
      "eval_steps_per_second": 3.576,
      "step": 12
    },
    {
      "epoch": 2.6,
      "grad_norm": 4.033767223358154,
      "learning_rate": 3.854166666666667e-05,
      "loss": 0.6999,
      "step": 13
    },
    {
      "epoch": 2.6,
      "eval_accuracy": 0.48,
      "eval_loss": 0.7015135884284973,
      "eval_runtime": 0.8381,
      "eval_samples_per_second": 119.324,
      "eval_steps_per_second": 3.58,
      "step": 13
    },
    {
      "epoch": 2.8,
      "grad_norm": 2.7851364612579346,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.6829,
      "step": 14
    },
    {
      "epoch": 2.8,
      "eval_accuracy": 0.49,
      "eval_loss": 0.7031835913658142,
      "eval_runtime": 0.839,
      "eval_samples_per_second": 119.197,
      "eval_steps_per_second": 3.576,
      "step": 14
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.5898079872131348,
      "learning_rate": 3.6458333333333336e-05,
      "loss": 0.6776,
      "step": 15
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7040430307388306,
      "eval_runtime": 0.8373,
      "eval_samples_per_second": 119.43,
      "eval_steps_per_second": 3.583,
      "step": 15
    },
    {
      "epoch": 3.2,
      "grad_norm": 7.040603160858154,
      "learning_rate": 3.541666666666667e-05,
      "loss": 0.7151,
      "step": 16
    },
    {
      "epoch": 3.2,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7056737542152405,
      "eval_runtime": 0.8384,
      "eval_samples_per_second": 119.274,
      "eval_steps_per_second": 3.578,
      "step": 16
    },
    {
      "epoch": 3.4,
      "grad_norm": 6.8378705978393555,
      "learning_rate": 3.4375e-05,
      "loss": 0.7186,
      "step": 17
    },
    {
      "epoch": 3.4,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7082374095916748,
      "eval_runtime": 0.8394,
      "eval_samples_per_second": 119.131,
      "eval_steps_per_second": 3.574,
      "step": 17
    },
    {
      "epoch": 3.6,
      "grad_norm": 4.7442545890808105,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.7058,
      "step": 18
    },
    {
      "epoch": 3.6,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7107178568840027,
      "eval_runtime": 0.7888,
      "eval_samples_per_second": 126.768,
      "eval_steps_per_second": 3.803,
      "step": 18
    },
    {
      "epoch": 3.8,
      "grad_norm": 5.4023237228393555,
      "learning_rate": 3.229166666666667e-05,
      "loss": 0.7207,
      "step": 19
    },
    {
      "epoch": 3.8,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7103956341743469,
      "eval_runtime": 0.8395,
      "eval_samples_per_second": 119.117,
      "eval_steps_per_second": 3.573,
      "step": 19
    },
    {
      "epoch": 4.0,
      "grad_norm": 5.683109283447266,
      "learning_rate": 3.125e-05,
      "loss": 0.6839,
      "step": 20
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7080614566802979,
      "eval_runtime": 0.8405,
      "eval_samples_per_second": 118.971,
      "eval_steps_per_second": 3.569,
      "step": 20
    },
    {
      "epoch": 4.2,
      "grad_norm": 4.991653919219971,
      "learning_rate": 3.0208333333333334e-05,
      "loss": 0.6378,
      "step": 21
    },
    {
      "epoch": 4.2,
      "eval_accuracy": 0.49,
      "eval_loss": 0.7062841653823853,
      "eval_runtime": 0.839,
      "eval_samples_per_second": 119.188,
      "eval_steps_per_second": 3.576,
      "step": 21
    },
    {
      "epoch": 4.4,
      "grad_norm": 4.332605361938477,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.7266,
      "step": 22
    },
    {
      "epoch": 4.4,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7057519555091858,
      "eval_runtime": 0.8392,
      "eval_samples_per_second": 119.161,
      "eval_steps_per_second": 3.575,
      "step": 22
    },
    {
      "epoch": 4.6,
      "grad_norm": 3.1174395084381104,
      "learning_rate": 2.8125000000000003e-05,
      "loss": 0.6729,
      "step": 23
    },
    {
      "epoch": 4.6,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7046825885772705,
      "eval_runtime": 0.8377,
      "eval_samples_per_second": 119.373,
      "eval_steps_per_second": 3.581,
      "step": 23
    },
    {
      "epoch": 4.8,
      "grad_norm": 3.505262613296509,
      "learning_rate": 2.7083333333333332e-05,
      "loss": 0.7189,
      "step": 24
    },
    {
      "epoch": 4.8,
      "eval_accuracy": 0.46,
      "eval_loss": 0.702241063117981,
      "eval_runtime": 0.8383,
      "eval_samples_per_second": 119.288,
      "eval_steps_per_second": 3.579,
      "step": 24
    },
    {
      "epoch": 5.0,
      "grad_norm": 3.249929904937744,
      "learning_rate": 2.604166666666667e-05,
      "loss": 0.7092,
      "step": 25
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7012988328933716,
      "eval_runtime": 0.8369,
      "eval_samples_per_second": 119.494,
      "eval_steps_per_second": 3.585,
      "step": 25
    },
    {
      "epoch": 5.2,
      "grad_norm": 2.5304999351501465,
      "learning_rate": 2.5e-05,
      "loss": 0.7145,
      "step": 26
    },
    {
      "epoch": 5.2,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7005956768989563,
      "eval_runtime": 0.836,
      "eval_samples_per_second": 119.623,
      "eval_steps_per_second": 3.589,
      "step": 26
    },
    {
      "epoch": 5.4,
      "grad_norm": 2.5304205417633057,
      "learning_rate": 2.3958333333333334e-05,
      "loss": 0.6946,
      "step": 27
    },
    {
      "epoch": 5.4,
      "eval_accuracy": 0.48,
      "eval_loss": 0.7012646198272705,
      "eval_runtime": 0.8366,
      "eval_samples_per_second": 119.526,
      "eval_steps_per_second": 3.586,
      "step": 27
    },
    {
      "epoch": 5.6,
      "grad_norm": 7.720176696777344,
      "learning_rate": 2.2916666666666667e-05,
      "loss": 0.7456,
      "step": 28
    },
    {
      "epoch": 5.6,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7020359635353088,
      "eval_runtime": 0.8393,
      "eval_samples_per_second": 119.141,
      "eval_steps_per_second": 3.574,
      "step": 28
    },
    {
      "epoch": 5.8,
      "grad_norm": 4.178812026977539,
      "learning_rate": 2.1875e-05,
      "loss": 0.7231,
      "step": 29
    },
    {
      "epoch": 5.8,
      "eval_accuracy": 0.47,
      "eval_loss": 0.7020019292831421,
      "eval_runtime": 0.8397,
      "eval_samples_per_second": 119.086,
      "eval_steps_per_second": 3.573,
      "step": 29
    },
    {
      "epoch": 6.0,
      "grad_norm": 6.495377063751221,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.6874,
      "step": 30
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.46,
      "eval_loss": 0.7010400295257568,
      "eval_runtime": 0.7916,
      "eval_samples_per_second": 126.329,
      "eval_steps_per_second": 3.79,
      "step": 30
    },
    {
      "epoch": 6.2,
      "grad_norm": 1.7229987382888794,
      "learning_rate": 1.9791666666666665e-05,
      "loss": 0.7039,
      "step": 31
    },
    {
      "epoch": 6.2,
      "eval_accuracy": 0.45,
      "eval_loss": 0.7006199359893799,
      "eval_runtime": 0.8371,
      "eval_samples_per_second": 119.466,
      "eval_steps_per_second": 3.584,
      "step": 31
    },
    {
      "epoch": 6.4,
      "grad_norm": 4.806630611419678,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.6858,
      "step": 32
    },
    {
      "epoch": 6.4,
      "eval_accuracy": 0.45,
      "eval_loss": 0.6990869045257568,
      "eval_runtime": 0.836,
      "eval_samples_per_second": 119.617,
      "eval_steps_per_second": 3.589,
      "step": 32
    },
    {
      "epoch": 6.6,
      "grad_norm": 2.4336676597595215,
      "learning_rate": 1.7708333333333335e-05,
      "loss": 0.6571,
      "step": 33
    },
    {
      "epoch": 6.6,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6985548138618469,
      "eval_runtime": 0.8383,
      "eval_samples_per_second": 119.293,
      "eval_steps_per_second": 3.579,
      "step": 33
    },
    {
      "epoch": 6.8,
      "grad_norm": 2.346224784851074,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.7069,
      "step": 34
    },
    {
      "epoch": 6.8,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6970605254173279,
      "eval_runtime": 0.8399,
      "eval_samples_per_second": 119.069,
      "eval_steps_per_second": 3.572,
      "step": 34
    },
    {
      "epoch": 7.0,
      "grad_norm": 6.023119926452637,
      "learning_rate": 1.5625e-05,
      "loss": 0.6878,
      "step": 35
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6974707245826721,
      "eval_runtime": 0.8356,
      "eval_samples_per_second": 119.671,
      "eval_steps_per_second": 3.59,
      "step": 35
    },
    {
      "epoch": 7.2,
      "grad_norm": 2.5812861919403076,
      "learning_rate": 1.4583333333333335e-05,
      "loss": 0.7148,
      "step": 36
    },
    {
      "epoch": 7.2,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6966310143470764,
      "eval_runtime": 0.8396,
      "eval_samples_per_second": 119.101,
      "eval_steps_per_second": 3.573,
      "step": 36
    },
    {
      "epoch": 7.4,
      "grad_norm": 6.9987263679504395,
      "learning_rate": 1.3541666666666666e-05,
      "loss": 0.6821,
      "step": 37
    },
    {
      "epoch": 7.4,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6957228183746338,
      "eval_runtime": 0.838,
      "eval_samples_per_second": 119.326,
      "eval_steps_per_second": 3.58,
      "step": 37
    },
    {
      "epoch": 7.6,
      "grad_norm": 4.426673889160156,
      "learning_rate": 1.25e-05,
      "loss": 0.6866,
      "step": 38
    },
    {
      "epoch": 7.6,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6958692073822021,
      "eval_runtime": 0.8381,
      "eval_samples_per_second": 119.321,
      "eval_steps_per_second": 3.58,
      "step": 38
    },
    {
      "epoch": 7.8,
      "grad_norm": 1.72467839717865,
      "learning_rate": 1.1458333333333333e-05,
      "loss": 0.6441,
      "step": 39
    },
    {
      "epoch": 7.8,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6971582174301147,
      "eval_runtime": 0.8397,
      "eval_samples_per_second": 119.085,
      "eval_steps_per_second": 3.573,
      "step": 39
    },
    {
      "epoch": 8.0,
      "grad_norm": 10.352705001831055,
      "learning_rate": 1.0416666666666668e-05,
      "loss": 0.7264,
      "step": 40
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6962793469429016,
      "eval_runtime": 0.8388,
      "eval_samples_per_second": 119.219,
      "eval_steps_per_second": 3.577,
      "step": 40
    },
    {
      "epoch": 8.2,
      "grad_norm": 4.645693778991699,
      "learning_rate": 9.375000000000001e-06,
      "loss": 0.7164,
      "step": 41
    },
    {
      "epoch": 8.2,
      "eval_accuracy": 0.46,
      "eval_loss": 0.695371150970459,
      "eval_runtime": 0.839,
      "eval_samples_per_second": 119.183,
      "eval_steps_per_second": 3.575,
      "step": 41
    },
    {
      "epoch": 8.4,
      "grad_norm": 7.059908390045166,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.737,
      "step": 42
    },
    {
      "epoch": 8.4,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6947849988937378,
      "eval_runtime": 0.8369,
      "eval_samples_per_second": 119.483,
      "eval_steps_per_second": 3.585,
      "step": 42
    },
    {
      "epoch": 8.6,
      "grad_norm": 1.3762595653533936,
      "learning_rate": 7.2916666666666674e-06,
      "loss": 0.7173,
      "step": 43
    },
    {
      "epoch": 8.6,
      "eval_accuracy": 0.47,
      "eval_loss": 0.6942089796066284,
      "eval_runtime": 0.8392,
      "eval_samples_per_second": 119.159,
      "eval_steps_per_second": 3.575,
      "step": 43
    },
    {
      "epoch": 8.8,
      "grad_norm": 4.145285129547119,
      "learning_rate": 6.25e-06,
      "loss": 0.6933,
      "step": 44
    },
    {
      "epoch": 8.8,
      "eval_accuracy": 0.47,
      "eval_loss": 0.6938575506210327,
      "eval_runtime": 0.8362,
      "eval_samples_per_second": 119.582,
      "eval_steps_per_second": 3.587,
      "step": 44
    },
    {
      "epoch": 9.0,
      "grad_norm": 3.7309446334838867,
      "learning_rate": 5.208333333333334e-06,
      "loss": 0.7204,
      "step": 45
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.47,
      "eval_loss": 0.6939452886581421,
      "eval_runtime": 0.7873,
      "eval_samples_per_second": 127.008,
      "eval_steps_per_second": 3.81,
      "step": 45
    },
    {
      "epoch": 9.2,
      "grad_norm": 2.0658915042877197,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.707,
      "step": 46
    },
    {
      "epoch": 9.2,
      "eval_accuracy": 0.47,
      "eval_loss": 0.6938378810882568,
      "eval_runtime": 0.8363,
      "eval_samples_per_second": 119.577,
      "eval_steps_per_second": 3.587,
      "step": 46
    },
    {
      "epoch": 9.4,
      "grad_norm": 2.766176223754883,
      "learning_rate": 3.125e-06,
      "loss": 0.7343,
      "step": 47
    },
    {
      "epoch": 9.4,
      "eval_accuracy": 0.47,
      "eval_loss": 0.6941795945167542,
      "eval_runtime": 0.8392,
      "eval_samples_per_second": 119.163,
      "eval_steps_per_second": 3.575,
      "step": 47
    },
    {
      "epoch": 9.6,
      "grad_norm": 3.535402774810791,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 0.7072,
      "step": 48
    },
    {
      "epoch": 9.6,
      "eval_accuracy": 0.48,
      "eval_loss": 0.6940819621086121,
      "eval_runtime": 0.79,
      "eval_samples_per_second": 126.575,
      "eval_steps_per_second": 3.797,
      "step": 48
    },
    {
      "epoch": 9.8,
      "grad_norm": 4.9612932205200195,
      "learning_rate": 1.0416666666666667e-06,
      "loss": 0.6949,
      "step": 49
    },
    {
      "epoch": 9.8,
      "eval_accuracy": 0.47,
      "eval_loss": 0.6937304139137268,
      "eval_runtime": 0.8383,
      "eval_samples_per_second": 119.288,
      "eval_steps_per_second": 3.579,
      "step": 49
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.1242637634277344,
      "learning_rate": 0.0,
      "loss": 0.6862,
      "step": 50
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.46,
      "eval_loss": 0.6938574910163879,
      "eval_runtime": 0.7902,
      "eval_samples_per_second": 126.558,
      "eval_steps_per_second": 3.797,
      "step": 50
    },
    {
      "epoch": 10.0,
      "step": 50,
      "total_flos": 24115496386560.0,
      "train_loss": 0.7265405237674714,
      "train_runtime": 165.0104,
      "train_samples_per_second": 24.241,
      "train_steps_per_second": 0.303
    }
  ],
  "logging_steps": 1,
  "max_steps": 50,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "total_flos": 24115496386560.0,
  "train_batch_size": 10,
  "trial_name": null,
  "trial_params": null
}
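
Note: the trainer state above is plain JSON, so its "log_history" array can be inspected directly. Below is a minimal sketch (not part of the original artifact) of how the per-step eval records might be summarised with the Python standard library; the local file path is an assumption taken from the header above.

# Minimal sketch: summarise eval metrics from the trainer state shown above.
# Assumes a local copy of the JSON file at PATH (hypothetical path).
import json

PATH = "trainer_state-opt-gptq-QLORA-super_glue-copa-sequence_classification.json"

with open(PATH) as f:
    state = json.load(f)

# Training-step records carry "loss"; eval records carry "eval_accuracy".
train_records = [r for r in state["log_history"] if "loss" in r]
eval_records = [r for r in state["log_history"] if "eval_accuracy" in r]

for rec in eval_records:
    print(f"epoch {rec['epoch']:>4}: eval_accuracy={rec['eval_accuracy']:.2f}, "
          f"eval_loss={rec['eval_loss']:.4f}")

best = max(eval_records, key=lambda r: r["eval_accuracy"])
print(f"best eval_accuracy {best['eval_accuracy']:.2f} at step {best['step']}")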