adapters-llama2-gptq-QLORA-super_glue-copa
/
trainer_state-llama2-gptq-QLORA-super_glue-copa-sequence_classification.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 10.0, | |
"eval_steps": 1, | |
"global_step": 50, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.2, | |
"grad_norm": 78.1366195678711, | |
"learning_rate": 2.5e-05, | |
"loss": 1.9894, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_accuracy": 0.55, | |
"eval_loss": 1.7598272562026978, | |
"eval_runtime": 1.0005, | |
"eval_samples_per_second": 99.95, | |
"eval_steps_per_second": 2.999, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.4, | |
"grad_norm": 73.63365936279297, | |
"learning_rate": 5e-05, | |
"loss": 1.8056, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.4, | |
"eval_accuracy": 0.55, | |
"eval_loss": 1.6865791082382202, | |
"eval_runtime": 0.9969, | |
"eval_samples_per_second": 100.314, | |
"eval_steps_per_second": 3.009, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.6, | |
"grad_norm": 85.16033172607422, | |
"learning_rate": 4.8958333333333335e-05, | |
"loss": 1.9709, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.6, | |
"eval_accuracy": 0.55, | |
"eval_loss": 1.3770341873168945, | |
"eval_runtime": 1.0002, | |
"eval_samples_per_second": 99.978, | |
"eval_steps_per_second": 2.999, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.8, | |
"grad_norm": 69.2215347290039, | |
"learning_rate": 4.791666666666667e-05, | |
"loss": 1.4335, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.8, | |
"eval_accuracy": 0.54, | |
"eval_loss": 1.0766552686691284, | |
"eval_runtime": 0.9982, | |
"eval_samples_per_second": 100.18, | |
"eval_steps_per_second": 3.005, | |
"step": 4 | |
}, | |
{ | |
"epoch": 1.0, | |
"grad_norm": 69.37948608398438, | |
"learning_rate": 4.6875e-05, | |
"loss": 1.3065, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.0, | |
"eval_accuracy": 0.49, | |
"eval_loss": 0.8532812595367432, | |
"eval_runtime": 0.9986, | |
"eval_samples_per_second": 100.144, | |
"eval_steps_per_second": 3.004, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.2, | |
"grad_norm": 15.690597534179688, | |
"learning_rate": 4.5833333333333334e-05, | |
"loss": 0.8055, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.2, | |
"eval_accuracy": 0.43, | |
"eval_loss": 0.8792579174041748, | |
"eval_runtime": 0.9976, | |
"eval_samples_per_second": 100.242, | |
"eval_steps_per_second": 3.007, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.4, | |
"grad_norm": 20.13286018371582, | |
"learning_rate": 4.4791666666666673e-05, | |
"loss": 0.7901, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.4, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.9624806046485901, | |
"eval_runtime": 0.9981, | |
"eval_samples_per_second": 100.193, | |
"eval_steps_per_second": 3.006, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.6, | |
"grad_norm": 5.53256893157959, | |
"learning_rate": 4.375e-05, | |
"loss": 0.7524, | |
"step": 8 | |
}, | |
{ | |
"epoch": 1.6, | |
"eval_accuracy": 0.45, | |
"eval_loss": 0.9771338105201721, | |
"eval_runtime": 0.997, | |
"eval_samples_per_second": 100.301, | |
"eval_steps_per_second": 3.009, | |
"step": 8 | |
}, | |
{ | |
"epoch": 1.8, | |
"grad_norm": 29.057296752929688, | |
"learning_rate": 4.270833333333333e-05, | |
"loss": 0.7744, | |
"step": 9 | |
}, | |
{ | |
"epoch": 1.8, | |
"eval_accuracy": 0.41, | |
"eval_loss": 0.9202635884284973, | |
"eval_runtime": 0.997, | |
"eval_samples_per_second": 100.301, | |
"eval_steps_per_second": 3.009, | |
"step": 9 | |
}, | |
{ | |
"epoch": 2.0, | |
"grad_norm": 23.430646896362305, | |
"learning_rate": 4.166666666666667e-05, | |
"loss": 0.7863, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.0, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.8504493832588196, | |
"eval_runtime": 0.9988, | |
"eval_samples_per_second": 100.118, | |
"eval_steps_per_second": 3.004, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.2, | |
"grad_norm": 17.951738357543945, | |
"learning_rate": 4.0625000000000005e-05, | |
"loss": 0.6977, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.2, | |
"eval_accuracy": 0.43, | |
"eval_loss": 0.8065915703773499, | |
"eval_runtime": 0.9996, | |
"eval_samples_per_second": 100.035, | |
"eval_steps_per_second": 3.001, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.4, | |
"grad_norm": 16.09232521057129, | |
"learning_rate": 3.958333333333333e-05, | |
"loss": 0.75, | |
"step": 12 | |
}, | |
{ | |
"epoch": 2.4, | |
"eval_accuracy": 0.43, | |
"eval_loss": 0.7995800971984863, | |
"eval_runtime": 0.9964, | |
"eval_samples_per_second": 100.364, | |
"eval_steps_per_second": 3.011, | |
"step": 12 | |
}, | |
{ | |
"epoch": 2.6, | |
"grad_norm": 16.51948356628418, | |
"learning_rate": 3.854166666666667e-05, | |
"loss": 0.8325, | |
"step": 13 | |
}, | |
{ | |
"epoch": 2.6, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.7968456745147705, | |
"eval_runtime": 0.9962, | |
"eval_samples_per_second": 100.386, | |
"eval_steps_per_second": 3.012, | |
"step": 13 | |
}, | |
{ | |
"epoch": 2.8, | |
"grad_norm": 18.92729949951172, | |
"learning_rate": 3.7500000000000003e-05, | |
"loss": 0.7324, | |
"step": 14 | |
}, | |
{ | |
"epoch": 2.8, | |
"eval_accuracy": 0.46, | |
"eval_loss": 0.792890727519989, | |
"eval_runtime": 0.9961, | |
"eval_samples_per_second": 100.395, | |
"eval_steps_per_second": 3.012, | |
"step": 14 | |
}, | |
{ | |
"epoch": 3.0, | |
"grad_norm": 8.970295906066895, | |
"learning_rate": 3.6458333333333336e-05, | |
"loss": 0.7882, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.0, | |
"eval_accuracy": 0.43, | |
"eval_loss": 0.786152184009552, | |
"eval_runtime": 0.999, | |
"eval_samples_per_second": 100.104, | |
"eval_steps_per_second": 3.003, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.2, | |
"grad_norm": 7.676571369171143, | |
"learning_rate": 3.541666666666667e-05, | |
"loss": 0.7451, | |
"step": 16 | |
}, | |
{ | |
"epoch": 3.2, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.785478413105011, | |
"eval_runtime": 0.9973, | |
"eval_samples_per_second": 100.272, | |
"eval_steps_per_second": 3.008, | |
"step": 16 | |
}, | |
{ | |
"epoch": 3.4, | |
"grad_norm": 17.803117752075195, | |
"learning_rate": 3.4375e-05, | |
"loss": 0.7071, | |
"step": 17 | |
}, | |
{ | |
"epoch": 3.4, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.7892186045646667, | |
"eval_runtime": 1.0007, | |
"eval_samples_per_second": 99.933, | |
"eval_steps_per_second": 2.998, | |
"step": 17 | |
}, | |
{ | |
"epoch": 3.6, | |
"grad_norm": 16.301389694213867, | |
"learning_rate": 3.3333333333333335e-05, | |
"loss": 0.7665, | |
"step": 18 | |
}, | |
{ | |
"epoch": 3.6, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.8022948503494263, | |
"eval_runtime": 0.9993, | |
"eval_samples_per_second": 100.071, | |
"eval_steps_per_second": 3.002, | |
"step": 18 | |
}, | |
{ | |
"epoch": 3.8, | |
"grad_norm": 9.103254318237305, | |
"learning_rate": 3.229166666666667e-05, | |
"loss": 0.7503, | |
"step": 19 | |
}, | |
{ | |
"epoch": 3.8, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.8114063739776611, | |
"eval_runtime": 0.9942, | |
"eval_samples_per_second": 100.58, | |
"eval_steps_per_second": 3.017, | |
"step": 19 | |
}, | |
{ | |
"epoch": 4.0, | |
"grad_norm": 14.845060348510742, | |
"learning_rate": 3.125e-05, | |
"loss": 0.6844, | |
"step": 20 | |
}, | |
{ | |
"epoch": 4.0, | |
"eval_accuracy": 0.47, | |
"eval_loss": 0.8062010407447815, | |
"eval_runtime": 0.9971, | |
"eval_samples_per_second": 100.295, | |
"eval_steps_per_second": 3.009, | |
"step": 20 | |
}, | |
{ | |
"epoch": 4.2, | |
"grad_norm": 19.840051651000977, | |
"learning_rate": 3.0208333333333334e-05, | |
"loss": 0.7454, | |
"step": 21 | |
}, | |
{ | |
"epoch": 4.2, | |
"eval_accuracy": 0.44, | |
"eval_loss": 0.7904101610183716, | |
"eval_runtime": 0.9973, | |
"eval_samples_per_second": 100.271, | |
"eval_steps_per_second": 3.008, | |
"step": 21 | |
}, | |
{ | |
"epoch": 4.4, | |
"grad_norm": 3.92164945602417, | |
"learning_rate": 2.916666666666667e-05, | |
"loss": 0.6977, | |
"step": 22 | |
}, | |
{ | |
"epoch": 4.4, | |
"eval_accuracy": 0.45, | |
"eval_loss": 0.7847166657447815, | |
"eval_runtime": 0.9986, | |
"eval_samples_per_second": 100.142, | |
"eval_steps_per_second": 3.004, | |
"step": 22 | |
}, | |
{ | |
"epoch": 4.6, | |
"grad_norm": 7.615128040313721, | |
"learning_rate": 2.8125000000000003e-05, | |
"loss": 0.7374, | |
"step": 23 | |
}, | |
{ | |
"epoch": 4.6, | |
"eval_accuracy": 0.46, | |
"eval_loss": 0.7748146057128906, | |
"eval_runtime": 0.9975, | |
"eval_samples_per_second": 100.247, | |
"eval_steps_per_second": 3.007, | |
"step": 23 | |
}, | |
{ | |
"epoch": 4.8, | |
"grad_norm": 7.336858749389648, | |
"learning_rate": 2.7083333333333332e-05, | |
"loss": 0.6703, | |
"step": 24 | |
}, | |
{ | |
"epoch": 4.8, | |
"eval_accuracy": 0.48, | |
"eval_loss": 0.7589452266693115, | |
"eval_runtime": 0.9975, | |
"eval_samples_per_second": 100.247, | |
"eval_steps_per_second": 3.007, | |
"step": 24 | |
}, | |
{ | |
"epoch": 5.0, | |
"grad_norm": 8.808582305908203, | |
"learning_rate": 2.604166666666667e-05, | |
"loss": 0.6783, | |
"step": 25 | |
}, | |
{ | |
"epoch": 5.0, | |
"eval_accuracy": 0.49, | |
"eval_loss": 0.7491601705551147, | |
"eval_runtime": 0.9972, | |
"eval_samples_per_second": 100.279, | |
"eval_steps_per_second": 3.008, | |
"step": 25 | |
}, | |
{ | |
"epoch": 5.2, | |
"grad_norm": 3.35099720954895, | |
"learning_rate": 2.5e-05, | |
"loss": 0.6878, | |
"step": 26 | |
}, | |
{ | |
"epoch": 5.2, | |
"eval_accuracy": 0.47, | |
"eval_loss": 0.7470411062240601, | |
"eval_runtime": 0.9968, | |
"eval_samples_per_second": 100.316, | |
"eval_steps_per_second": 3.009, | |
"step": 26 | |
}, | |
{ | |
"epoch": 5.4, | |
"grad_norm": 6.638521194458008, | |
"learning_rate": 2.3958333333333334e-05, | |
"loss": 0.6909, | |
"step": 27 | |
}, | |
{ | |
"epoch": 5.4, | |
"eval_accuracy": 0.48, | |
"eval_loss": 0.74560546875, | |
"eval_runtime": 0.9981, | |
"eval_samples_per_second": 100.19, | |
"eval_steps_per_second": 3.006, | |
"step": 27 | |
}, | |
{ | |
"epoch": 5.6, | |
"grad_norm": 11.796442031860352, | |
"learning_rate": 2.2916666666666667e-05, | |
"loss": 0.6564, | |
"step": 28 | |
}, | |
{ | |
"epoch": 5.6, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.750224769115448, | |
"eval_runtime": 1.0001, | |
"eval_samples_per_second": 99.987, | |
"eval_steps_per_second": 3.0, | |
"step": 28 | |
}, | |
{ | |
"epoch": 5.8, | |
"grad_norm": 10.697065353393555, | |
"learning_rate": 2.1875e-05, | |
"loss": 0.7397, | |
"step": 29 | |
}, | |
{ | |
"epoch": 5.8, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.7502343654632568, | |
"eval_runtime": 0.9996, | |
"eval_samples_per_second": 100.035, | |
"eval_steps_per_second": 3.001, | |
"step": 29 | |
}, | |
{ | |
"epoch": 6.0, | |
"grad_norm": 16.58921241760254, | |
"learning_rate": 2.0833333333333336e-05, | |
"loss": 0.641, | |
"step": 30 | |
}, | |
{ | |
"epoch": 6.0, | |
"eval_accuracy": 0.51, | |
"eval_loss": 0.7463575601577759, | |
"eval_runtime": 0.9995, | |
"eval_samples_per_second": 100.051, | |
"eval_steps_per_second": 3.002, | |
"step": 30 | |
}, | |
{ | |
"epoch": 6.2, | |
"grad_norm": 7.851564407348633, | |
"learning_rate": 1.9791666666666665e-05, | |
"loss": 0.6272, | |
"step": 31 | |
}, | |
{ | |
"epoch": 6.2, | |
"eval_accuracy": 0.52, | |
"eval_loss": 0.7356445789337158, | |
"eval_runtime": 0.9987, | |
"eval_samples_per_second": 100.129, | |
"eval_steps_per_second": 3.004, | |
"step": 31 | |
}, | |
{ | |
"epoch": 6.4, | |
"grad_norm": 11.272120475769043, | |
"learning_rate": 1.8750000000000002e-05, | |
"loss": 0.6667, | |
"step": 32 | |
}, | |
{ | |
"epoch": 6.4, | |
"eval_accuracy": 0.51, | |
"eval_loss": 0.7220800518989563, | |
"eval_runtime": 0.9996, | |
"eval_samples_per_second": 100.044, | |
"eval_steps_per_second": 3.001, | |
"step": 32 | |
}, | |
{ | |
"epoch": 6.6, | |
"grad_norm": 4.333158493041992, | |
"learning_rate": 1.7708333333333335e-05, | |
"loss": 0.6604, | |
"step": 33 | |
}, | |
{ | |
"epoch": 6.6, | |
"eval_accuracy": 0.5, | |
"eval_loss": 0.7124804854393005, | |
"eval_runtime": 0.997, | |
"eval_samples_per_second": 100.298, | |
"eval_steps_per_second": 3.009, | |
"step": 33 | |
}, | |
{ | |
"epoch": 6.8, | |
"grad_norm": 4.1158127784729, | |
"learning_rate": 1.6666666666666667e-05, | |
"loss": 0.6196, | |
"step": 34 | |
}, | |
{ | |
"epoch": 6.8, | |
"eval_accuracy": 0.51, | |
"eval_loss": 0.7032715082168579, | |
"eval_runtime": 0.9988, | |
"eval_samples_per_second": 100.122, | |
"eval_steps_per_second": 3.004, | |
"step": 34 | |
}, | |
{ | |
"epoch": 7.0, | |
"grad_norm": 12.1450834274292, | |
"learning_rate": 1.5625e-05, | |
"loss": 0.6995, | |
"step": 35 | |
}, | |
{ | |
"epoch": 7.0, | |
"eval_accuracy": 0.52, | |
"eval_loss": 0.6988573670387268, | |
"eval_runtime": 0.9973, | |
"eval_samples_per_second": 100.27, | |
"eval_steps_per_second": 3.008, | |
"step": 35 | |
}, | |
{ | |
"epoch": 7.2, | |
"grad_norm": 5.940031051635742, | |
"learning_rate": 1.4583333333333335e-05, | |
"loss": 0.6227, | |
"step": 36 | |
}, | |
{ | |
"epoch": 7.2, | |
"eval_accuracy": 0.52, | |
"eval_loss": 0.6967969536781311, | |
"eval_runtime": 0.9976, | |
"eval_samples_per_second": 100.24, | |
"eval_steps_per_second": 3.007, | |
"step": 36 | |
}, | |
{ | |
"epoch": 7.4, | |
"grad_norm": 17.160533905029297, | |
"learning_rate": 1.3541666666666666e-05, | |
"loss": 0.6482, | |
"step": 37 | |
}, | |
{ | |
"epoch": 7.4, | |
"eval_accuracy": 0.54, | |
"eval_loss": 0.6968165636062622, | |
"eval_runtime": 0.9978, | |
"eval_samples_per_second": 100.221, | |
"eval_steps_per_second": 3.007, | |
"step": 37 | |
}, | |
{ | |
"epoch": 7.6, | |
"grad_norm": 2.506211757659912, | |
"learning_rate": 1.25e-05, | |
"loss": 0.6068, | |
"step": 38 | |
}, | |
{ | |
"epoch": 7.6, | |
"eval_accuracy": 0.57, | |
"eval_loss": 0.6994236707687378, | |
"eval_runtime": 0.9998, | |
"eval_samples_per_second": 100.024, | |
"eval_steps_per_second": 3.001, | |
"step": 38 | |
}, | |
{ | |
"epoch": 7.8, | |
"grad_norm": 4.562036991119385, | |
"learning_rate": 1.1458333333333333e-05, | |
"loss": 0.6346, | |
"step": 39 | |
}, | |
{ | |
"epoch": 7.8, | |
"eval_accuracy": 0.59, | |
"eval_loss": 0.6964159607887268, | |
"eval_runtime": 0.9979, | |
"eval_samples_per_second": 100.214, | |
"eval_steps_per_second": 3.006, | |
"step": 39 | |
}, | |
{ | |
"epoch": 8.0, | |
"grad_norm": 19.430461883544922, | |
"learning_rate": 1.0416666666666668e-05, | |
"loss": 0.6444, | |
"step": 40 | |
}, | |
{ | |
"epoch": 8.0, | |
"eval_accuracy": 0.59, | |
"eval_loss": 0.691386878490448, | |
"eval_runtime": 0.9975, | |
"eval_samples_per_second": 100.251, | |
"eval_steps_per_second": 3.008, | |
"step": 40 | |
}, | |
{ | |
"epoch": 8.2, | |
"grad_norm": 12.345283508300781, | |
"learning_rate": 9.375000000000001e-06, | |
"loss": 0.6287, | |
"step": 41 | |
}, | |
{ | |
"epoch": 8.2, | |
"eval_accuracy": 0.6, | |
"eval_loss": 0.6884568929672241, | |
"eval_runtime": 0.9962, | |
"eval_samples_per_second": 100.38, | |
"eval_steps_per_second": 3.011, | |
"step": 41 | |
}, | |
{ | |
"epoch": 8.4, | |
"grad_norm": 14.297842025756836, | |
"learning_rate": 8.333333333333334e-06, | |
"loss": 0.6198, | |
"step": 42 | |
}, | |
{ | |
"epoch": 8.4, | |
"eval_accuracy": 0.58, | |
"eval_loss": 0.6812109351158142, | |
"eval_runtime": 0.998, | |
"eval_samples_per_second": 100.196, | |
"eval_steps_per_second": 3.006, | |
"step": 42 | |
}, | |
{ | |
"epoch": 8.6, | |
"grad_norm": 4.835755825042725, | |
"learning_rate": 7.2916666666666674e-06, | |
"loss": 0.6104, | |
"step": 43 | |
}, | |
{ | |
"epoch": 8.6, | |
"eval_accuracy": 0.59, | |
"eval_loss": 0.6766307353973389, | |
"eval_runtime": 1.0003, | |
"eval_samples_per_second": 99.967, | |
"eval_steps_per_second": 2.999, | |
"step": 43 | |
}, | |
{ | |
"epoch": 8.8, | |
"grad_norm": 7.960282325744629, | |
"learning_rate": 6.25e-06, | |
"loss": 0.6007, | |
"step": 44 | |
}, | |
{ | |
"epoch": 8.8, | |
"eval_accuracy": 0.6, | |
"eval_loss": 0.6741991639137268, | |
"eval_runtime": 0.9973, | |
"eval_samples_per_second": 100.273, | |
"eval_steps_per_second": 3.008, | |
"step": 44 | |
}, | |
{ | |
"epoch": 9.0, | |
"grad_norm": 6.917936325073242, | |
"learning_rate": 5.208333333333334e-06, | |
"loss": 0.6092, | |
"step": 45 | |
}, | |
{ | |
"epoch": 9.0, | |
"eval_accuracy": 0.61, | |
"eval_loss": 0.6733300685882568, | |
"eval_runtime": 0.9989, | |
"eval_samples_per_second": 100.109, | |
"eval_steps_per_second": 3.003, | |
"step": 45 | |
}, | |
{ | |
"epoch": 9.2, | |
"grad_norm": 2.6899542808532715, | |
"learning_rate": 4.166666666666667e-06, | |
"loss": 0.6177, | |
"step": 46 | |
}, | |
{ | |
"epoch": 9.2, | |
"eval_accuracy": 0.6, | |
"eval_loss": 0.6735841631889343, | |
"eval_runtime": 1.0001, | |
"eval_samples_per_second": 99.991, | |
"eval_steps_per_second": 3.0, | |
"step": 46 | |
}, | |
{ | |
"epoch": 9.4, | |
"grad_norm": 3.7043120861053467, | |
"learning_rate": 3.125e-06, | |
"loss": 0.6172, | |
"step": 47 | |
}, | |
{ | |
"epoch": 9.4, | |
"eval_accuracy": 0.6, | |
"eval_loss": 0.673154354095459, | |
"eval_runtime": 0.9971, | |
"eval_samples_per_second": 100.293, | |
"eval_steps_per_second": 3.009, | |
"step": 47 | |
}, | |
{ | |
"epoch": 9.6, | |
"grad_norm": 4.5208659172058105, | |
"learning_rate": 2.0833333333333334e-06, | |
"loss": 0.6193, | |
"step": 48 | |
}, | |
{ | |
"epoch": 9.6, | |
"eval_accuracy": 0.61, | |
"eval_loss": 0.6734569072723389, | |
"eval_runtime": 0.9958, | |
"eval_samples_per_second": 100.42, | |
"eval_steps_per_second": 3.013, | |
"step": 48 | |
}, | |
{ | |
"epoch": 9.8, | |
"grad_norm": 10.0933837890625, | |
"learning_rate": 1.0416666666666667e-06, | |
"loss": 0.5712, | |
"step": 49 | |
}, | |
{ | |
"epoch": 9.8, | |
"eval_accuracy": 0.61, | |
"eval_loss": 0.6726464629173279, | |
"eval_runtime": 0.9963, | |
"eval_samples_per_second": 100.375, | |
"eval_steps_per_second": 3.011, | |
"step": 49 | |
}, | |
{ | |
"epoch": 10.0, | |
"grad_norm": 3.2562851905822754, | |
"learning_rate": 0.0, | |
"loss": 0.5908, | |
"step": 50 | |
}, | |
{ | |
"epoch": 10.0, | |
"eval_accuracy": 0.6, | |
"eval_loss": 0.6712987422943115, | |
"eval_runtime": 0.9969, | |
"eval_samples_per_second": 100.308, | |
"eval_steps_per_second": 3.009, | |
"step": 50 | |
}, | |
{ | |
"epoch": 10.0, | |
"step": 50, | |
"total_flos": 27858578374656.0, | |
"train_loss": 0.7865762293338776, | |
"train_runtime": 193.0614, | |
"train_samples_per_second": 20.719, | |
"train_steps_per_second": 0.259 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 50, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 10, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": false, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 27858578374656.0, | |
"train_batch_size": 10, | |
"trial_name": null, | |
"trial_params": null | |
} | |