|
{ |
|
"best_metric": 0.7, |
|
"best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-ADC-4cls-0922/checkpoint-122", |
|
"epoch": 200.0, |
|
"eval_steps": 500, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9655490517616272, |
|
"eval_runtime": 0.8298, |
|
"eval_samples_per_second": 84.356, |
|
"eval_steps_per_second": 2.41, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9653854370117188, |
|
"eval_runtime": 0.6383, |
|
"eval_samples_per_second": 109.671, |
|
"eval_steps_per_second": 3.133, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9650949835777283, |
|
"eval_runtime": 0.6412, |
|
"eval_samples_per_second": 109.167, |
|
"eval_steps_per_second": 3.119, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9646532535552979, |
|
"eval_runtime": 0.8218, |
|
"eval_samples_per_second": 85.18, |
|
"eval_steps_per_second": 2.434, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.0064, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9641380310058594, |
|
"eval_runtime": 0.6452, |
|
"eval_samples_per_second": 108.491, |
|
"eval_steps_per_second": 3.1, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9635317921638489, |
|
"eval_runtime": 0.6347, |
|
"eval_samples_per_second": 110.284, |
|
"eval_steps_per_second": 3.151, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9628700017929077, |
|
"eval_runtime": 0.8273, |
|
"eval_samples_per_second": 84.611, |
|
"eval_steps_per_second": 2.417, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9623274803161621, |
|
"eval_runtime": 0.6551, |
|
"eval_samples_per_second": 106.859, |
|
"eval_steps_per_second": 3.053, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9616996645927429, |
|
"eval_runtime": 0.646, |
|
"eval_samples_per_second": 108.363, |
|
"eval_steps_per_second": 3.096, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.9821, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9611372947692871, |
|
"eval_runtime": 0.8313, |
|
"eval_samples_per_second": 84.202, |
|
"eval_steps_per_second": 2.406, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9607454538345337, |
|
"eval_runtime": 0.8335, |
|
"eval_samples_per_second": 83.985, |
|
"eval_steps_per_second": 2.4, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9604489207267761, |
|
"eval_runtime": 0.8194, |
|
"eval_samples_per_second": 85.429, |
|
"eval_steps_per_second": 2.441, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9601203799247742, |
|
"eval_runtime": 0.8211, |
|
"eval_samples_per_second": 85.256, |
|
"eval_steps_per_second": 2.436, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9597390294075012, |
|
"eval_runtime": 0.6563, |
|
"eval_samples_per_second": 106.663, |
|
"eval_steps_per_second": 3.048, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.0278, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9591529965400696, |
|
"eval_runtime": 0.6495, |
|
"eval_samples_per_second": 107.778, |
|
"eval_steps_per_second": 3.079, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9581246376037598, |
|
"eval_runtime": 0.791, |
|
"eval_samples_per_second": 88.495, |
|
"eval_steps_per_second": 2.528, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9566996097564697, |
|
"eval_runtime": 0.6461, |
|
"eval_samples_per_second": 108.347, |
|
"eval_steps_per_second": 3.096, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9551236629486084, |
|
"eval_runtime": 0.6456, |
|
"eval_samples_per_second": 108.429, |
|
"eval_steps_per_second": 3.098, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9534342288970947, |
|
"eval_runtime": 0.8038, |
|
"eval_samples_per_second": 87.083, |
|
"eval_steps_per_second": 2.488, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9986, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9513913989067078, |
|
"eval_runtime": 0.6423, |
|
"eval_samples_per_second": 108.98, |
|
"eval_steps_per_second": 3.114, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9493252635002136, |
|
"eval_runtime": 0.6401, |
|
"eval_samples_per_second": 109.357, |
|
"eval_steps_per_second": 3.124, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9471749663352966, |
|
"eval_runtime": 0.7957, |
|
"eval_samples_per_second": 87.97, |
|
"eval_steps_per_second": 2.513, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9451875686645508, |
|
"eval_runtime": 0.6379, |
|
"eval_samples_per_second": 109.728, |
|
"eval_steps_per_second": 3.135, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.943417489528656, |
|
"eval_runtime": 0.6466, |
|
"eval_samples_per_second": 108.259, |
|
"eval_steps_per_second": 3.093, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.9973, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9419717788696289, |
|
"eval_runtime": 0.8115, |
|
"eval_samples_per_second": 86.264, |
|
"eval_steps_per_second": 2.465, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9404588937759399, |
|
"eval_runtime": 0.6332, |
|
"eval_samples_per_second": 110.551, |
|
"eval_steps_per_second": 3.159, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.6285714285714286, |
|
"eval_loss": 0.9387302994728088, |
|
"eval_runtime": 0.64, |
|
"eval_samples_per_second": 109.375, |
|
"eval_steps_per_second": 3.125, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6285714285714286, |
|
"eval_loss": 0.9375677704811096, |
|
"eval_runtime": 0.8312, |
|
"eval_samples_per_second": 84.219, |
|
"eval_steps_per_second": 2.406, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9368333220481873, |
|
"eval_runtime": 0.6385, |
|
"eval_samples_per_second": 109.629, |
|
"eval_steps_per_second": 3.132, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.9936, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9361710548400879, |
|
"eval_runtime": 0.6573, |
|
"eval_samples_per_second": 106.497, |
|
"eval_steps_per_second": 3.043, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9361298680305481, |
|
"eval_runtime": 0.7944, |
|
"eval_samples_per_second": 88.115, |
|
"eval_steps_per_second": 2.518, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9364449381828308, |
|
"eval_runtime": 0.6554, |
|
"eval_samples_per_second": 106.808, |
|
"eval_steps_per_second": 3.052, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9371016621589661, |
|
"eval_runtime": 0.6483, |
|
"eval_samples_per_second": 107.97, |
|
"eval_steps_per_second": 3.085, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.6428571428571429, |
|
"eval_loss": 0.9379546046257019, |
|
"eval_runtime": 0.8119, |
|
"eval_samples_per_second": 86.219, |
|
"eval_steps_per_second": 2.463, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 8.75e-05, |
|
"loss": 0.9746, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9379692077636719, |
|
"eval_runtime": 0.6362, |
|
"eval_samples_per_second": 110.031, |
|
"eval_steps_per_second": 3.144, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9374780654907227, |
|
"eval_runtime": 0.639, |
|
"eval_samples_per_second": 109.543, |
|
"eval_steps_per_second": 3.13, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9379698634147644, |
|
"eval_runtime": 0.8343, |
|
"eval_samples_per_second": 83.899, |
|
"eval_steps_per_second": 2.397, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9375231862068176, |
|
"eval_runtime": 0.6395, |
|
"eval_samples_per_second": 109.457, |
|
"eval_steps_per_second": 3.127, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9369739890098572, |
|
"eval_runtime": 0.6333, |
|
"eval_samples_per_second": 110.536, |
|
"eval_steps_per_second": 3.158, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0001, |
|
"loss": 1.0113, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9361743330955505, |
|
"eval_runtime": 0.7993, |
|
"eval_samples_per_second": 87.579, |
|
"eval_steps_per_second": 2.502, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9340663552284241, |
|
"eval_runtime": 0.6461, |
|
"eval_samples_per_second": 108.348, |
|
"eval_steps_per_second": 3.096, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.9300563335418701, |
|
"eval_runtime": 0.636, |
|
"eval_samples_per_second": 110.058, |
|
"eval_steps_per_second": 3.145, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9259787201881409, |
|
"eval_runtime": 0.8154, |
|
"eval_samples_per_second": 85.845, |
|
"eval_steps_per_second": 2.453, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9224489331245422, |
|
"eval_runtime": 0.6369, |
|
"eval_samples_per_second": 109.903, |
|
"eval_steps_per_second": 3.14, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 9.687500000000001e-05, |
|
"loss": 0.9756, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9190067648887634, |
|
"eval_runtime": 0.6388, |
|
"eval_samples_per_second": 109.577, |
|
"eval_steps_per_second": 3.131, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9154108166694641, |
|
"eval_runtime": 0.7966, |
|
"eval_samples_per_second": 87.873, |
|
"eval_steps_per_second": 2.511, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.912346363067627, |
|
"eval_runtime": 0.6406, |
|
"eval_samples_per_second": 109.268, |
|
"eval_steps_per_second": 3.122, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9091367721557617, |
|
"eval_runtime": 0.6398, |
|
"eval_samples_per_second": 109.41, |
|
"eval_steps_per_second": 3.126, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9070726037025452, |
|
"eval_runtime": 0.8188, |
|
"eval_samples_per_second": 85.488, |
|
"eval_steps_per_second": 2.443, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 9.375e-05, |
|
"loss": 0.9721, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9055730700492859, |
|
"eval_runtime": 0.6361, |
|
"eval_samples_per_second": 110.054, |
|
"eval_steps_per_second": 3.144, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9046576619148254, |
|
"eval_runtime": 0.6407, |
|
"eval_samples_per_second": 109.252, |
|
"eval_steps_per_second": 3.121, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.9038794636726379, |
|
"eval_runtime": 0.8178, |
|
"eval_samples_per_second": 85.592, |
|
"eval_steps_per_second": 2.445, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9030665755271912, |
|
"eval_runtime": 0.6283, |
|
"eval_samples_per_second": 111.419, |
|
"eval_steps_per_second": 3.183, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.902490496635437, |
|
"eval_runtime": 0.8366, |
|
"eval_samples_per_second": 83.669, |
|
"eval_steps_per_second": 2.391, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"learning_rate": 9.062500000000001e-05, |
|
"loss": 0.9698, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.902264416217804, |
|
"eval_runtime": 0.9891, |
|
"eval_samples_per_second": 70.774, |
|
"eval_steps_per_second": 2.022, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.9011555314064026, |
|
"eval_runtime": 0.6498, |
|
"eval_samples_per_second": 107.729, |
|
"eval_steps_per_second": 3.078, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8996686935424805, |
|
"eval_runtime": 0.8289, |
|
"eval_samples_per_second": 84.447, |
|
"eval_steps_per_second": 2.413, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8982025980949402, |
|
"eval_runtime": 0.6375, |
|
"eval_samples_per_second": 109.798, |
|
"eval_steps_per_second": 3.137, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8969982266426086, |
|
"eval_runtime": 0.6483, |
|
"eval_samples_per_second": 107.97, |
|
"eval_steps_per_second": 3.085, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 8.75e-05, |
|
"loss": 0.9341, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8956836462020874, |
|
"eval_runtime": 0.8303, |
|
"eval_samples_per_second": 84.307, |
|
"eval_steps_per_second": 2.409, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8946982622146606, |
|
"eval_runtime": 0.6483, |
|
"eval_samples_per_second": 107.981, |
|
"eval_steps_per_second": 3.085, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8940390348434448, |
|
"eval_runtime": 0.6421, |
|
"eval_samples_per_second": 109.023, |
|
"eval_steps_per_second": 3.115, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8940520286560059, |
|
"eval_runtime": 0.8356, |
|
"eval_samples_per_second": 83.773, |
|
"eval_steps_per_second": 2.394, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8934383988380432, |
|
"eval_runtime": 0.6317, |
|
"eval_samples_per_second": 110.812, |
|
"eval_steps_per_second": 3.166, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"learning_rate": 8.4375e-05, |
|
"loss": 0.9717, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8916982412338257, |
|
"eval_runtime": 0.6456, |
|
"eval_samples_per_second": 108.418, |
|
"eval_steps_per_second": 3.098, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8898113369941711, |
|
"eval_runtime": 0.8145, |
|
"eval_samples_per_second": 85.937, |
|
"eval_steps_per_second": 2.455, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8883917927742004, |
|
"eval_runtime": 0.6387, |
|
"eval_samples_per_second": 109.599, |
|
"eval_steps_per_second": 3.131, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8869962692260742, |
|
"eval_runtime": 0.6406, |
|
"eval_samples_per_second": 109.266, |
|
"eval_steps_per_second": 3.122, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8853691816329956, |
|
"eval_runtime": 0.8216, |
|
"eval_samples_per_second": 85.2, |
|
"eval_steps_per_second": 2.434, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 8.125000000000001e-05, |
|
"loss": 0.9655, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8840075731277466, |
|
"eval_runtime": 0.6378, |
|
"eval_samples_per_second": 109.751, |
|
"eval_steps_per_second": 3.136, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8826519250869751, |
|
"eval_runtime": 0.6384, |
|
"eval_samples_per_second": 109.644, |
|
"eval_steps_per_second": 3.133, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8813565373420715, |
|
"eval_runtime": 0.8402, |
|
"eval_samples_per_second": 83.313, |
|
"eval_steps_per_second": 2.38, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8805155754089355, |
|
"eval_runtime": 0.6428, |
|
"eval_samples_per_second": 108.905, |
|
"eval_steps_per_second": 3.112, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8803040385246277, |
|
"eval_runtime": 0.649, |
|
"eval_samples_per_second": 107.857, |
|
"eval_steps_per_second": 3.082, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"learning_rate": 7.8125e-05, |
|
"loss": 0.9458, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8801725506782532, |
|
"eval_runtime": 0.82, |
|
"eval_samples_per_second": 85.365, |
|
"eval_steps_per_second": 2.439, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8797475695610046, |
|
"eval_runtime": 0.6476, |
|
"eval_samples_per_second": 108.085, |
|
"eval_steps_per_second": 3.088, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8793725967407227, |
|
"eval_runtime": 0.6468, |
|
"eval_samples_per_second": 108.22, |
|
"eval_steps_per_second": 3.092, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8795827031135559, |
|
"eval_runtime": 0.8346, |
|
"eval_samples_per_second": 83.873, |
|
"eval_steps_per_second": 2.396, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8807878494262695, |
|
"eval_runtime": 0.6453, |
|
"eval_samples_per_second": 108.479, |
|
"eval_steps_per_second": 3.099, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 0.9094, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8817013502120972, |
|
"eval_runtime": 0.6393, |
|
"eval_samples_per_second": 109.492, |
|
"eval_steps_per_second": 3.128, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 81.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8828238844871521, |
|
"eval_runtime": 0.8346, |
|
"eval_samples_per_second": 83.868, |
|
"eval_steps_per_second": 2.396, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 82.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8835611939430237, |
|
"eval_runtime": 0.636, |
|
"eval_samples_per_second": 110.07, |
|
"eval_steps_per_second": 3.145, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 83.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8830356001853943, |
|
"eval_runtime": 0.6535, |
|
"eval_samples_per_second": 107.117, |
|
"eval_steps_per_second": 3.06, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.8820751905441284, |
|
"eval_runtime": 0.8384, |
|
"eval_samples_per_second": 83.495, |
|
"eval_steps_per_second": 2.386, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"learning_rate": 7.1875e-05, |
|
"loss": 0.8719, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 85.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.8812506794929504, |
|
"eval_runtime": 0.6519, |
|
"eval_samples_per_second": 107.372, |
|
"eval_steps_per_second": 3.068, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 86.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8804309368133545, |
|
"eval_runtime": 0.6326, |
|
"eval_samples_per_second": 110.652, |
|
"eval_steps_per_second": 3.161, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 87.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.8798118829727173, |
|
"eval_runtime": 0.8338, |
|
"eval_samples_per_second": 83.95, |
|
"eval_steps_per_second": 2.399, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.8787184953689575, |
|
"eval_runtime": 0.64, |
|
"eval_samples_per_second": 109.38, |
|
"eval_steps_per_second": 3.125, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 89.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 0.8769770264625549, |
|
"eval_runtime": 0.6382, |
|
"eval_samples_per_second": 109.679, |
|
"eval_steps_per_second": 3.134, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"learning_rate": 6.875e-05, |
|
"loss": 0.9288, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 90.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8752025961875916, |
|
"eval_runtime": 0.8649, |
|
"eval_samples_per_second": 80.934, |
|
"eval_steps_per_second": 2.312, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 91.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8721939921379089, |
|
"eval_runtime": 0.6536, |
|
"eval_samples_per_second": 107.101, |
|
"eval_steps_per_second": 3.06, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8693682551383972, |
|
"eval_runtime": 0.6434, |
|
"eval_samples_per_second": 108.799, |
|
"eval_steps_per_second": 3.109, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 93.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8670406937599182, |
|
"eval_runtime": 0.8337, |
|
"eval_samples_per_second": 83.963, |
|
"eval_steps_per_second": 2.399, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 94.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8644655346870422, |
|
"eval_runtime": 0.6432, |
|
"eval_samples_per_second": 108.826, |
|
"eval_steps_per_second": 3.109, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"learning_rate": 6.562500000000001e-05, |
|
"loss": 0.9039, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 95.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8624207973480225, |
|
"eval_runtime": 0.6482, |
|
"eval_samples_per_second": 107.999, |
|
"eval_steps_per_second": 3.086, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 96.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8603058457374573, |
|
"eval_runtime": 0.8409, |
|
"eval_samples_per_second": 83.249, |
|
"eval_steps_per_second": 2.379, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 97.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8583868741989136, |
|
"eval_runtime": 0.6484, |
|
"eval_samples_per_second": 107.951, |
|
"eval_steps_per_second": 3.084, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 98.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8566268086433411, |
|
"eval_runtime": 0.6949, |
|
"eval_samples_per_second": 100.728, |
|
"eval_steps_per_second": 2.878, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 99.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8553413152694702, |
|
"eval_runtime": 0.8276, |
|
"eval_samples_per_second": 84.585, |
|
"eval_steps_per_second": 2.417, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 6.25e-05, |
|
"loss": 0.9081, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8549684286117554, |
|
"eval_runtime": 0.6594, |
|
"eval_samples_per_second": 106.164, |
|
"eval_steps_per_second": 3.033, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 101.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8551309108734131, |
|
"eval_runtime": 0.6588, |
|
"eval_samples_per_second": 106.255, |
|
"eval_steps_per_second": 3.036, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 102.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8556391000747681, |
|
"eval_runtime": 0.8474, |
|
"eval_samples_per_second": 82.605, |
|
"eval_steps_per_second": 2.36, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 103.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8558002710342407, |
|
"eval_runtime": 0.6568, |
|
"eval_samples_per_second": 106.577, |
|
"eval_steps_per_second": 3.045, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 104.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8554455637931824, |
|
"eval_runtime": 0.6448, |
|
"eval_samples_per_second": 108.569, |
|
"eval_steps_per_second": 3.102, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"learning_rate": 5.9375e-05, |
|
"loss": 0.9142, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 105.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8551297783851624, |
|
"eval_runtime": 0.8226, |
|
"eval_samples_per_second": 85.093, |
|
"eval_steps_per_second": 2.431, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 106.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8553109169006348, |
|
"eval_runtime": 0.6501, |
|
"eval_samples_per_second": 107.668, |
|
"eval_steps_per_second": 3.076, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 107.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.855134904384613, |
|
"eval_runtime": 0.637, |
|
"eval_samples_per_second": 109.882, |
|
"eval_steps_per_second": 3.139, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 108.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8549013137817383, |
|
"eval_runtime": 0.8378, |
|
"eval_samples_per_second": 83.557, |
|
"eval_steps_per_second": 2.387, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 109.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.854942798614502, |
|
"eval_runtime": 0.6596, |
|
"eval_samples_per_second": 106.131, |
|
"eval_steps_per_second": 3.032, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"learning_rate": 5.6250000000000005e-05, |
|
"loss": 0.9347, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 110.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8551362752914429, |
|
"eval_runtime": 0.6674, |
|
"eval_samples_per_second": 104.886, |
|
"eval_steps_per_second": 2.997, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 111.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8553721308708191, |
|
"eval_runtime": 0.8336, |
|
"eval_samples_per_second": 83.974, |
|
"eval_steps_per_second": 2.399, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 112.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8548364639282227, |
|
"eval_runtime": 0.6506, |
|
"eval_samples_per_second": 107.599, |
|
"eval_steps_per_second": 3.074, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 113.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.853795051574707, |
|
"eval_runtime": 0.6756, |
|
"eval_samples_per_second": 103.611, |
|
"eval_steps_per_second": 2.96, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 114.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8524832129478455, |
|
"eval_runtime": 0.8168, |
|
"eval_samples_per_second": 85.696, |
|
"eval_steps_per_second": 2.448, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"learning_rate": 5.3125000000000004e-05, |
|
"loss": 0.8922, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 115.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8512247204780579, |
|
"eval_runtime": 0.6476, |
|
"eval_samples_per_second": 108.096, |
|
"eval_steps_per_second": 3.088, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 116.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8505221009254456, |
|
"eval_runtime": 0.6563, |
|
"eval_samples_per_second": 106.655, |
|
"eval_steps_per_second": 3.047, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 117.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.849509596824646, |
|
"eval_runtime": 0.8193, |
|
"eval_samples_per_second": 85.434, |
|
"eval_steps_per_second": 2.441, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 118.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8483795523643494, |
|
"eval_runtime": 0.6476, |
|
"eval_samples_per_second": 108.094, |
|
"eval_steps_per_second": 3.088, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 119.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8471851944923401, |
|
"eval_runtime": 0.6472, |
|
"eval_samples_per_second": 108.158, |
|
"eval_steps_per_second": 3.09, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8897, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 120.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8455559611320496, |
|
"eval_runtime": 0.8155, |
|
"eval_samples_per_second": 85.837, |
|
"eval_steps_per_second": 2.452, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 121.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8439861536026001, |
|
"eval_runtime": 0.6794, |
|
"eval_samples_per_second": 103.026, |
|
"eval_steps_per_second": 2.944, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 122.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8426181674003601, |
|
"eval_runtime": 0.6386, |
|
"eval_samples_per_second": 109.616, |
|
"eval_steps_per_second": 3.132, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 123.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8412323594093323, |
|
"eval_runtime": 0.8222, |
|
"eval_samples_per_second": 85.135, |
|
"eval_steps_per_second": 2.432, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 124.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8395997881889343, |
|
"eval_runtime": 0.6405, |
|
"eval_samples_per_second": 109.29, |
|
"eval_steps_per_second": 3.123, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"learning_rate": 4.6875e-05, |
|
"loss": 0.8829, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 125.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8383906483650208, |
|
"eval_runtime": 0.6384, |
|
"eval_samples_per_second": 109.656, |
|
"eval_steps_per_second": 3.133, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 126.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8372732996940613, |
|
"eval_runtime": 0.8007, |
|
"eval_samples_per_second": 87.425, |
|
"eval_steps_per_second": 2.498, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 127.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8365365266799927, |
|
"eval_runtime": 0.6412, |
|
"eval_samples_per_second": 109.171, |
|
"eval_steps_per_second": 3.119, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 128.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.835951030254364, |
|
"eval_runtime": 0.6518, |
|
"eval_samples_per_second": 107.389, |
|
"eval_steps_per_second": 3.068, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 129.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8352962732315063, |
|
"eval_runtime": 0.8209, |
|
"eval_samples_per_second": 85.273, |
|
"eval_steps_per_second": 2.436, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.8744, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 130.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8344349265098572, |
|
"eval_runtime": 0.6608, |
|
"eval_samples_per_second": 105.932, |
|
"eval_steps_per_second": 3.027, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 131.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8336659669876099, |
|
"eval_runtime": 0.6503, |
|
"eval_samples_per_second": 107.635, |
|
"eval_steps_per_second": 3.075, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 132.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8329463601112366, |
|
"eval_runtime": 0.824, |
|
"eval_samples_per_second": 84.952, |
|
"eval_steps_per_second": 2.427, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 133.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8324605822563171, |
|
"eval_runtime": 0.6594, |
|
"eval_samples_per_second": 106.156, |
|
"eval_steps_per_second": 3.033, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 134.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8318061232566833, |
|
"eval_runtime": 0.6395, |
|
"eval_samples_per_second": 109.457, |
|
"eval_steps_per_second": 3.127, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"learning_rate": 4.0625000000000005e-05, |
|
"loss": 0.8657, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 135.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8312056660652161, |
|
"eval_runtime": 0.8064, |
|
"eval_samples_per_second": 86.802, |
|
"eval_steps_per_second": 2.48, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 136.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8306312561035156, |
|
"eval_runtime": 0.645, |
|
"eval_samples_per_second": 108.533, |
|
"eval_steps_per_second": 3.101, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 137.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8299986720085144, |
|
"eval_runtime": 0.6678, |
|
"eval_samples_per_second": 104.823, |
|
"eval_steps_per_second": 2.995, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 138.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8296393752098083, |
|
"eval_runtime": 0.8159, |
|
"eval_samples_per_second": 85.792, |
|
"eval_steps_per_second": 2.451, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 139.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8294458389282227, |
|
"eval_runtime": 0.6396, |
|
"eval_samples_per_second": 109.442, |
|
"eval_steps_per_second": 3.127, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.9421, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 140.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8292441368103027, |
|
"eval_runtime": 0.6515, |
|
"eval_samples_per_second": 107.445, |
|
"eval_steps_per_second": 3.07, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 141.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8291121125221252, |
|
"eval_runtime": 0.8194, |
|
"eval_samples_per_second": 85.428, |
|
"eval_steps_per_second": 2.441, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 142.0, |
|
"eval_accuracy": 0.6714285714285714, |
|
"eval_loss": 0.8290067315101624, |
|
"eval_runtime": 0.9452, |
|
"eval_samples_per_second": 74.057, |
|
"eval_steps_per_second": 2.116, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 143.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8290221095085144, |
|
"eval_runtime": 0.6854, |
|
"eval_samples_per_second": 102.129, |
|
"eval_steps_per_second": 2.918, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 144.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8288514018058777, |
|
"eval_runtime": 0.6741, |
|
"eval_samples_per_second": 103.846, |
|
"eval_steps_per_second": 2.967, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"learning_rate": 3.4375e-05, |
|
"loss": 0.9066, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 145.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8286876082420349, |
|
"eval_runtime": 0.6545, |
|
"eval_samples_per_second": 106.944, |
|
"eval_steps_per_second": 3.056, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 146.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8290360569953918, |
|
"eval_runtime": 0.6611, |
|
"eval_samples_per_second": 105.889, |
|
"eval_steps_per_second": 3.025, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 147.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8293396830558777, |
|
"eval_runtime": 0.6543, |
|
"eval_samples_per_second": 106.98, |
|
"eval_steps_per_second": 3.057, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 148.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8294445872306824, |
|
"eval_runtime": 0.6455, |
|
"eval_samples_per_second": 108.45, |
|
"eval_steps_per_second": 3.099, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 149.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8294763565063477, |
|
"eval_runtime": 0.9727, |
|
"eval_samples_per_second": 71.966, |
|
"eval_steps_per_second": 2.056, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.9068, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 150.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8295239210128784, |
|
"eval_runtime": 0.9775, |
|
"eval_samples_per_second": 71.611, |
|
"eval_steps_per_second": 2.046, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8294230699539185, |
|
"eval_runtime": 0.6644, |
|
"eval_samples_per_second": 105.363, |
|
"eval_steps_per_second": 3.01, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 152.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.829305112361908, |
|
"eval_runtime": 0.6604, |
|
"eval_samples_per_second": 105.994, |
|
"eval_steps_per_second": 3.028, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 153.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8293172717094421, |
|
"eval_runtime": 0.8353, |
|
"eval_samples_per_second": 83.803, |
|
"eval_steps_per_second": 2.394, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 154.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8289957046508789, |
|
"eval_runtime": 0.6575, |
|
"eval_samples_per_second": 106.469, |
|
"eval_steps_per_second": 3.042, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"learning_rate": 2.8125000000000003e-05, |
|
"loss": 0.8715, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 155.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8286699056625366, |
|
"eval_runtime": 0.6466, |
|
"eval_samples_per_second": 108.266, |
|
"eval_steps_per_second": 3.093, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 156.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8283028602600098, |
|
"eval_runtime": 0.8251, |
|
"eval_samples_per_second": 84.843, |
|
"eval_steps_per_second": 2.424, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 157.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8276944160461426, |
|
"eval_runtime": 0.6461, |
|
"eval_samples_per_second": 108.335, |
|
"eval_steps_per_second": 3.095, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 158.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.827368438243866, |
|
"eval_runtime": 0.6771, |
|
"eval_samples_per_second": 103.379, |
|
"eval_steps_per_second": 2.954, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 159.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8269255757331848, |
|
"eval_runtime": 0.8454, |
|
"eval_samples_per_second": 82.804, |
|
"eval_steps_per_second": 2.366, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.8921, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 160.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.826560914516449, |
|
"eval_runtime": 0.6462, |
|
"eval_samples_per_second": 108.325, |
|
"eval_steps_per_second": 3.095, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 161.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8263527154922485, |
|
"eval_runtime": 0.6718, |
|
"eval_samples_per_second": 104.193, |
|
"eval_steps_per_second": 2.977, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 162.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.826131284236908, |
|
"eval_runtime": 0.8359, |
|
"eval_samples_per_second": 83.747, |
|
"eval_steps_per_second": 2.393, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 163.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8259814977645874, |
|
"eval_runtime": 0.6618, |
|
"eval_samples_per_second": 105.778, |
|
"eval_steps_per_second": 3.022, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 164.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8257696032524109, |
|
"eval_runtime": 0.6625, |
|
"eval_samples_per_second": 105.664, |
|
"eval_steps_per_second": 3.019, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"learning_rate": 2.1875e-05, |
|
"loss": 0.8768, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 165.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.825222373008728, |
|
"eval_runtime": 0.8436, |
|
"eval_samples_per_second": 82.974, |
|
"eval_steps_per_second": 2.371, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 166.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8247527480125427, |
|
"eval_runtime": 0.6665, |
|
"eval_samples_per_second": 105.023, |
|
"eval_steps_per_second": 3.001, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 167.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8242577910423279, |
|
"eval_runtime": 0.6669, |
|
"eval_samples_per_second": 104.971, |
|
"eval_steps_per_second": 2.999, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 168.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8237206339836121, |
|
"eval_runtime": 0.8327, |
|
"eval_samples_per_second": 84.06, |
|
"eval_steps_per_second": 2.402, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 169.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8231467604637146, |
|
"eval_runtime": 0.6532, |
|
"eval_samples_per_second": 107.163, |
|
"eval_steps_per_second": 3.062, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.8519, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 170.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8226965665817261, |
|
"eval_runtime": 0.6591, |
|
"eval_samples_per_second": 106.199, |
|
"eval_steps_per_second": 3.034, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 171.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.822342038154602, |
|
"eval_runtime": 0.8214, |
|
"eval_samples_per_second": 85.216, |
|
"eval_steps_per_second": 2.435, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 172.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.822126030921936, |
|
"eval_runtime": 0.6612, |
|
"eval_samples_per_second": 105.861, |
|
"eval_steps_per_second": 3.025, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 173.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8220161199569702, |
|
"eval_runtime": 0.6469, |
|
"eval_samples_per_second": 108.212, |
|
"eval_steps_per_second": 3.092, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 174.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8218111991882324, |
|
"eval_runtime": 0.8067, |
|
"eval_samples_per_second": 86.769, |
|
"eval_steps_per_second": 2.479, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"learning_rate": 1.5625e-05, |
|
"loss": 0.92, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 175.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.821461021900177, |
|
"eval_runtime": 0.6484, |
|
"eval_samples_per_second": 107.962, |
|
"eval_steps_per_second": 3.085, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 176.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8210566639900208, |
|
"eval_runtime": 0.6645, |
|
"eval_samples_per_second": 105.342, |
|
"eval_steps_per_second": 3.01, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 177.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8207017183303833, |
|
"eval_runtime": 0.8152, |
|
"eval_samples_per_second": 85.873, |
|
"eval_steps_per_second": 2.454, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 178.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8204047679901123, |
|
"eval_runtime": 0.7773, |
|
"eval_samples_per_second": 90.05, |
|
"eval_steps_per_second": 2.573, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 179.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8200381398200989, |
|
"eval_runtime": 0.6533, |
|
"eval_samples_per_second": 107.15, |
|
"eval_steps_per_second": 3.061, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.879, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 180.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8197112083435059, |
|
"eval_runtime": 0.8254, |
|
"eval_samples_per_second": 84.803, |
|
"eval_steps_per_second": 2.423, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 181.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8194140195846558, |
|
"eval_runtime": 0.6736, |
|
"eval_samples_per_second": 103.918, |
|
"eval_steps_per_second": 2.969, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 182.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8190609812736511, |
|
"eval_runtime": 0.6501, |
|
"eval_samples_per_second": 107.669, |
|
"eval_steps_per_second": 3.076, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 183.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 0.8187218308448792, |
|
"eval_runtime": 0.7205, |
|
"eval_samples_per_second": 97.148, |
|
"eval_steps_per_second": 2.776, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 184.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8184635639190674, |
|
"eval_runtime": 0.656, |
|
"eval_samples_per_second": 106.712, |
|
"eval_steps_per_second": 3.049, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"learning_rate": 9.375000000000001e-06, |
|
"loss": 0.8893, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 185.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8182028532028198, |
|
"eval_runtime": 0.6563, |
|
"eval_samples_per_second": 106.666, |
|
"eval_steps_per_second": 3.048, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 186.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8179557919502258, |
|
"eval_runtime": 0.6961, |
|
"eval_samples_per_second": 100.563, |
|
"eval_steps_per_second": 2.873, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 187.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8177469372749329, |
|
"eval_runtime": 0.6584, |
|
"eval_samples_per_second": 106.311, |
|
"eval_steps_per_second": 3.037, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 188.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8175888657569885, |
|
"eval_runtime": 0.6728, |
|
"eval_samples_per_second": 104.046, |
|
"eval_steps_per_second": 2.973, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 189.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8174628615379333, |
|
"eval_runtime": 0.661, |
|
"eval_samples_per_second": 105.894, |
|
"eval_steps_per_second": 3.026, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.8501, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 190.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8172903656959534, |
|
"eval_runtime": 0.6643, |
|
"eval_samples_per_second": 105.379, |
|
"eval_steps_per_second": 3.011, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 191.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8171139359474182, |
|
"eval_runtime": 0.7224, |
|
"eval_samples_per_second": 96.898, |
|
"eval_steps_per_second": 2.769, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 192.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8169858455657959, |
|
"eval_runtime": 0.6822, |
|
"eval_samples_per_second": 102.605, |
|
"eval_steps_per_second": 2.932, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 193.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8169211149215698, |
|
"eval_runtime": 0.6488, |
|
"eval_samples_per_second": 107.887, |
|
"eval_steps_per_second": 3.082, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 194.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8168790340423584, |
|
"eval_runtime": 0.8355, |
|
"eval_samples_per_second": 83.778, |
|
"eval_steps_per_second": 2.394, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"learning_rate": 3.125e-06, |
|
"loss": 0.8611, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 195.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8168440461158752, |
|
"eval_runtime": 0.6488, |
|
"eval_samples_per_second": 107.884, |
|
"eval_steps_per_second": 3.082, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 196.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8168230056762695, |
|
"eval_runtime": 0.6602, |
|
"eval_samples_per_second": 106.026, |
|
"eval_steps_per_second": 3.029, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 197.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8167951107025146, |
|
"eval_runtime": 0.8588, |
|
"eval_samples_per_second": 81.511, |
|
"eval_steps_per_second": 2.329, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 198.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8167835474014282, |
|
"eval_runtime": 0.6762, |
|
"eval_samples_per_second": 103.513, |
|
"eval_steps_per_second": 2.958, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 199.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8167732954025269, |
|
"eval_runtime": 0.6596, |
|
"eval_samples_per_second": 106.128, |
|
"eval_steps_per_second": 3.032, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.8881, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.8167622089385986, |
|
"eval_runtime": 0.844, |
|
"eval_samples_per_second": 82.939, |
|
"eval_steps_per_second": 2.37, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"step": 400, |
|
"total_flos": 2.2371640252416e+18, |
|
"train_loss": 0.9259392237663269, |
|
"train_runtime": 1042.9233, |
|
"train_samples_per_second": 86.296, |
|
"train_steps_per_second": 0.384 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 400, |
|
"num_train_epochs": 200, |
|
"save_steps": 500, |
|
"total_flos": 2.2371640252416e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|