|
{ |
|
"best_metric": 0.8169014084507042, |
|
"best_model_checkpoint": "deit-base-distilled-patch16-224-65-fold4/checkpoint-91", |
|
"epoch": 92.3076923076923, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.4647887323943662, |
|
"eval_loss": 0.7266488075256348, |
|
"eval_runtime": 0.8434, |
|
"eval_samples_per_second": 84.184, |
|
"eval_steps_per_second": 3.557, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"eval_accuracy": 0.5211267605633803, |
|
"eval_loss": 0.8115941286087036, |
|
"eval_runtime": 0.8919, |
|
"eval_samples_per_second": 79.608, |
|
"eval_steps_per_second": 3.364, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"eval_accuracy": 0.4647887323943662, |
|
"eval_loss": 0.7081143856048584, |
|
"eval_runtime": 0.8712, |
|
"eval_samples_per_second": 81.496, |
|
"eval_steps_per_second": 3.443, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 2.1958322525024414, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.7173, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.6644501090049744, |
|
"eval_runtime": 0.8954, |
|
"eval_samples_per_second": 79.298, |
|
"eval_steps_per_second": 3.351, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.5915492957746479, |
|
"eval_loss": 0.6440630555152893, |
|
"eval_runtime": 0.8941, |
|
"eval_samples_per_second": 79.413, |
|
"eval_steps_per_second": 3.355, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"eval_accuracy": 0.676056338028169, |
|
"eval_loss": 0.6400186419487, |
|
"eval_runtime": 0.933, |
|
"eval_samples_per_second": 76.096, |
|
"eval_steps_per_second": 3.215, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 2.7608325481414795, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6351, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"eval_accuracy": 0.6619718309859155, |
|
"eval_loss": 0.6054678559303284, |
|
"eval_runtime": 0.9387, |
|
"eval_samples_per_second": 75.638, |
|
"eval_steps_per_second": 3.196, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5352112676056338, |
|
"eval_loss": 0.7769902944564819, |
|
"eval_runtime": 0.9329, |
|
"eval_samples_per_second": 76.108, |
|
"eval_steps_per_second": 3.216, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 0.6259447932243347, |
|
"eval_runtime": 0.924, |
|
"eval_samples_per_second": 76.842, |
|
"eval_steps_per_second": 3.247, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 5.699825286865234, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5434, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.846153846153847, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.5889422297477722, |
|
"eval_runtime": 0.9148, |
|
"eval_samples_per_second": 77.612, |
|
"eval_steps_per_second": 3.279, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"eval_accuracy": 0.647887323943662, |
|
"eval_loss": 0.7283326387405396, |
|
"eval_runtime": 0.9062, |
|
"eval_samples_per_second": 78.347, |
|
"eval_steps_per_second": 3.31, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.647887323943662, |
|
"eval_loss": 0.6897829174995422, |
|
"eval_runtime": 0.9341, |
|
"eval_samples_per_second": 76.013, |
|
"eval_steps_per_second": 3.212, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 2.502607583999634, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.4861, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.6428806781768799, |
|
"eval_runtime": 0.9382, |
|
"eval_samples_per_second": 75.679, |
|
"eval_steps_per_second": 3.198, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"eval_accuracy": 0.6619718309859155, |
|
"eval_loss": 0.691542387008667, |
|
"eval_runtime": 0.932, |
|
"eval_samples_per_second": 76.179, |
|
"eval_steps_per_second": 3.219, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.76923076923077, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.5702247619628906, |
|
"eval_runtime": 0.9242, |
|
"eval_samples_per_second": 76.822, |
|
"eval_steps_per_second": 3.246, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 5.8594255447387695, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4285, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.6356058120727539, |
|
"eval_runtime": 0.919, |
|
"eval_samples_per_second": 77.258, |
|
"eval_steps_per_second": 3.264, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.676056338028169, |
|
"eval_loss": 0.6981013417243958, |
|
"eval_runtime": 0.9247, |
|
"eval_samples_per_second": 76.785, |
|
"eval_steps_per_second": 3.244, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.846153846153847, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.5218324661254883, |
|
"eval_runtime": 0.917, |
|
"eval_samples_per_second": 77.425, |
|
"eval_steps_per_second": 3.271, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 6.547220706939697, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.3781, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.76923076923077, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.534016489982605, |
|
"eval_runtime": 0.914, |
|
"eval_samples_per_second": 77.678, |
|
"eval_steps_per_second": 3.282, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.676056338028169, |
|
"eval_loss": 0.7611135244369507, |
|
"eval_runtime": 0.9206, |
|
"eval_samples_per_second": 77.126, |
|
"eval_steps_per_second": 3.259, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.5939193964004517, |
|
"eval_runtime": 0.9415, |
|
"eval_samples_per_second": 75.408, |
|
"eval_steps_per_second": 3.186, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 4.307927131652832, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.3516, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.846153846153847, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.61859130859375, |
|
"eval_runtime": 0.9317, |
|
"eval_samples_per_second": 76.201, |
|
"eval_steps_per_second": 3.22, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.76923076923077, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.712211549282074, |
|
"eval_runtime": 0.9272, |
|
"eval_samples_per_second": 76.571, |
|
"eval_steps_per_second": 3.235, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.5930981040000916, |
|
"eval_runtime": 0.9231, |
|
"eval_samples_per_second": 76.916, |
|
"eval_steps_per_second": 3.25, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 5.136722087860107, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.296, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 0.6304548978805542, |
|
"eval_runtime": 0.9177, |
|
"eval_samples_per_second": 77.366, |
|
"eval_steps_per_second": 3.269, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.846153846153847, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.8947206735610962, |
|
"eval_runtime": 0.9322, |
|
"eval_samples_per_second": 76.164, |
|
"eval_steps_per_second": 3.218, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.76923076923077, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.6216529011726379, |
|
"eval_runtime": 0.9208, |
|
"eval_samples_per_second": 77.108, |
|
"eval_steps_per_second": 3.258, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 2.791210651397705, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.2741, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.7217584848403931, |
|
"eval_runtime": 0.9386, |
|
"eval_samples_per_second": 75.642, |
|
"eval_steps_per_second": 3.196, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6687091588973999, |
|
"eval_runtime": 0.9304, |
|
"eval_samples_per_second": 76.314, |
|
"eval_steps_per_second": 3.225, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.846153846153847, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6647565960884094, |
|
"eval_runtime": 0.9206, |
|
"eval_samples_per_second": 77.123, |
|
"eval_steps_per_second": 3.259, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 4.038685321807861, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2559, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.64328533411026, |
|
"eval_runtime": 0.9253, |
|
"eval_samples_per_second": 76.729, |
|
"eval_steps_per_second": 3.242, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.6673524975776672, |
|
"eval_runtime": 0.9299, |
|
"eval_samples_per_second": 76.356, |
|
"eval_steps_per_second": 3.226, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.6642715334892273, |
|
"eval_runtime": 0.9221, |
|
"eval_samples_per_second": 76.995, |
|
"eval_steps_per_second": 3.253, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 2.9196064472198486, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.2001, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.6247330904006958, |
|
"eval_runtime": 0.923, |
|
"eval_samples_per_second": 76.922, |
|
"eval_steps_per_second": 3.25, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.76923076923077, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 0.634434163570404, |
|
"eval_runtime": 0.9207, |
|
"eval_samples_per_second": 77.116, |
|
"eval_steps_per_second": 3.258, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7071972489356995, |
|
"eval_runtime": 0.9162, |
|
"eval_samples_per_second": 77.491, |
|
"eval_steps_per_second": 3.274, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 2.7139010429382324, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.1728, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.7145668864250183, |
|
"eval_runtime": 0.9546, |
|
"eval_samples_per_second": 74.38, |
|
"eval_steps_per_second": 3.143, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 37.84615384615385, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.8212233781814575, |
|
"eval_runtime": 0.9206, |
|
"eval_samples_per_second": 77.122, |
|
"eval_steps_per_second": 3.259, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 38.76923076923077, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.7901431322097778, |
|
"eval_runtime": 0.9408, |
|
"eval_samples_per_second": 75.465, |
|
"eval_steps_per_second": 3.189, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 3.7214014530181885, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.2109, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.8235028982162476, |
|
"eval_runtime": 0.9237, |
|
"eval_samples_per_second": 76.863, |
|
"eval_steps_per_second": 3.248, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 0.9196304678916931, |
|
"eval_runtime": 0.9201, |
|
"eval_samples_per_second": 77.162, |
|
"eval_steps_per_second": 3.26, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 41.84615384615385, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7758485078811646, |
|
"eval_runtime": 0.918, |
|
"eval_samples_per_second": 77.345, |
|
"eval_steps_per_second": 3.268, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 42.76923076923077, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.7692318558692932, |
|
"eval_runtime": 0.9271, |
|
"eval_samples_per_second": 76.58, |
|
"eval_steps_per_second": 3.236, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 2.981480121612549, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.1634, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.830990731716156, |
|
"eval_runtime": 0.9143, |
|
"eval_samples_per_second": 77.651, |
|
"eval_steps_per_second": 3.281, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.7550302147865295, |
|
"eval_runtime": 0.9175, |
|
"eval_samples_per_second": 77.387, |
|
"eval_steps_per_second": 3.27, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 45.84615384615385, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.7645807862281799, |
|
"eval_runtime": 0.9201, |
|
"eval_samples_per_second": 77.165, |
|
"eval_steps_per_second": 3.26, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 2.219327926635742, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.148, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 46.76923076923077, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7207580804824829, |
|
"eval_runtime": 0.9248, |
|
"eval_samples_per_second": 76.77, |
|
"eval_steps_per_second": 3.244, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7324273586273193, |
|
"eval_runtime": 0.9287, |
|
"eval_samples_per_second": 76.454, |
|
"eval_steps_per_second": 3.23, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7855945229530334, |
|
"eval_runtime": 0.923, |
|
"eval_samples_per_second": 76.922, |
|
"eval_steps_per_second": 3.25, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 3.3428738117218018, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.1568, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 49.84615384615385, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.8032997250556946, |
|
"eval_runtime": 0.9319, |
|
"eval_samples_per_second": 76.191, |
|
"eval_steps_per_second": 3.219, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.9006530046463013, |
|
"eval_runtime": 0.9271, |
|
"eval_samples_per_second": 76.582, |
|
"eval_steps_per_second": 3.236, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.817884624004364, |
|
"eval_runtime": 0.926, |
|
"eval_samples_per_second": 76.671, |
|
"eval_steps_per_second": 3.24, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 2.3081116676330566, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.1659, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7775102853775024, |
|
"eval_runtime": 0.9343, |
|
"eval_samples_per_second": 75.993, |
|
"eval_steps_per_second": 3.211, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7214329838752747, |
|
"eval_runtime": 0.9316, |
|
"eval_samples_per_second": 76.215, |
|
"eval_steps_per_second": 3.22, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 54.76923076923077, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.7385321259498596, |
|
"eval_runtime": 0.9261, |
|
"eval_samples_per_second": 76.67, |
|
"eval_steps_per_second": 3.24, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 2.9334683418273926, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1352, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.7433763742446899, |
|
"eval_runtime": 0.9297, |
|
"eval_samples_per_second": 76.371, |
|
"eval_steps_per_second": 3.227, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.8971463441848755, |
|
"eval_runtime": 0.9262, |
|
"eval_samples_per_second": 76.656, |
|
"eval_steps_per_second": 3.239, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 57.84615384615385, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7821467518806458, |
|
"eval_runtime": 0.928, |
|
"eval_samples_per_second": 76.509, |
|
"eval_steps_per_second": 3.233, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 4.111489295959473, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.1309, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 58.76923076923077, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.7896379828453064, |
|
"eval_runtime": 0.9233, |
|
"eval_samples_per_second": 76.899, |
|
"eval_steps_per_second": 3.249, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.8339643478393555, |
|
"eval_runtime": 0.933, |
|
"eval_samples_per_second": 76.101, |
|
"eval_steps_per_second": 3.216, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.8154428005218506, |
|
"eval_runtime": 0.9292, |
|
"eval_samples_per_second": 76.406, |
|
"eval_steps_per_second": 3.228, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 4.092026233673096, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1201, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 61.84615384615385, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.8184639811515808, |
|
"eval_runtime": 0.9373, |
|
"eval_samples_per_second": 75.748, |
|
"eval_steps_per_second": 3.201, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 62.76923076923077, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9639940857887268, |
|
"eval_runtime": 0.9235, |
|
"eval_samples_per_second": 76.878, |
|
"eval_steps_per_second": 3.248, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.8484686613082886, |
|
"eval_runtime": 0.9218, |
|
"eval_samples_per_second": 77.024, |
|
"eval_steps_per_second": 3.255, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 2.0266196727752686, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1291, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.8807466626167297, |
|
"eval_runtime": 0.9258, |
|
"eval_samples_per_second": 76.694, |
|
"eval_steps_per_second": 3.241, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 65.84615384615384, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.8652527332305908, |
|
"eval_runtime": 0.9568, |
|
"eval_samples_per_second": 74.203, |
|
"eval_steps_per_second": 3.135, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 66.76923076923077, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.8744374513626099, |
|
"eval_runtime": 0.9208, |
|
"eval_samples_per_second": 77.107, |
|
"eval_steps_per_second": 3.258, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 2.0726959705352783, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.124, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.8723464012145996, |
|
"eval_runtime": 0.9161, |
|
"eval_samples_per_second": 77.501, |
|
"eval_steps_per_second": 3.275, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.8948094248771667, |
|
"eval_runtime": 0.93, |
|
"eval_samples_per_second": 76.342, |
|
"eval_steps_per_second": 3.226, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 69.84615384615384, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9777162671089172, |
|
"eval_runtime": 0.9269, |
|
"eval_samples_per_second": 76.603, |
|
"eval_steps_per_second": 3.237, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 3.592405319213867, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.1262, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.940915048122406, |
|
"eval_runtime": 0.9252, |
|
"eval_samples_per_second": 76.743, |
|
"eval_steps_per_second": 3.243, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.9617937207221985, |
|
"eval_runtime": 0.9316, |
|
"eval_samples_per_second": 76.211, |
|
"eval_steps_per_second": 3.22, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.9641876220703125, |
|
"eval_runtime": 0.9327, |
|
"eval_samples_per_second": 76.122, |
|
"eval_steps_per_second": 3.216, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 1.9709769487380981, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1036, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.9737982153892517, |
|
"eval_runtime": 0.9281, |
|
"eval_samples_per_second": 76.5, |
|
"eval_steps_per_second": 3.232, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 74.76923076923077, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.9787779450416565, |
|
"eval_runtime": 0.9363, |
|
"eval_samples_per_second": 75.831, |
|
"eval_steps_per_second": 3.204, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 1.0114330053329468, |
|
"eval_runtime": 0.9271, |
|
"eval_samples_per_second": 76.582, |
|
"eval_steps_per_second": 3.236, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 3.0993545055389404, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1183, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 1.0004260540008545, |
|
"eval_runtime": 0.9321, |
|
"eval_samples_per_second": 76.171, |
|
"eval_steps_per_second": 3.218, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 77.84615384615384, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 1.0407198667526245, |
|
"eval_runtime": 0.9175, |
|
"eval_samples_per_second": 77.387, |
|
"eval_steps_per_second": 3.27, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 78.76923076923077, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 1.1509737968444824, |
|
"eval_runtime": 0.9303, |
|
"eval_samples_per_second": 76.319, |
|
"eval_steps_per_second": 3.225, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 5.949638843536377, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.0981, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 1.0718269348144531, |
|
"eval_runtime": 0.9185, |
|
"eval_samples_per_second": 77.304, |
|
"eval_steps_per_second": 3.266, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.9988247752189636, |
|
"eval_runtime": 0.933, |
|
"eval_samples_per_second": 76.098, |
|
"eval_steps_per_second": 3.215, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 81.84615384615384, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 1.0053763389587402, |
|
"eval_runtime": 0.9278, |
|
"eval_samples_per_second": 76.524, |
|
"eval_steps_per_second": 3.233, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 82.76923076923077, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.9896395206451416, |
|
"eval_runtime": 0.9224, |
|
"eval_samples_per_second": 76.975, |
|
"eval_steps_per_second": 3.252, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 2.5808982849121094, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.106, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.9850640892982483, |
|
"eval_runtime": 0.9326, |
|
"eval_samples_per_second": 76.134, |
|
"eval_steps_per_second": 3.217, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.9769949913024902, |
|
"eval_runtime": 0.9194, |
|
"eval_samples_per_second": 77.228, |
|
"eval_steps_per_second": 3.263, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 85.84615384615384, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9622512459754944, |
|
"eval_runtime": 0.9188, |
|
"eval_samples_per_second": 77.276, |
|
"eval_steps_per_second": 3.265, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 3.007988214492798, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.114, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 86.76923076923077, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.9664495587348938, |
|
"eval_runtime": 0.9429, |
|
"eval_samples_per_second": 75.301, |
|
"eval_steps_per_second": 3.182, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.9780421853065491, |
|
"eval_runtime": 0.9309, |
|
"eval_samples_per_second": 76.269, |
|
"eval_steps_per_second": 3.223, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9670152068138123, |
|
"eval_runtime": 0.927, |
|
"eval_samples_per_second": 76.593, |
|
"eval_steps_per_second": 3.236, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 2.9891533851623535, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.1157, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 89.84615384615384, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.9586439728736877, |
|
"eval_runtime": 0.9232, |
|
"eval_samples_per_second": 76.909, |
|
"eval_steps_per_second": 3.25, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"eval_accuracy": 0.7183098591549296, |
|
"eval_loss": 0.9586858749389648, |
|
"eval_runtime": 0.9402, |
|
"eval_samples_per_second": 75.515, |
|
"eval_steps_per_second": 3.191, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.9610524773597717, |
|
"eval_runtime": 0.9383, |
|
"eval_samples_per_second": 75.669, |
|
"eval_steps_per_second": 3.197, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 2.5594146251678467, |
|
"learning_rate": 0.0, |
|
"loss": 0.0834, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.9612475037574768, |
|
"eval_runtime": 0.9284, |
|
"eval_samples_per_second": 76.472, |
|
"eval_steps_per_second": 3.231, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"step": 300, |
|
"total_flos": 2.8402872494292173e+18, |
|
"train_loss": 0.23628523468971252, |
|
"train_runtime": 1593.1864, |
|
"train_samples_per_second": 24.919, |
|
"train_steps_per_second": 0.188 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.7217584848403931, |
|
"eval_runtime": 0.9525, |
|
"eval_samples_per_second": 74.537, |
|
"eval_steps_per_second": 3.149, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.8402872494292173e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|