|
{ |
|
"best_metric": 0.6601941747572816, |
|
"best_model_checkpoint": "videomae-large-finetuned-right-hand-conflab-v1/checkpoint-708", |
|
"epoch": 14.042022792022792, |
|
"eval_steps": 500, |
|
"global_step": 885, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.007122507122507123, |
|
"grad_norm": 12.899408340454102, |
|
"learning_rate": 3.5460992907801423e-06, |
|
"loss": 2.0965, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.014245014245014245, |
|
"grad_norm": 9.095556259155273, |
|
"learning_rate": 7.092198581560285e-06, |
|
"loss": 2.1002, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.021367521367521368, |
|
"grad_norm": 11.837288856506348, |
|
"learning_rate": 1.0638297872340426e-05, |
|
"loss": 1.9683, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02849002849002849, |
|
"grad_norm": 8.063945770263672, |
|
"learning_rate": 1.418439716312057e-05, |
|
"loss": 1.9838, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03561253561253561, |
|
"grad_norm": 7.636847972869873, |
|
"learning_rate": 1.773049645390071e-05, |
|
"loss": 2.0232, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04202279202279202, |
|
"eval_accuracy": 0.1941747572815534, |
|
"eval_loss": 1.9421881437301636, |
|
"eval_runtime": 24.149, |
|
"eval_samples_per_second": 8.53, |
|
"eval_steps_per_second": 0.538, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 1.0007122507122508, |
|
"grad_norm": 7.21124267578125, |
|
"learning_rate": 2.1276595744680852e-05, |
|
"loss": 1.981, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.0078347578347577, |
|
"grad_norm": 8.352581977844238, |
|
"learning_rate": 2.4822695035460995e-05, |
|
"loss": 1.9465, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.014957264957265, |
|
"grad_norm": 8.361379623413086, |
|
"learning_rate": 2.836879432624114e-05, |
|
"loss": 1.962, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.022079772079772, |
|
"grad_norm": 5.388889312744141, |
|
"learning_rate": 3.191489361702128e-05, |
|
"loss": 1.976, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0292022792022792, |
|
"grad_norm": 5.553725719451904, |
|
"learning_rate": 3.546099290780142e-05, |
|
"loss": 2.0058, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0363247863247864, |
|
"grad_norm": 6.278877258300781, |
|
"learning_rate": 3.900709219858156e-05, |
|
"loss": 1.8426, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.042022792022792, |
|
"eval_accuracy": 0.33980582524271846, |
|
"eval_loss": 1.7418017387390137, |
|
"eval_runtime": 19.8724, |
|
"eval_samples_per_second": 10.366, |
|
"eval_steps_per_second": 0.654, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 2.0014245014245016, |
|
"grad_norm": 5.7616729736328125, |
|
"learning_rate": 4.2553191489361704e-05, |
|
"loss": 1.9192, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.0085470085470085, |
|
"grad_norm": 6.886305332183838, |
|
"learning_rate": 4.609929078014185e-05, |
|
"loss": 1.8077, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.0156695156695155, |
|
"grad_norm": 12.934743881225586, |
|
"learning_rate": 4.964539007092199e-05, |
|
"loss": 1.6519, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.022792022792023, |
|
"grad_norm": 9.393815994262695, |
|
"learning_rate": 4.96437054631829e-05, |
|
"loss": 1.6934, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.02991452991453, |
|
"grad_norm": 10.663277626037598, |
|
"learning_rate": 4.924782264449723e-05, |
|
"loss": 1.7591, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.037037037037037, |
|
"grad_norm": 6.719297885894775, |
|
"learning_rate": 4.885193982581156e-05, |
|
"loss": 1.7424, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.042022792022792, |
|
"eval_accuracy": 0.4174757281553398, |
|
"eval_loss": 1.6896483898162842, |
|
"eval_runtime": 17.8016, |
|
"eval_samples_per_second": 11.572, |
|
"eval_steps_per_second": 0.73, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 3.002136752136752, |
|
"grad_norm": 5.332315921783447, |
|
"learning_rate": 4.845605700712589e-05, |
|
"loss": 1.7511, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 3.009259259259259, |
|
"grad_norm": 9.056990623474121, |
|
"learning_rate": 4.806017418844022e-05, |
|
"loss": 1.6771, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 3.0163817663817665, |
|
"grad_norm": 10.588340759277344, |
|
"learning_rate": 4.766429136975455e-05, |
|
"loss": 1.4029, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 3.0235042735042734, |
|
"grad_norm": 17.486988067626953, |
|
"learning_rate": 4.7268408551068886e-05, |
|
"loss": 1.4862, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 3.030626780626781, |
|
"grad_norm": 7.175242900848389, |
|
"learning_rate": 4.687252573238321e-05, |
|
"loss": 1.4282, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 3.0377492877492878, |
|
"grad_norm": 8.013775825500488, |
|
"learning_rate": 4.647664291369755e-05, |
|
"loss": 1.2206, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 3.042022792022792, |
|
"eval_accuracy": 0.44660194174757284, |
|
"eval_loss": 1.628009557723999, |
|
"eval_runtime": 21.5993, |
|
"eval_samples_per_second": 9.537, |
|
"eval_steps_per_second": 0.602, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 4.002849002849003, |
|
"grad_norm": 8.972482681274414, |
|
"learning_rate": 4.6080760095011874e-05, |
|
"loss": 1.6023, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 4.00997150997151, |
|
"grad_norm": 7.669183731079102, |
|
"learning_rate": 4.568487727632621e-05, |
|
"loss": 1.1725, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 4.017094017094017, |
|
"grad_norm": 7.465898513793945, |
|
"learning_rate": 4.528899445764054e-05, |
|
"loss": 1.2957, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 4.024216524216524, |
|
"grad_norm": 5.810666561126709, |
|
"learning_rate": 4.4893111638954874e-05, |
|
"loss": 1.3387, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 4.031339031339031, |
|
"grad_norm": 8.172585487365723, |
|
"learning_rate": 4.44972288202692e-05, |
|
"loss": 1.2668, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 4.038461538461538, |
|
"grad_norm": 7.007075309753418, |
|
"learning_rate": 4.4101346001583535e-05, |
|
"loss": 1.0738, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 4.042022792022792, |
|
"eval_accuracy": 0.5825242718446602, |
|
"eval_loss": 1.2310322523117065, |
|
"eval_runtime": 18.9007, |
|
"eval_samples_per_second": 10.899, |
|
"eval_steps_per_second": 0.688, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 5.003561253561253, |
|
"grad_norm": 4.775450229644775, |
|
"learning_rate": 4.370546318289787e-05, |
|
"loss": 1.0455, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 5.010683760683761, |
|
"grad_norm": 9.440917015075684, |
|
"learning_rate": 4.3309580364212195e-05, |
|
"loss": 0.99, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 5.017806267806268, |
|
"grad_norm": 5.092905521392822, |
|
"learning_rate": 4.291369754552653e-05, |
|
"loss": 0.9873, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 5.0249287749287745, |
|
"grad_norm": 8.457822799682617, |
|
"learning_rate": 4.2517814726840856e-05, |
|
"loss": 0.8012, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 5.032051282051282, |
|
"grad_norm": 6.717366695404053, |
|
"learning_rate": 4.212193190815519e-05, |
|
"loss": 0.9262, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 5.039173789173789, |
|
"grad_norm": 6.962019920349121, |
|
"learning_rate": 4.172604908946952e-05, |
|
"loss": 1.0054, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 5.042022792022792, |
|
"eval_accuracy": 0.558252427184466, |
|
"eval_loss": 1.3242673873901367, |
|
"eval_runtime": 20.0158, |
|
"eval_samples_per_second": 10.292, |
|
"eval_steps_per_second": 0.649, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 6.004273504273504, |
|
"grad_norm": 17.063520431518555, |
|
"learning_rate": 4.133016627078385e-05, |
|
"loss": 0.8909, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 6.011396011396012, |
|
"grad_norm": 8.150900840759277, |
|
"learning_rate": 4.093428345209818e-05, |
|
"loss": 0.89, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 6.018518518518518, |
|
"grad_norm": 8.059175491333008, |
|
"learning_rate": 4.053840063341251e-05, |
|
"loss": 0.7925, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 6.0256410256410255, |
|
"grad_norm": 7.513158798217773, |
|
"learning_rate": 4.0142517814726843e-05, |
|
"loss": 0.853, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 6.032763532763533, |
|
"grad_norm": 6.1937971115112305, |
|
"learning_rate": 3.974663499604117e-05, |
|
"loss": 0.7338, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 6.0398860398860394, |
|
"grad_norm": 5.745666980743408, |
|
"learning_rate": 3.9350752177355504e-05, |
|
"loss": 0.782, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 6.042022792022792, |
|
"eval_accuracy": 0.6359223300970874, |
|
"eval_loss": 1.1890981197357178, |
|
"eval_runtime": 19.1994, |
|
"eval_samples_per_second": 10.73, |
|
"eval_steps_per_second": 0.677, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 7.004985754985755, |
|
"grad_norm": 7.788235187530518, |
|
"learning_rate": 3.895486935866984e-05, |
|
"loss": 0.745, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 7.012108262108262, |
|
"grad_norm": 8.95632553100586, |
|
"learning_rate": 3.8558986539984164e-05, |
|
"loss": 0.6315, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 7.019230769230769, |
|
"grad_norm": 9.59426498413086, |
|
"learning_rate": 3.81631037212985e-05, |
|
"loss": 0.6792, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 7.0263532763532766, |
|
"grad_norm": 7.637509822845459, |
|
"learning_rate": 3.7767220902612825e-05, |
|
"loss": 0.5733, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 7.033475783475783, |
|
"grad_norm": 10.775083541870117, |
|
"learning_rate": 3.737133808392716e-05, |
|
"loss": 0.7303, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 7.0405982905982905, |
|
"grad_norm": 6.580932140350342, |
|
"learning_rate": 3.6975455265241485e-05, |
|
"loss": 0.599, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 7.042022792022792, |
|
"eval_accuracy": 0.6504854368932039, |
|
"eval_loss": 1.193009376525879, |
|
"eval_runtime": 20.2931, |
|
"eval_samples_per_second": 10.151, |
|
"eval_steps_per_second": 0.641, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 8.005698005698006, |
|
"grad_norm": 8.682097434997559, |
|
"learning_rate": 3.657957244655582e-05, |
|
"loss": 0.4704, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 8.012820512820513, |
|
"grad_norm": 8.527168273925781, |
|
"learning_rate": 3.618368962787015e-05, |
|
"loss": 0.6392, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 8.01994301994302, |
|
"grad_norm": 6.198835849761963, |
|
"learning_rate": 3.578780680918448e-05, |
|
"loss": 0.3972, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 8.027065527065528, |
|
"grad_norm": 6.177005290985107, |
|
"learning_rate": 3.539192399049881e-05, |
|
"loss": 0.4873, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 8.034188034188034, |
|
"grad_norm": 9.330336570739746, |
|
"learning_rate": 3.4996041171813146e-05, |
|
"loss": 0.4534, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 8.04131054131054, |
|
"grad_norm": 5.25494384765625, |
|
"learning_rate": 3.460015835312748e-05, |
|
"loss": 0.6782, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 8.042022792022792, |
|
"eval_accuracy": 0.6359223300970874, |
|
"eval_loss": 1.2866381406784058, |
|
"eval_runtime": 19.101, |
|
"eval_samples_per_second": 10.785, |
|
"eval_steps_per_second": 0.681, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 9.006410256410257, |
|
"grad_norm": 5.766805648803711, |
|
"learning_rate": 3.4204275534441806e-05, |
|
"loss": 0.392, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 9.013532763532764, |
|
"grad_norm": 11.340324401855469, |
|
"learning_rate": 3.380839271575614e-05, |
|
"loss": 0.4548, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 9.02065527065527, |
|
"grad_norm": 10.191842079162598, |
|
"learning_rate": 3.3412509897070474e-05, |
|
"loss": 0.3105, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 9.027777777777779, |
|
"grad_norm": 5.949177265167236, |
|
"learning_rate": 3.30166270783848e-05, |
|
"loss": 0.4979, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 9.034900284900285, |
|
"grad_norm": 11.043278694152832, |
|
"learning_rate": 3.2620744259699134e-05, |
|
"loss": 0.3972, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 9.042022792022792, |
|
"grad_norm": 16.354900360107422, |
|
"learning_rate": 3.222486144101346e-05, |
|
"loss": 0.3033, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 9.042022792022792, |
|
"eval_accuracy": 0.5776699029126213, |
|
"eval_loss": 1.423584222793579, |
|
"eval_runtime": 20.9024, |
|
"eval_samples_per_second": 9.855, |
|
"eval_steps_per_second": 0.622, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 10.007122507122507, |
|
"grad_norm": 1.9087872505187988, |
|
"learning_rate": 3.1828978622327794e-05, |
|
"loss": 0.1958, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 10.014245014245015, |
|
"grad_norm": 6.443134307861328, |
|
"learning_rate": 3.143309580364212e-05, |
|
"loss": 0.2865, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 10.021367521367521, |
|
"grad_norm": 5.271445274353027, |
|
"learning_rate": 3.1037212984956455e-05, |
|
"loss": 0.2709, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 10.028490028490028, |
|
"grad_norm": 5.430334568023682, |
|
"learning_rate": 3.064133016627079e-05, |
|
"loss": 0.3552, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 10.035612535612536, |
|
"grad_norm": 8.00438117980957, |
|
"learning_rate": 3.0245447347585115e-05, |
|
"loss": 0.2236, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 10.042022792022792, |
|
"eval_accuracy": 0.6553398058252428, |
|
"eval_loss": 1.3206462860107422, |
|
"eval_runtime": 18.1657, |
|
"eval_samples_per_second": 11.34, |
|
"eval_steps_per_second": 0.716, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 11.000712250712251, |
|
"grad_norm": 8.367637634277344, |
|
"learning_rate": 2.984956452889945e-05, |
|
"loss": 0.2997, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 11.007834757834758, |
|
"grad_norm": 4.96151065826416, |
|
"learning_rate": 2.9453681710213776e-05, |
|
"loss": 0.2366, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 11.014957264957266, |
|
"grad_norm": 4.967086315155029, |
|
"learning_rate": 2.905779889152811e-05, |
|
"loss": 0.2191, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 11.022079772079772, |
|
"grad_norm": 5.6478095054626465, |
|
"learning_rate": 2.8661916072842436e-05, |
|
"loss": 0.2042, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 11.029202279202279, |
|
"grad_norm": 15.652437210083008, |
|
"learning_rate": 2.826603325415677e-05, |
|
"loss": 0.2747, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 11.036324786324787, |
|
"grad_norm": 4.030658721923828, |
|
"learning_rate": 2.7870150435471103e-05, |
|
"loss": 0.1756, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 11.042022792022792, |
|
"eval_accuracy": 0.6601941747572816, |
|
"eval_loss": 1.5112863779067993, |
|
"eval_runtime": 18.776, |
|
"eval_samples_per_second": 10.971, |
|
"eval_steps_per_second": 0.692, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 12.001424501424502, |
|
"grad_norm": 3.84013295173645, |
|
"learning_rate": 2.7474267616785433e-05, |
|
"loss": 0.1714, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 12.008547008547009, |
|
"grad_norm": 5.428359508514404, |
|
"learning_rate": 2.7078384798099763e-05, |
|
"loss": 0.2056, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 12.015669515669515, |
|
"grad_norm": 5.529027938842773, |
|
"learning_rate": 2.6682501979414094e-05, |
|
"loss": 0.1018, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 12.022792022792023, |
|
"grad_norm": 5.976778984069824, |
|
"learning_rate": 2.6286619160728427e-05, |
|
"loss": 0.0961, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 12.02991452991453, |
|
"grad_norm": 6.043054580688477, |
|
"learning_rate": 2.5890736342042754e-05, |
|
"loss": 0.2393, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 12.037037037037036, |
|
"grad_norm": 4.8577656745910645, |
|
"learning_rate": 2.5494853523357088e-05, |
|
"loss": 0.1341, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 12.042022792022792, |
|
"eval_accuracy": 0.6407766990291263, |
|
"eval_loss": 1.6544133424758911, |
|
"eval_runtime": 18.7553, |
|
"eval_samples_per_second": 10.984, |
|
"eval_steps_per_second": 0.693, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 13.002136752136753, |
|
"grad_norm": 3.0963165760040283, |
|
"learning_rate": 2.509897070467142e-05, |
|
"loss": 0.1632, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 13.00925925925926, |
|
"grad_norm": 1.0908960103988647, |
|
"learning_rate": 2.4703087885985748e-05, |
|
"loss": 0.1377, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 13.016381766381766, |
|
"grad_norm": 13.51460075378418, |
|
"learning_rate": 2.4307205067300078e-05, |
|
"loss": 0.1605, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 13.023504273504274, |
|
"grad_norm": 3.4943864345550537, |
|
"learning_rate": 2.3911322248614412e-05, |
|
"loss": 0.1576, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 13.03062678062678, |
|
"grad_norm": 3.6334426403045654, |
|
"learning_rate": 2.3515439429928742e-05, |
|
"loss": 0.1366, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 13.037749287749287, |
|
"grad_norm": 1.571234107017517, |
|
"learning_rate": 2.3119556611243072e-05, |
|
"loss": 0.0823, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 13.042022792022792, |
|
"eval_accuracy": 0.6553398058252428, |
|
"eval_loss": 1.61236572265625, |
|
"eval_runtime": 18.782, |
|
"eval_samples_per_second": 10.968, |
|
"eval_steps_per_second": 0.692, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 14.002849002849002, |
|
"grad_norm": 6.992094039916992, |
|
"learning_rate": 2.2723673792557402e-05, |
|
"loss": 0.0825, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 14.00997150997151, |
|
"grad_norm": 0.7775176763534546, |
|
"learning_rate": 2.2327790973871736e-05, |
|
"loss": 0.0993, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 14.017094017094017, |
|
"grad_norm": 5.0337700843811035, |
|
"learning_rate": 2.1931908155186066e-05, |
|
"loss": 0.0713, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 14.024216524216524, |
|
"grad_norm": 0.6653324365615845, |
|
"learning_rate": 2.1536025336500396e-05, |
|
"loss": 0.1065, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 14.031339031339032, |
|
"grad_norm": 11.971256256103516, |
|
"learning_rate": 2.114014251781473e-05, |
|
"loss": 0.144, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 14.038461538461538, |
|
"grad_norm": 10.947636604309082, |
|
"learning_rate": 2.074425969912906e-05, |
|
"loss": 0.0691, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 14.042022792022792, |
|
"eval_accuracy": 0.6456310679611651, |
|
"eval_loss": 1.8230090141296387, |
|
"eval_runtime": 17.8025, |
|
"eval_samples_per_second": 11.571, |
|
"eval_steps_per_second": 0.73, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 14.042022792022792, |
|
"step": 885, |
|
"total_flos": 6.204645759270519e+19, |
|
"train_loss": 0.8409665932847281, |
|
"train_runtime": 3290.4789, |
|
"train_samples_per_second": 6.827, |
|
"train_steps_per_second": 0.427 |
|
}, |
|
{ |
|
"epoch": 14.042022792022792, |
|
"eval_accuracy": 0.6146341463414634, |
|
"eval_loss": 1.5203598737716675, |
|
"eval_runtime": 24.6199, |
|
"eval_samples_per_second": 8.327, |
|
"eval_steps_per_second": 0.528, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 14.042022792022792, |
|
"eval_accuracy": 0.6146341463414634, |
|
"eval_loss": 1.5239903926849365, |
|
"eval_runtime": 17.6083, |
|
"eval_samples_per_second": 11.642, |
|
"eval_steps_per_second": 0.738, |
|
"step": 885 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1404, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 6.204645759270519e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|