{
  "best_metric": 0.2595302164554596,
  "best_model_checkpoint": "distilbert-base-uncased-lora-intent-classification-v2/checkpoint-67716",
  "epoch": 9.0,
  "eval_steps": 500,
  "global_step": 67716,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.06645401382243488,
      "grad_norm": 4.6704421043396,
      "learning_rate": 0.0009933545986177566,
      "loss": 0.6675,
      "step": 500
    },
    {
      "epoch": 0.13290802764486975,
      "grad_norm": 2.3022220134735107,
      "learning_rate": 0.000986709197235513,
      "loss": 0.4718,
      "step": 1000
    },
    {
      "epoch": 0.19936204146730463,
      "grad_norm": 0.44215622544288635,
      "learning_rate": 0.0009800637958532696,
      "loss": 0.4146,
      "step": 1500
    },
    {
      "epoch": 0.2658160552897395,
      "grad_norm": 0.08581192046403885,
      "learning_rate": 0.0009734183944710261,
      "loss": 0.4297,
      "step": 2000
    },
    {
      "epoch": 0.3322700691121744,
      "grad_norm": 13.087315559387207,
      "learning_rate": 0.0009667729930887826,
      "loss": 0.3776,
      "step": 2500
    },
    {
      "epoch": 0.39872408293460926,
      "grad_norm": 15.066133499145508,
      "learning_rate": 0.0009601275917065391,
      "loss": 0.4233,
      "step": 3000
    },
    {
      "epoch": 0.46517809675704413,
      "grad_norm": 0.23827387392520905,
      "learning_rate": 0.0009534821903242956,
      "loss": 0.3613,
      "step": 3500
    },
    {
      "epoch": 0.531632110579479,
      "grad_norm": 0.009319925680756569,
      "learning_rate": 0.0009468367889420521,
      "loss": 0.4269,
      "step": 4000
    },
    {
      "epoch": 0.5980861244019139,
      "grad_norm": 0.665321946144104,
      "learning_rate": 0.0009401913875598086,
      "loss": 0.3815,
      "step": 4500
    },
    {
      "epoch": 0.6645401382243488,
      "grad_norm": 3.580693483352661,
      "learning_rate": 0.0009335459861775651,
      "loss": 0.3539,
      "step": 5000
    },
    {
      "epoch": 0.7309941520467836,
      "grad_norm": 0.12289135903120041,
      "learning_rate": 0.0009269005847953217,
      "loss": 0.4112,
      "step": 5500
    },
    {
      "epoch": 0.7974481658692185,
      "grad_norm": 1.3471044301986694,
      "learning_rate": 0.0009202551834130782,
      "loss": 0.4109,
      "step": 6000
    },
    {
      "epoch": 0.8639021796916534,
      "grad_norm": 0.09887880831956863,
      "learning_rate": 0.0009136097820308346,
      "loss": 0.4508,
      "step": 6500
    },
    {
      "epoch": 0.9303561935140883,
      "grad_norm": 0.005311007611453533,
      "learning_rate": 0.0009069643806485912,
      "loss": 0.4011,
      "step": 7000
    },
    {
      "epoch": 0.9968102073365231,
      "grad_norm": 1.1049816608428955,
      "learning_rate": 0.0009003189792663478,
      "loss": 0.368,
      "step": 7500
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9425867507886435,
      "eval_f1": 0.9421244141375861,
      "eval_loss": 0.3986539840698242,
      "eval_precision": 0.9421379340931425,
      "eval_recall": 0.9425867507886435,
      "eval_runtime": 4.728,
      "eval_samples_per_second": 335.238,
      "eval_steps_per_second": 83.968,
      "step": 7524
    },
    {
      "epoch": 1.063264221158958,
      "grad_norm": 70.09782409667969,
      "learning_rate": 0.0008936735778841042,
      "loss": 0.3306,
      "step": 8000
    },
    {
      "epoch": 1.1297182349813928,
      "grad_norm": 0.7961419820785522,
      "learning_rate": 0.0008870281765018608,
      "loss": 0.3746,
      "step": 8500
    },
    {
      "epoch": 1.1961722488038278,
      "grad_norm": 0.060738347470760345,
      "learning_rate": 0.0008803827751196173,
      "loss": 0.4045,
      "step": 9000
    },
    {
      "epoch": 1.2626262626262625,
      "grad_norm": 0.20715029537677765,
      "learning_rate": 0.0008737373737373737,
      "loss": 0.4587,
      "step": 9500
    },
    {
      "epoch": 1.3290802764486975,
      "grad_norm": 0.08913299441337585,
      "learning_rate": 0.0008670919723551303,
      "loss": 0.4504,
      "step": 10000
    },
    {
      "epoch": 1.3955342902711323,
      "grad_norm": 0.14319421350955963,
      "learning_rate": 0.0008604465709728868,
      "loss": 0.3991,
      "step": 10500
    },
    {
      "epoch": 1.4619883040935673,
      "grad_norm": 2.545884370803833,
      "learning_rate": 0.0008538011695906432,
      "loss": 0.4192,
      "step": 11000
    },
    {
      "epoch": 1.528442317916002,
      "grad_norm": 0.12403066456317902,
      "learning_rate": 0.0008471557682083998,
      "loss": 0.3563,
      "step": 11500
    },
    {
      "epoch": 1.594896331738437,
      "grad_norm": 41.519954681396484,
      "learning_rate": 0.0008405103668261563,
      "loss": 0.3435,
      "step": 12000
    },
    {
      "epoch": 1.661350345560872,
      "grad_norm": 83.61852264404297,
      "learning_rate": 0.0008338649654439129,
      "loss": 0.3503,
      "step": 12500
    },
    {
      "epoch": 1.7278043593833068,
      "grad_norm": 0.001769404741935432,
      "learning_rate": 0.0008272195640616694,
      "loss": 0.3238,
      "step": 13000
    },
    {
      "epoch": 1.7942583732057416,
      "grad_norm": 1.7677043676376343,
      "learning_rate": 0.0008205741626794258,
      "loss": 0.38,
      "step": 13500
    },
    {
      "epoch": 1.8607123870281765,
      "grad_norm": 1.0566127300262451,
      "learning_rate": 0.0008139287612971824,
      "loss": 0.4146,
      "step": 14000
    },
    {
      "epoch": 1.9271664008506115,
      "grad_norm": 19.463109970092773,
      "learning_rate": 0.0008072833599149389,
      "loss": 0.4305,
      "step": 14500
    },
    {
      "epoch": 1.9936204146730463,
      "grad_norm": 17.069889068603516,
      "learning_rate": 0.0008006379585326954,
      "loss": 0.3505,
      "step": 15000
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9482649842271293,
      "eval_f1": 0.9478124684113843,
      "eval_loss": 0.3766539990901947,
      "eval_precision": 0.9481744874506283,
      "eval_recall": 0.9482649842271293,
      "eval_runtime": 4.5607,
      "eval_samples_per_second": 347.537,
      "eval_steps_per_second": 87.049,
      "step": 15048
    },
    {
      "epoch": 2.060074428495481,
      "grad_norm": 0.4118238389492035,
      "learning_rate": 0.000793992557150452,
      "loss": 0.3021,
      "step": 15500
    },
    {
      "epoch": 2.126528442317916,
      "grad_norm": 0.4119320213794708,
      "learning_rate": 0.0007873471557682083,
      "loss": 0.3166,
      "step": 16000
    },
    {
      "epoch": 2.192982456140351,
      "grad_norm": 10.00361442565918,
      "learning_rate": 0.0007807017543859649,
      "loss": 0.374,
      "step": 16500
    },
    {
      "epoch": 2.2594364699627856,
      "grad_norm": 44.608726501464844,
      "learning_rate": 0.0007740563530037215,
      "loss": 0.4748,
      "step": 17000
    },
    {
      "epoch": 2.3258904837852206,
      "grad_norm": 0.09617531299591064,
      "learning_rate": 0.000767410951621478,
      "loss": 0.3771,
      "step": 17500
    },
    {
      "epoch": 2.3923444976076556,
      "grad_norm": 26.71993064880371,
      "learning_rate": 0.0007607655502392344,
      "loss": 0.4181,
      "step": 18000
    },
    {
      "epoch": 2.4587985114300905,
      "grad_norm": 0.003970532212406397,
      "learning_rate": 0.000754120148856991,
      "loss": 0.3365,
      "step": 18500
    },
    {
      "epoch": 2.525252525252525,
      "grad_norm": 0.023912647739052773,
      "learning_rate": 0.0007474747474747475,
      "loss": 0.3731,
      "step": 19000
    },
    {
      "epoch": 2.59170653907496,
      "grad_norm": 0.08333996683359146,
      "learning_rate": 0.000740829346092504,
      "loss": 0.4489,
      "step": 19500
    },
    {
      "epoch": 2.658160552897395,
      "grad_norm": 0.01645304262638092,
      "learning_rate": 0.0007341839447102606,
      "loss": 0.4246,
      "step": 20000
    },
    {
      "epoch": 2.72461456671983,
      "grad_norm": 0.08779849112033844,
      "learning_rate": 0.000727538543328017,
      "loss": 0.4556,
      "step": 20500
    },
    {
      "epoch": 2.7910685805422646,
      "grad_norm": 52.66293716430664,
      "learning_rate": 0.0007208931419457735,
      "loss": 0.3538,
      "step": 21000
    },
    {
      "epoch": 2.8575225943646996,
      "grad_norm": 0.028336428105831146,
      "learning_rate": 0.00071424774056353,
      "loss": 0.3813,
      "step": 21500
    },
    {
      "epoch": 2.9239766081871346,
      "grad_norm": 0.30558499693870544,
      "learning_rate": 0.0007076023391812866,
      "loss": 0.4138,
      "step": 22000
    },
    {
      "epoch": 2.990430622009569,
      "grad_norm": 30.89914321899414,
      "learning_rate": 0.0007009569377990431,
      "loss": 0.3391,
      "step": 22500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.9539432176656152,
      "eval_f1": 0.95367799565447,
      "eval_loss": 0.34262794256210327,
      "eval_precision": 0.9535465559361256,
      "eval_recall": 0.9539432176656152,
      "eval_runtime": 4.5296,
      "eval_samples_per_second": 349.923,
      "eval_steps_per_second": 87.646,
      "step": 22572
    },
    {
      "epoch": 3.056884635832004,
      "grad_norm": 280.99310302734375,
      "learning_rate": 0.0006943115364167995,
      "loss": 0.3269,
      "step": 23000
    },
    {
      "epoch": 3.123338649654439,
      "grad_norm": 0.030926929786801338,
      "learning_rate": 0.0006876661350345561,
      "loss": 0.3015,
      "step": 23500
    },
    {
      "epoch": 3.189792663476874,
      "grad_norm": 0.1642533391714096,
      "learning_rate": 0.0006810207336523127,
      "loss": 0.3959,
      "step": 24000
    },
    {
      "epoch": 3.256246677299309,
      "grad_norm": 4.198115825653076,
      "learning_rate": 0.000674375332270069,
      "loss": 0.4014,
      "step": 24500
    },
    {
      "epoch": 3.3227006911217436,
      "grad_norm": 0.007642796263098717,
      "learning_rate": 0.0006677299308878256,
      "loss": 0.3203,
      "step": 25000
    },
    {
      "epoch": 3.3891547049441786,
      "grad_norm": 0.018859192728996277,
      "learning_rate": 0.0006610845295055822,
      "loss": 0.3617,
      "step": 25500
    },
    {
      "epoch": 3.4556087187666136,
      "grad_norm": 0.1555991768836975,
      "learning_rate": 0.0006544391281233386,
      "loss": 0.34,
      "step": 26000
    },
    {
      "epoch": 3.522062732589048,
      "grad_norm": 0.03736409544944763,
      "learning_rate": 0.0006477937267410952,
      "loss": 0.3342,
      "step": 26500
    },
    {
      "epoch": 3.588516746411483,
      "grad_norm": 0.0046156104654073715,
      "learning_rate": 0.0006411483253588518,
      "loss": 0.3961,
      "step": 27000
    },
    {
      "epoch": 3.654970760233918,
      "grad_norm": 27.846786499023438,
      "learning_rate": 0.0006345029239766082,
      "loss": 0.2895,
      "step": 27500
    },
    {
      "epoch": 3.721424774056353,
      "grad_norm": 19.202760696411133,
      "learning_rate": 0.0006278575225943647,
      "loss": 0.4071,
      "step": 28000
    },
    {
      "epoch": 3.787878787878788,
      "grad_norm": 0.007552656345069408,
      "learning_rate": 0.0006212121212121212,
      "loss": 0.3859,
      "step": 28500
    },
    {
      "epoch": 3.8543328017012226,
      "grad_norm": 0.029448220506310463,
      "learning_rate": 0.0006145667198298778,
      "loss": 0.3642,
      "step": 29000
    },
    {
      "epoch": 3.9207868155236576,
      "grad_norm": 2.9489197731018066,
      "learning_rate": 0.0006079213184476342,
      "loss": 0.3331,
      "step": 29500
    },
    {
      "epoch": 3.9872408293460926,
      "grad_norm": 0.13416582345962524,
      "learning_rate": 0.0006012759170653907,
      "loss": 0.3399,
      "step": 30000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9533123028391167,
      "eval_f1": 0.9528581216338866,
      "eval_loss": 0.36635637283325195,
      "eval_precision": 0.9528819559731596,
      "eval_recall": 0.9533123028391167,
      "eval_runtime": 4.1925,
      "eval_samples_per_second": 378.06,
      "eval_steps_per_second": 94.694,
      "step": 30096
    },
    {
      "epoch": 4.053694843168527,
      "grad_norm": 28.457218170166016,
      "learning_rate": 0.0005946305156831473,
      "loss": 0.3025,
      "step": 30500
    },
    {
      "epoch": 4.120148856990962,
      "grad_norm": 6.5367112159729,
      "learning_rate": 0.0005879851143009038,
      "loss": 0.314,
      "step": 31000
    },
    {
      "epoch": 4.186602870813397,
      "grad_norm": 393.4518737792969,
      "learning_rate": 0.0005813397129186602,
      "loss": 0.3436,
      "step": 31500
    },
    {
      "epoch": 4.253056884635832,
      "grad_norm": 0.9848179221153259,
      "learning_rate": 0.0005746943115364168,
      "loss": 0.2768,
      "step": 32000
    },
    {
      "epoch": 4.319510898458267,
      "grad_norm": 2.0531139373779297,
      "learning_rate": 0.0005680489101541734,
      "loss": 0.3134,
      "step": 32500
    },
    {
      "epoch": 4.385964912280702,
      "grad_norm": 0.055749546736478806,
      "learning_rate": 0.0005614035087719298,
      "loss": 0.3532,
      "step": 33000
    },
    {
      "epoch": 4.452418926103137,
      "grad_norm": 0.4778645634651184,
      "learning_rate": 0.0005547581073896864,
      "loss": 0.3622,
      "step": 33500
    },
    {
      "epoch": 4.518872939925571,
      "grad_norm": 0.061856046319007874,
      "learning_rate": 0.0005481127060074428,
      "loss": 0.3426,
      "step": 34000
    },
    {
      "epoch": 4.585326953748006,
      "grad_norm": 0.026136351749300957,
      "learning_rate": 0.0005414673046251993,
      "loss": 0.3795,
      "step": 34500
    },
    {
      "epoch": 4.651780967570441,
      "grad_norm": 0.03556622937321663,
      "learning_rate": 0.0005348219032429559,
      "loss": 0.3322,
      "step": 35000
    },
    {
      "epoch": 4.718234981392876,
      "grad_norm": 0.14081618189811707,
      "learning_rate": 0.0005281765018607124,
      "loss": 0.3722,
      "step": 35500
    },
    {
      "epoch": 4.784688995215311,
      "grad_norm": 100.0813217163086,
      "learning_rate": 0.0005215311004784689,
      "loss": 0.3467,
      "step": 36000
    },
    {
      "epoch": 4.851143009037746,
      "grad_norm": 9.537514686584473,
      "learning_rate": 0.0005148856990962254,
      "loss": 0.3484,
      "step": 36500
    },
    {
      "epoch": 4.917597022860181,
      "grad_norm": 0.048729896545410156,
      "learning_rate": 0.0005082402977139819,
      "loss": 0.3439,
      "step": 37000
    },
    {
      "epoch": 4.984051036682615,
      "grad_norm": 0.005286164116114378,
      "learning_rate": 0.0005015948963317385,
      "loss": 0.3023,
      "step": 37500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.9570977917981073,
      "eval_f1": 0.9568038885748729,
      "eval_loss": 0.3057607114315033,
      "eval_precision": 0.9566095910966326,
      "eval_recall": 0.9570977917981073,
      "eval_runtime": 4.2904,
      "eval_samples_per_second": 369.428,
      "eval_steps_per_second": 92.532,
      "step": 37620
    },
    {
      "epoch": 5.05050505050505,
      "grad_norm": 123.33903503417969,
      "learning_rate": 0.000494949494949495,
      "loss": 0.3801,
      "step": 38000
    },
    {
      "epoch": 5.116959064327485,
      "grad_norm": 0.005817115306854248,
      "learning_rate": 0.0004883040935672514,
      "loss": 0.3047,
      "step": 38500
    },
    {
      "epoch": 5.18341307814992,
      "grad_norm": 0.16751976311206818,
      "learning_rate": 0.000481658692185008,
      "loss": 0.4044,
      "step": 39000
    },
    {
      "epoch": 5.249867091972355,
      "grad_norm": 60.48826599121094,
      "learning_rate": 0.0004750132908027645,
      "loss": 0.3485,
      "step": 39500
    },
    {
      "epoch": 5.31632110579479,
      "grad_norm": 157.16188049316406,
      "learning_rate": 0.000468367889420521,
      "loss": 0.3368,
      "step": 40000
    },
    {
      "epoch": 5.382775119617225,
      "grad_norm": 45.994049072265625,
      "learning_rate": 0.00046172248803827756,
      "loss": 0.3816,
      "step": 40500
    },
    {
      "epoch": 5.44922913343966,
      "grad_norm": 15.62516975402832,
      "learning_rate": 0.00045507708665603404,
      "loss": 0.324,
      "step": 41000
    },
    {
      "epoch": 5.515683147262095,
      "grad_norm": 289.2982177734375,
      "learning_rate": 0.0004484316852737905,
      "loss": 0.3031,
      "step": 41500
    },
    {
      "epoch": 5.582137161084529,
      "grad_norm": 0.027738776057958603,
      "learning_rate": 0.00044178628389154705,
      "loss": 0.3392,
      "step": 42000
    },
    {
      "epoch": 5.648591174906964,
      "grad_norm": 0.02977157197892666,
      "learning_rate": 0.0004351408825093036,
      "loss": 0.3477,
      "step": 42500
    },
    {
      "epoch": 5.715045188729399,
      "grad_norm": 1.663713812828064,
      "learning_rate": 0.0004284954811270601,
      "loss": 0.3993,
      "step": 43000
    },
    {
      "epoch": 5.781499202551834,
      "grad_norm": 2.4411869049072266,
      "learning_rate": 0.0004218500797448166,
      "loss": 0.422,
      "step": 43500
    },
    {
      "epoch": 5.847953216374269,
      "grad_norm": 12.378539085388184,
      "learning_rate": 0.0004152046783625731,
      "loss": 0.3649,
      "step": 44000
    },
    {
      "epoch": 5.914407230196704,
      "grad_norm": 82.05158996582031,
      "learning_rate": 0.00040855927698032964,
      "loss": 0.4191,
      "step": 44500
    },
    {
      "epoch": 5.980861244019139,
      "grad_norm": 0.008256383240222931,
      "learning_rate": 0.0004019138755980861,
      "loss": 0.3437,
      "step": 45000
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9501577287066246,
      "eval_f1": 0.9497240205967022,
      "eval_loss": 0.31248244643211365,
      "eval_precision": 0.949826651119135,
      "eval_recall": 0.9501577287066246,
      "eval_runtime": 4.1272,
      "eval_samples_per_second": 384.034,
      "eval_steps_per_second": 96.19,
      "step": 45144
    },
    {
      "epoch": 6.047315257841573,
      "grad_norm": 0.22720667719841003,
      "learning_rate": 0.00039526847421584264,
      "loss": 0.3774,
      "step": 45500
    },
    {
      "epoch": 6.113769271664008,
      "grad_norm": 0.1796969771385193,
      "learning_rate": 0.0003886230728335992,
      "loss": 0.3625,
      "step": 46000
    },
    {
      "epoch": 6.180223285486443,
      "grad_norm": 0.06664836406707764,
      "learning_rate": 0.00038197767145135565,
      "loss": 0.3096,
      "step": 46500
    },
    {
      "epoch": 6.246677299308878,
      "grad_norm": 52.87346267700195,
      "learning_rate": 0.0003753322700691122,
      "loss": 0.324,
      "step": 47000
    },
    {
      "epoch": 6.313131313131313,
      "grad_norm": 0.13641533255577087,
      "learning_rate": 0.0003686868686868687,
      "loss": 0.3824,
      "step": 47500
    },
    {
      "epoch": 6.379585326953748,
      "grad_norm": 0.014752733521163464,
      "learning_rate": 0.00036204146730462524,
      "loss": 0.3576,
      "step": 48000
    },
    {
      "epoch": 6.446039340776183,
      "grad_norm": 0.07991009950637817,
      "learning_rate": 0.0003553960659223817,
      "loss": 0.2889,
      "step": 48500
    },
    {
      "epoch": 6.512493354598618,
      "grad_norm": 0.0857154056429863,
      "learning_rate": 0.0003487506645401382,
      "loss": 0.3496,
      "step": 49000
    },
    {
      "epoch": 6.578947368421053,
      "grad_norm": 22.04611587524414,
      "learning_rate": 0.00034210526315789477,
      "loss": 0.3456,
      "step": 49500
    },
    {
      "epoch": 6.645401382243487,
      "grad_norm": 0.3360465466976166,
      "learning_rate": 0.00033545986177565125,
      "loss": 0.3113,
      "step": 50000
    },
    {
      "epoch": 6.711855396065922,
      "grad_norm": 0.011091183871030807,
      "learning_rate": 0.0003288144603934078,
      "loss": 0.3085,
      "step": 50500
    },
    {
      "epoch": 6.778309409888357,
      "grad_norm": 45.16307830810547,
      "learning_rate": 0.00032216905901116425,
      "loss": 0.261,
      "step": 51000
    },
    {
      "epoch": 6.844763423710792,
      "grad_norm": 0.10898467898368835,
      "learning_rate": 0.0003155236576289208,
      "loss": 0.2772,
      "step": 51500
    },
    {
      "epoch": 6.911217437533227,
      "grad_norm": 0.04280232638120651,
      "learning_rate": 0.0003088782562466773,
      "loss": 0.3664,
      "step": 52000
    },
    {
      "epoch": 6.977671451355662,
      "grad_norm": 0.44427451491355896,
      "learning_rate": 0.0003022328548644338,
      "loss": 0.2981,
      "step": 52500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.9570977917981073,
      "eval_f1": 0.9567609606627793,
      "eval_loss": 0.3381944000720978,
      "eval_precision": 0.9567551880330806,
      "eval_recall": 0.9570977917981073,
      "eval_runtime": 4.1238,
      "eval_samples_per_second": 384.357,
      "eval_steps_per_second": 96.271,
      "step": 52668
    },
    {
      "epoch": 7.044125465178097,
      "grad_norm": 12.310619354248047,
      "learning_rate": 0.00029558745348219037,
      "loss": 0.2961,
      "step": 53000
    },
    {
      "epoch": 7.110579479000531,
      "grad_norm": 0.021439863368868828,
      "learning_rate": 0.00028894205209994685,
      "loss": 0.3132,
      "step": 53500
    },
    {
      "epoch": 7.177033492822966,
      "grad_norm": 12.506621360778809,
      "learning_rate": 0.0002822966507177033,
      "loss": 0.3065,
      "step": 54000
    },
    {
      "epoch": 7.243487506645401,
      "grad_norm": 40.974212646484375,
      "learning_rate": 0.00027565124933545985,
      "loss": 0.3052,
      "step": 54500
    },
    {
      "epoch": 7.309941520467836,
      "grad_norm": 17.352012634277344,
      "learning_rate": 0.0002690058479532164,
      "loss": 0.3074,
      "step": 55000
    },
    {
      "epoch": 7.376395534290271,
      "grad_norm": 7.186513423919678,
      "learning_rate": 0.0002623604465709729,
      "loss": 0.2944,
      "step": 55500
    },
    {
      "epoch": 7.442849548112706,
      "grad_norm": 0.11422441154718399,
      "learning_rate": 0.0002557150451887294,
      "loss": 0.3277,
      "step": 56000
    },
    {
      "epoch": 7.509303561935141,
      "grad_norm": 0.4097649157047272,
      "learning_rate": 0.0002490696438064859,
      "loss": 0.3314,
      "step": 56500
    },
    {
      "epoch": 7.575757575757576,
      "grad_norm": 255.17686462402344,
      "learning_rate": 0.00024242424242424245,
      "loss": 0.3849,
      "step": 57000
    },
    {
      "epoch": 7.642211589580011,
      "grad_norm": 0.11329037696123123,
      "learning_rate": 0.00023577884104199895,
      "loss": 0.3603,
      "step": 57500
    },
    {
      "epoch": 7.708665603402445,
      "grad_norm": 0.04299360513687134,
      "learning_rate": 0.00022913343965975545,
      "loss": 0.3467,
      "step": 58000
    },
    {
      "epoch": 7.77511961722488,
      "grad_norm": 0.04895203933119774,
      "learning_rate": 0.00022248803827751195,
      "loss": 0.3428,
      "step": 58500
    },
    {
      "epoch": 7.841573631047315,
      "grad_norm": 0.07165663689374924,
      "learning_rate": 0.00021584263689526848,
      "loss": 0.2874,
      "step": 59000
    },
    {
      "epoch": 7.90802764486975,
      "grad_norm": 0.10646966099739075,
      "learning_rate": 0.00020919723551302499,
      "loss": 0.2834,
      "step": 59500
    },
    {
      "epoch": 7.974481658692185,
      "grad_norm": 0.022936491295695305,
      "learning_rate": 0.00020255183413078152,
      "loss": 0.2899,
      "step": 60000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9577287066246056,
      "eval_f1": 0.9575092656624108,
      "eval_loss": 0.30500882863998413,
      "eval_precision": 0.9575766504306299,
      "eval_recall": 0.9577287066246056,
      "eval_runtime": 4.5012,
      "eval_samples_per_second": 352.132,
      "eval_steps_per_second": 88.2,
      "step": 60192
    },
    {
      "epoch": 8.04093567251462,
      "grad_norm": 0.4371676743030548,
      "learning_rate": 0.00019590643274853802,
      "loss": 0.3231,
      "step": 60500
    },
    {
      "epoch": 8.107389686337054,
      "grad_norm": 0.000947824795730412,
      "learning_rate": 0.00018926103136629452,
      "loss": 0.3014,
      "step": 61000
    },
    {
      "epoch": 8.17384370015949,
      "grad_norm": 0.06363413482904434,
      "learning_rate": 0.00018261562998405105,
      "loss": 0.2293,
      "step": 61500
    },
    {
      "epoch": 8.240297713981924,
      "grad_norm": 1.2114511728286743,
      "learning_rate": 0.00017597022860180755,
      "loss": 0.2808,
      "step": 62000
    },
    {
      "epoch": 8.30675172780436,
      "grad_norm": 23.535938262939453,
      "learning_rate": 0.00016932482721956408,
      "loss": 0.2595,
      "step": 62500
    },
    {
      "epoch": 8.373205741626794,
      "grad_norm": 60.49204635620117,
      "learning_rate": 0.00016267942583732056,
      "loss": 0.3388,
      "step": 63000
    },
    {
      "epoch": 8.43965975544923,
      "grad_norm": 14.233682632446289,
      "learning_rate": 0.0001560340244550771,
      "loss": 0.3423,
      "step": 63500
    },
    {
      "epoch": 8.506113769271664,
      "grad_norm": 0.015386885032057762,
      "learning_rate": 0.0001493886230728336,
      "loss": 0.316,
      "step": 64000
    },
    {
      "epoch": 8.5725677830941,
      "grad_norm": 0.3906301259994507,
      "learning_rate": 0.00014274322169059012,
      "loss": 0.3165,
      "step": 64500
    },
    {
      "epoch": 8.639021796916534,
      "grad_norm": 0.0586216077208519,
      "learning_rate": 0.00013609782030834665,
      "loss": 0.3013,
      "step": 65000
    },
    {
      "epoch": 8.70547581073897,
      "grad_norm": 0.006104405503720045,
      "learning_rate": 0.00012945241892610312,
      "loss": 0.2352,
      "step": 65500
    },
    {
      "epoch": 8.771929824561404,
      "grad_norm": 0.02979845367372036,
      "learning_rate": 0.00012280701754385965,
      "loss": 0.2203,
      "step": 66000
    },
    {
      "epoch": 8.83838383838384,
      "grad_norm": 0.08639369904994965,
      "learning_rate": 0.00011616161616161616,
      "loss": 0.2643,
      "step": 66500
    },
    {
      "epoch": 8.904837852206274,
      "grad_norm": 32.0872802734375,
      "learning_rate": 0.00010951621477937269,
      "loss": 0.2658,
      "step": 67000
    },
    {
      "epoch": 8.971291866028707,
      "grad_norm": 0.011845378205180168,
      "learning_rate": 0.00010287081339712919,
      "loss": 0.2795,
      "step": 67500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9646687697160883,
      "eval_f1": 0.9644253672098426,
      "eval_loss": 0.2595302164554596,
      "eval_precision": 0.9644475825303181,
      "eval_recall": 0.9646687697160883,
      "eval_runtime": 4.3195,
      "eval_samples_per_second": 366.941,
      "eval_steps_per_second": 91.909,
      "step": 67716
    }
  ],
  "logging_steps": 500,
  "max_steps": 75240,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2551274670587520.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}