|
{ |
|
"best_metric": 1.4913766384124756, |
|
"best_model_checkpoint": "autotrain-9c20u-twasm/checkpoint-453", |
|
"epoch": 3.0, |
|
"eval_steps": 500, |
|
"global_step": 453, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.013245033112582781, |
|
"grad_norm": 3.3913655281066895, |
|
"learning_rate": 2.173913043478261e-06, |
|
"loss": 2.9946, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.026490066225165563, |
|
"grad_norm": 3.8051860332489014, |
|
"learning_rate": 4.347826086956522e-06, |
|
"loss": 3.0636, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.039735099337748346, |
|
"grad_norm": 4.160909652709961, |
|
"learning_rate": 6.521739130434783e-06, |
|
"loss": 3.0633, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.052980132450331126, |
|
"grad_norm": 3.9198436737060547, |
|
"learning_rate": 8.695652173913044e-06, |
|
"loss": 3.0431, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.06622516556291391, |
|
"grad_norm": 3.415198564529419, |
|
"learning_rate": 1.0869565217391305e-05, |
|
"loss": 3.0373, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07947019867549669, |
|
"grad_norm": 4.432325839996338, |
|
"learning_rate": 1.3043478260869566e-05, |
|
"loss": 3.0171, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.09271523178807947, |
|
"grad_norm": 3.669222354888916, |
|
"learning_rate": 1.5217391304347828e-05, |
|
"loss": 3.0287, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.10596026490066225, |
|
"grad_norm": 4.237015724182129, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 2.9171, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.11920529801324503, |
|
"grad_norm": 4.314387798309326, |
|
"learning_rate": 1.956521739130435e-05, |
|
"loss": 2.9663, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.13245033112582782, |
|
"grad_norm": 4.890565872192383, |
|
"learning_rate": 2.173913043478261e-05, |
|
"loss": 2.885, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1456953642384106, |
|
"grad_norm": 4.127910137176514, |
|
"learning_rate": 2.391304347826087e-05, |
|
"loss": 2.9042, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.15894039735099338, |
|
"grad_norm": 5.797154903411865, |
|
"learning_rate": 2.608695652173913e-05, |
|
"loss": 2.743, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.17218543046357615, |
|
"grad_norm": 4.610489845275879, |
|
"learning_rate": 2.826086956521739e-05, |
|
"loss": 2.727, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.18543046357615894, |
|
"grad_norm": 4.88078498840332, |
|
"learning_rate": 3.0434782608695656e-05, |
|
"loss": 2.6978, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.1986754966887417, |
|
"grad_norm": 4.7736334800720215, |
|
"learning_rate": 3.260869565217392e-05, |
|
"loss": 2.6417, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.2119205298013245, |
|
"grad_norm": 8.030829429626465, |
|
"learning_rate": 3.478260869565218e-05, |
|
"loss": 2.6245, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.2251655629139073, |
|
"grad_norm": 7.201488971710205, |
|
"learning_rate": 3.695652173913043e-05, |
|
"loss": 2.1068, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.23841059602649006, |
|
"grad_norm": 6.687085151672363, |
|
"learning_rate": 3.804347826086957e-05, |
|
"loss": 2.065, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.25165562913907286, |
|
"grad_norm": 7.2274675369262695, |
|
"learning_rate": 4.021739130434783e-05, |
|
"loss": 2.2102, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.26490066225165565, |
|
"grad_norm": 8.127673149108887, |
|
"learning_rate": 4.239130434782609e-05, |
|
"loss": 2.1585, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2781456953642384, |
|
"grad_norm": 8.082962989807129, |
|
"learning_rate": 4.456521739130435e-05, |
|
"loss": 1.651, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.2913907284768212, |
|
"grad_norm": 9.228837013244629, |
|
"learning_rate": 4.673913043478261e-05, |
|
"loss": 2.5421, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.304635761589404, |
|
"grad_norm": 8.194953918457031, |
|
"learning_rate": 4.891304347826087e-05, |
|
"loss": 1.7752, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.31788079470198677, |
|
"grad_norm": 11.126862525939941, |
|
"learning_rate": 4.987714987714988e-05, |
|
"loss": 2.6941, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.33112582781456956, |
|
"grad_norm": 7.496665954589844, |
|
"learning_rate": 4.963144963144963e-05, |
|
"loss": 2.156, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.3443708609271523, |
|
"grad_norm": 6.483855724334717, |
|
"learning_rate": 4.9385749385749387e-05, |
|
"loss": 1.7352, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.3576158940397351, |
|
"grad_norm": 7.87312650680542, |
|
"learning_rate": 4.914004914004915e-05, |
|
"loss": 1.8533, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.3708609271523179, |
|
"grad_norm": 14.155879020690918, |
|
"learning_rate": 4.8894348894348894e-05, |
|
"loss": 1.9163, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.3841059602649007, |
|
"grad_norm": 6.899637699127197, |
|
"learning_rate": 4.8648648648648654e-05, |
|
"loss": 2.0497, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.3973509933774834, |
|
"grad_norm": 9.851097106933594, |
|
"learning_rate": 4.840294840294841e-05, |
|
"loss": 2.0549, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4105960264900662, |
|
"grad_norm": 7.761284828186035, |
|
"learning_rate": 4.8157248157248155e-05, |
|
"loss": 2.4832, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.423841059602649, |
|
"grad_norm": 7.7907819747924805, |
|
"learning_rate": 4.7911547911547915e-05, |
|
"loss": 2.0239, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.4370860927152318, |
|
"grad_norm": 6.249843597412109, |
|
"learning_rate": 4.766584766584767e-05, |
|
"loss": 1.5607, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.4503311258278146, |
|
"grad_norm": 5.760603904724121, |
|
"learning_rate": 4.742014742014742e-05, |
|
"loss": 1.7551, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.46357615894039733, |
|
"grad_norm": 8.171866416931152, |
|
"learning_rate": 4.7174447174447176e-05, |
|
"loss": 1.7013, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.4768211920529801, |
|
"grad_norm": 7.343658924102783, |
|
"learning_rate": 4.692874692874693e-05, |
|
"loss": 2.1362, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.4900662251655629, |
|
"grad_norm": 11.477375030517578, |
|
"learning_rate": 4.6683046683046684e-05, |
|
"loss": 2.0896, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.5033112582781457, |
|
"grad_norm": 6.7531256675720215, |
|
"learning_rate": 4.6437346437346444e-05, |
|
"loss": 1.5969, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.5165562913907285, |
|
"grad_norm": 9.825312614440918, |
|
"learning_rate": 4.619164619164619e-05, |
|
"loss": 2.1228, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.5298013245033113, |
|
"grad_norm": 7.430478096008301, |
|
"learning_rate": 4.594594594594595e-05, |
|
"loss": 2.1018, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.543046357615894, |
|
"grad_norm": 5.134402275085449, |
|
"learning_rate": 4.5700245700245705e-05, |
|
"loss": 1.5301, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.5562913907284768, |
|
"grad_norm": 11.335017204284668, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 1.5022, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.5695364238410596, |
|
"grad_norm": 8.13528823852539, |
|
"learning_rate": 4.520884520884521e-05, |
|
"loss": 2.2219, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.5827814569536424, |
|
"grad_norm": 5.774362564086914, |
|
"learning_rate": 4.4963144963144966e-05, |
|
"loss": 1.5469, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.5960264900662252, |
|
"grad_norm": 10.505985260009766, |
|
"learning_rate": 4.471744471744472e-05, |
|
"loss": 2.1449, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.609271523178808, |
|
"grad_norm": 9.9073486328125, |
|
"learning_rate": 4.447174447174447e-05, |
|
"loss": 1.9911, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.6225165562913907, |
|
"grad_norm": 7.591387748718262, |
|
"learning_rate": 4.422604422604423e-05, |
|
"loss": 1.2767, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.6357615894039735, |
|
"grad_norm": 12.243816375732422, |
|
"learning_rate": 4.398034398034398e-05, |
|
"loss": 2.4575, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.6490066225165563, |
|
"grad_norm": 12.278913497924805, |
|
"learning_rate": 4.373464373464374e-05, |
|
"loss": 2.6426, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.6622516556291391, |
|
"grad_norm": 8.50022029876709, |
|
"learning_rate": 4.348894348894349e-05, |
|
"loss": 1.8184, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6754966887417219, |
|
"grad_norm": 15.808201789855957, |
|
"learning_rate": 4.324324324324325e-05, |
|
"loss": 1.8794, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.6887417218543046, |
|
"grad_norm": 11.01276969909668, |
|
"learning_rate": 4.2997542997543e-05, |
|
"loss": 1.7045, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.7019867549668874, |
|
"grad_norm": 6.432559490203857, |
|
"learning_rate": 4.2751842751842756e-05, |
|
"loss": 1.3662, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.7152317880794702, |
|
"grad_norm": 12.68566608428955, |
|
"learning_rate": 4.250614250614251e-05, |
|
"loss": 1.0612, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.7284768211920529, |
|
"grad_norm": 7.320284843444824, |
|
"learning_rate": 4.226044226044226e-05, |
|
"loss": 1.6157, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7417218543046358, |
|
"grad_norm": 9.558548927307129, |
|
"learning_rate": 4.2014742014742017e-05, |
|
"loss": 1.6096, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.7549668874172185, |
|
"grad_norm": 12.876482009887695, |
|
"learning_rate": 4.176904176904177e-05, |
|
"loss": 2.0925, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.7682119205298014, |
|
"grad_norm": 16.353004455566406, |
|
"learning_rate": 4.1523341523341524e-05, |
|
"loss": 2.4218, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.7814569536423841, |
|
"grad_norm": 8.213098526000977, |
|
"learning_rate": 4.127764127764128e-05, |
|
"loss": 1.4581, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.7947019867549668, |
|
"grad_norm": 7.289383888244629, |
|
"learning_rate": 4.103194103194104e-05, |
|
"loss": 1.3969, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8079470198675497, |
|
"grad_norm": 12.726005554199219, |
|
"learning_rate": 4.0786240786240785e-05, |
|
"loss": 1.6086, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.8211920529801324, |
|
"grad_norm": 6.612705230712891, |
|
"learning_rate": 4.0540540540540545e-05, |
|
"loss": 1.228, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.8344370860927153, |
|
"grad_norm": 8.954344749450684, |
|
"learning_rate": 4.02948402948403e-05, |
|
"loss": 1.9795, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.847682119205298, |
|
"grad_norm": 14.46446418762207, |
|
"learning_rate": 4.004914004914005e-05, |
|
"loss": 1.4446, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.8609271523178808, |
|
"grad_norm": 6.220669746398926, |
|
"learning_rate": 3.9803439803439806e-05, |
|
"loss": 2.0856, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8741721854304636, |
|
"grad_norm": 6.742675304412842, |
|
"learning_rate": 3.955773955773956e-05, |
|
"loss": 1.1761, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.8874172185430463, |
|
"grad_norm": 6.69525146484375, |
|
"learning_rate": 3.9312039312039314e-05, |
|
"loss": 1.2581, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.9006622516556292, |
|
"grad_norm": 5.910060405731201, |
|
"learning_rate": 3.906633906633907e-05, |
|
"loss": 1.1815, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.9139072847682119, |
|
"grad_norm": 5.171950817108154, |
|
"learning_rate": 3.882063882063882e-05, |
|
"loss": 0.9752, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.9271523178807947, |
|
"grad_norm": 7.012332916259766, |
|
"learning_rate": 3.857493857493858e-05, |
|
"loss": 1.7936, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9403973509933775, |
|
"grad_norm": 7.54193639755249, |
|
"learning_rate": 3.8329238329238335e-05, |
|
"loss": 1.4808, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.9536423841059603, |
|
"grad_norm": 7.4099531173706055, |
|
"learning_rate": 3.808353808353808e-05, |
|
"loss": 1.2554, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.9668874172185431, |
|
"grad_norm": 10.860753059387207, |
|
"learning_rate": 3.783783783783784e-05, |
|
"loss": 2.0704, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.9801324503311258, |
|
"grad_norm": 6.87387752532959, |
|
"learning_rate": 3.7592137592137596e-05, |
|
"loss": 1.6014, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.9933774834437086, |
|
"grad_norm": 6.1860175132751465, |
|
"learning_rate": 3.734643734643735e-05, |
|
"loss": 0.9634, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.43, |
|
"eval_f1_macro": 0.10414486290241251, |
|
"eval_f1_micro": 0.43, |
|
"eval_f1_weighted": 0.3778887980009637, |
|
"eval_loss": 2.039196252822876, |
|
"eval_precision_macro": 0.12514957264957266, |
|
"eval_precision_micro": 0.43, |
|
"eval_precision_weighted": 0.4138803418803419, |
|
"eval_recall_macro": 0.1286363636363636, |
|
"eval_recall_micro": 0.43, |
|
"eval_recall_weighted": 0.43, |
|
"eval_runtime": 0.4069, |
|
"eval_samples_per_second": 245.77, |
|
"eval_steps_per_second": 17.204, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.0066225165562914, |
|
"grad_norm": 13.422128677368164, |
|
"learning_rate": 3.71007371007371e-05, |
|
"loss": 1.1134, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.0198675496688743, |
|
"grad_norm": 7.478858947753906, |
|
"learning_rate": 3.685503685503686e-05, |
|
"loss": 1.177, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.033112582781457, |
|
"grad_norm": 9.430647850036621, |
|
"learning_rate": 3.660933660933661e-05, |
|
"loss": 2.2911, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.0463576158940397, |
|
"grad_norm": 5.825343608856201, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 1.2509, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.0596026490066226, |
|
"grad_norm": 9.491595268249512, |
|
"learning_rate": 3.611793611793612e-05, |
|
"loss": 1.3177, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0728476821192052, |
|
"grad_norm": 5.076850891113281, |
|
"learning_rate": 3.587223587223588e-05, |
|
"loss": 0.9434, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.086092715231788, |
|
"grad_norm": 5.8091206550598145, |
|
"learning_rate": 3.562653562653563e-05, |
|
"loss": 1.4945, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.099337748344371, |
|
"grad_norm": 5.286921501159668, |
|
"learning_rate": 3.538083538083538e-05, |
|
"loss": 0.9087, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.1125827814569536, |
|
"grad_norm": 14.105121612548828, |
|
"learning_rate": 3.513513513513514e-05, |
|
"loss": 1.8953, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.1258278145695364, |
|
"grad_norm": 7.049529552459717, |
|
"learning_rate": 3.488943488943489e-05, |
|
"loss": 1.2879, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1390728476821192, |
|
"grad_norm": 5.330937385559082, |
|
"learning_rate": 3.4643734643734647e-05, |
|
"loss": 1.0429, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.152317880794702, |
|
"grad_norm": 14.564863204956055, |
|
"learning_rate": 3.43980343980344e-05, |
|
"loss": 1.2875, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.1655629139072847, |
|
"grad_norm": 13.017091751098633, |
|
"learning_rate": 3.4152334152334154e-05, |
|
"loss": 1.0357, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.1788079470198676, |
|
"grad_norm": 13.703240394592285, |
|
"learning_rate": 3.390663390663391e-05, |
|
"loss": 1.5905, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.1920529801324504, |
|
"grad_norm": 7.464919567108154, |
|
"learning_rate": 3.366093366093366e-05, |
|
"loss": 1.5741, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.205298013245033, |
|
"grad_norm": 7.947140216827393, |
|
"learning_rate": 3.3415233415233415e-05, |
|
"loss": 1.845, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.218543046357616, |
|
"grad_norm": 6.652373790740967, |
|
"learning_rate": 3.3169533169533175e-05, |
|
"loss": 1.4041, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.2317880794701987, |
|
"grad_norm": 5.824616432189941, |
|
"learning_rate": 3.292383292383293e-05, |
|
"loss": 1.5873, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.2450331125827814, |
|
"grad_norm": 10.095503807067871, |
|
"learning_rate": 3.2678132678132676e-05, |
|
"loss": 1.4017, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.2582781456953642, |
|
"grad_norm": 5.2991766929626465, |
|
"learning_rate": 3.2432432432432436e-05, |
|
"loss": 1.395, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.271523178807947, |
|
"grad_norm": 4.981668472290039, |
|
"learning_rate": 3.218673218673219e-05, |
|
"loss": 1.4415, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 1.2847682119205297, |
|
"grad_norm": 8.634035110473633, |
|
"learning_rate": 3.1941031941031943e-05, |
|
"loss": 1.3291, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 1.2980132450331126, |
|
"grad_norm": 8.355801582336426, |
|
"learning_rate": 3.16953316953317e-05, |
|
"loss": 0.9761, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.3112582781456954, |
|
"grad_norm": null,
|
"learning_rate": 3.1572481572481574e-05, |
|
"loss": 1.1352, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 1.3245033112582782, |
|
"grad_norm": 5.318390846252441, |
|
"learning_rate": 3.132678132678133e-05, |
|
"loss": 1.1314, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.3377483443708609, |
|
"grad_norm": 7.791065692901611, |
|
"learning_rate": 3.108108108108108e-05, |
|
"loss": 1.5313, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 1.3509933774834437, |
|
"grad_norm": 21.741308212280273, |
|
"learning_rate": 3.083538083538084e-05, |
|
"loss": 1.7144, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.3642384105960264, |
|
"grad_norm": 14.097997665405273, |
|
"learning_rate": 3.058968058968059e-05, |
|
"loss": 1.749, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.3774834437086092, |
|
"grad_norm": 6.286694526672363, |
|
"learning_rate": 3.0343980343980342e-05, |
|
"loss": 1.3443, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 1.390728476821192, |
|
"grad_norm": 7.453071117401123, |
|
"learning_rate": 3.0098280098280103e-05, |
|
"loss": 0.85, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.403973509933775, |
|
"grad_norm": 10.761088371276855, |
|
"learning_rate": 2.9852579852579853e-05, |
|
"loss": 0.8886, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 1.4172185430463577, |
|
"grad_norm": 5.939634799957275, |
|
"learning_rate": 2.9606879606879607e-05, |
|
"loss": 1.2804, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 1.4304635761589404, |
|
"grad_norm": 9.565838813781738, |
|
"learning_rate": 2.9361179361179364e-05, |
|
"loss": 1.5924, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 1.4437086092715232, |
|
"grad_norm": 10.187567710876465, |
|
"learning_rate": 2.9115479115479117e-05, |
|
"loss": 1.3317, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 1.4569536423841059, |
|
"grad_norm": 8.318490028381348, |
|
"learning_rate": 2.8869778869778868e-05, |
|
"loss": 1.617, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4701986754966887, |
|
"grad_norm": 7.841922283172607, |
|
"learning_rate": 2.8624078624078625e-05, |
|
"loss": 1.463, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 1.4834437086092715, |
|
"grad_norm": 6.505579471588135, |
|
"learning_rate": 2.8378378378378378e-05, |
|
"loss": 1.123, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.4966887417218544, |
|
"grad_norm": 5.509284496307373, |
|
"learning_rate": 2.8132678132678135e-05, |
|
"loss": 1.0619, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 1.5099337748344372, |
|
"grad_norm": 4.9719367027282715, |
|
"learning_rate": 2.788697788697789e-05, |
|
"loss": 0.6785, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 1.5231788079470199, |
|
"grad_norm": 7.88023567199707, |
|
"learning_rate": 2.764127764127764e-05, |
|
"loss": 1.1481, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5364238410596025, |
|
"grad_norm": 5.841632843017578, |
|
"learning_rate": 2.73955773955774e-05, |
|
"loss": 0.7702, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 1.5496688741721854, |
|
"grad_norm": 8.680448532104492, |
|
"learning_rate": 2.714987714987715e-05, |
|
"loss": 1.4025, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 1.5629139072847682, |
|
"grad_norm": 5.259287357330322, |
|
"learning_rate": 2.6904176904176904e-05, |
|
"loss": 0.8707, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 1.576158940397351, |
|
"grad_norm": 5.991212368011475, |
|
"learning_rate": 2.665847665847666e-05, |
|
"loss": 0.5955, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.589403973509934, |
|
"grad_norm": 11.001547813415527, |
|
"learning_rate": 2.6412776412776414e-05, |
|
"loss": 1.5866, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.6026490066225165, |
|
"grad_norm": 5.934292316436768, |
|
"learning_rate": 2.616707616707617e-05, |
|
"loss": 1.3472, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 1.6158940397350994, |
|
"grad_norm": 16.11164665222168, |
|
"learning_rate": 2.5921375921375925e-05, |
|
"loss": 1.2663, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 1.629139072847682, |
|
"grad_norm": 10.690239906311035, |
|
"learning_rate": 2.5675675675675675e-05, |
|
"loss": 1.5754, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 1.6423841059602649, |
|
"grad_norm": 7.545533657073975, |
|
"learning_rate": 2.5429975429975432e-05, |
|
"loss": 0.9722, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 1.6556291390728477, |
|
"grad_norm": 7.82634162902832, |
|
"learning_rate": 2.5184275184275186e-05, |
|
"loss": 1.8525, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6688741721854305, |
|
"grad_norm": 11.477102279663086, |
|
"learning_rate": 2.493857493857494e-05, |
|
"loss": 0.9904, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.6821192052980134, |
|
"grad_norm": 8.056424140930176, |
|
"learning_rate": 2.4692874692874693e-05, |
|
"loss": 1.6071, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 1.695364238410596, |
|
"grad_norm": 8.872392654418945, |
|
"learning_rate": 2.4447174447174447e-05, |
|
"loss": 1.8732, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 1.7086092715231787, |
|
"grad_norm": 8.307268142700195, |
|
"learning_rate": 2.4201474201474204e-05, |
|
"loss": 1.2843, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 1.7218543046357615, |
|
"grad_norm": 8.889561653137207, |
|
"learning_rate": 2.3955773955773958e-05, |
|
"loss": 1.8855, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.7350993377483444, |
|
"grad_norm": 7.262237071990967, |
|
"learning_rate": 2.371007371007371e-05, |
|
"loss": 0.6363, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 1.7483443708609272, |
|
"grad_norm": 9.860976219177246, |
|
"learning_rate": 2.3464373464373465e-05, |
|
"loss": 0.9588, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 1.76158940397351, |
|
"grad_norm": 8.475176811218262, |
|
"learning_rate": 2.3218673218673222e-05, |
|
"loss": 1.4638, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.7748344370860927, |
|
"grad_norm": 6.681599140167236, |
|
"learning_rate": 2.2972972972972976e-05, |
|
"loss": 0.9692, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 1.7880794701986755, |
|
"grad_norm": 9.975168228149414, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 2.0131, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.8013245033112582, |
|
"grad_norm": 9.302199363708496, |
|
"learning_rate": 2.2481572481572483e-05, |
|
"loss": 1.4159, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 1.814569536423841, |
|
"grad_norm": 10.039788246154785, |
|
"learning_rate": 2.2235872235872237e-05, |
|
"loss": 1.5486, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 1.8278145695364238, |
|
"grad_norm": 7.697207927703857, |
|
"learning_rate": 2.199017199017199e-05, |
|
"loss": 1.0797, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 1.8410596026490067, |
|
"grad_norm": 7.76730489730835, |
|
"learning_rate": 2.1744471744471744e-05, |
|
"loss": 1.1921, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 1.8543046357615895, |
|
"grad_norm": 7.227729320526123, |
|
"learning_rate": 2.14987714987715e-05, |
|
"loss": 1.6573, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8675496688741722, |
|
"grad_norm": 6.351704120635986, |
|
"learning_rate": 2.1253071253071255e-05, |
|
"loss": 0.6416, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 1.8807947019867548, |
|
"grad_norm": 7.4418511390686035, |
|
"learning_rate": 2.1007371007371008e-05, |
|
"loss": 1.7128, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 1.8940397350993377, |
|
"grad_norm": 9.884121894836426, |
|
"learning_rate": 2.0761670761670762e-05, |
|
"loss": 2.3348, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 1.9072847682119205, |
|
"grad_norm": 11.193192481994629, |
|
"learning_rate": 2.051597051597052e-05, |
|
"loss": 0.9744, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 1.9205298013245033, |
|
"grad_norm": 6.8167829513549805, |
|
"learning_rate": 2.0270270270270273e-05, |
|
"loss": 1.2415, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.9337748344370862, |
|
"grad_norm": 6.478491306304932, |
|
"learning_rate": 2.0024570024570026e-05, |
|
"loss": 1.1914, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 1.9470198675496688, |
|
"grad_norm": 7.969542026519775, |
|
"learning_rate": 1.977886977886978e-05, |
|
"loss": 1.5221, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.9602649006622517, |
|
"grad_norm": 7.827632427215576, |
|
"learning_rate": 1.9533169533169534e-05, |
|
"loss": 0.7076, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 1.9735099337748343, |
|
"grad_norm": 5.793867111206055, |
|
"learning_rate": 1.928746928746929e-05, |
|
"loss": 1.1292, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 1.9867549668874172, |
|
"grad_norm": 7.288516998291016, |
|
"learning_rate": 1.904176904176904e-05, |
|
"loss": 0.8474, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 19.54204559326172, |
|
"learning_rate": 1.8796068796068798e-05, |
|
"loss": 0.8084, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.52, |
|
"eval_f1_macro": 0.16279761904761905, |
|
"eval_f1_micro": 0.52, |
|
"eval_f1_weighted": 0.4582380952380952, |
|
"eval_loss": 1.6385834217071533, |
|
"eval_precision_macro": 0.17940422322775265, |
|
"eval_precision_micro": 0.52, |
|
"eval_precision_weighted": 0.47573152337858227, |
|
"eval_recall_macro": 0.2141883116883117, |
|
"eval_recall_micro": 0.52, |
|
"eval_recall_weighted": 0.52, |
|
"eval_runtime": 0.3788, |
|
"eval_samples_per_second": 263.985, |
|
"eval_steps_per_second": 18.479, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 2.013245033112583, |
|
"grad_norm": 3.1326494216918945, |
|
"learning_rate": 1.855036855036855e-05, |
|
"loss": 0.4869, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 2.0264900662251657, |
|
"grad_norm": 7.844410419464111, |
|
"learning_rate": 1.8304668304668305e-05, |
|
"loss": 1.0237, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 2.0397350993377485, |
|
"grad_norm": 8.277088165283203, |
|
"learning_rate": 1.805896805896806e-05, |
|
"loss": 0.8911, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.052980132450331, |
|
"grad_norm": 4.819505214691162, |
|
"learning_rate": 1.7813267813267816e-05, |
|
"loss": 0.6859, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.066225165562914, |
|
"grad_norm": 7.4773359298706055, |
|
"learning_rate": 1.756756756756757e-05, |
|
"loss": 1.0567, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 2.0794701986754967, |
|
"grad_norm": 8.229156494140625, |
|
"learning_rate": 1.7321867321867323e-05, |
|
"loss": 1.2833, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 2.0927152317880795, |
|
"grad_norm": 11.932225227355957, |
|
"learning_rate": 1.7076167076167077e-05, |
|
"loss": 1.2839, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 2.1059602649006623, |
|
"grad_norm": 4.1798095703125, |
|
"learning_rate": 1.683046683046683e-05, |
|
"loss": 0.335, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 2.119205298013245, |
|
"grad_norm": 4.319066524505615, |
|
"learning_rate": 1.6584766584766588e-05, |
|
"loss": 0.3695, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.1324503311258276, |
|
"grad_norm": 4.868326187133789, |
|
"learning_rate": 1.6339066339066338e-05, |
|
"loss": 0.5056, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.1456953642384105, |
|
"grad_norm": 11.324278831481934, |
|
"learning_rate": 1.6093366093366095e-05, |
|
"loss": 1.6924, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 2.1589403973509933, |
|
"grad_norm": 7.622723579406738, |
|
"learning_rate": 1.584766584766585e-05, |
|
"loss": 1.1639, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 2.172185430463576, |
|
"grad_norm": 9.650656700134277, |
|
"learning_rate": 1.5601965601965606e-05, |
|
"loss": 1.449, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 2.185430463576159, |
|
"grad_norm": 8.743961334228516, |
|
"learning_rate": 1.5356265356265356e-05, |
|
"loss": 1.319, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 2.198675496688742, |
|
"grad_norm": 8.732221603393555, |
|
"learning_rate": 1.5110565110565111e-05, |
|
"loss": 1.2402, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 2.2119205298013247, |
|
"grad_norm": 7.377650737762451, |
|
"learning_rate": 1.4864864864864867e-05, |
|
"loss": 1.0142, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 2.225165562913907, |
|
"grad_norm": 7.734024524688721, |
|
"learning_rate": 1.4619164619164619e-05, |
|
"loss": 1.0699, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.23841059602649, |
|
"grad_norm": 6.139077663421631, |
|
"learning_rate": 1.4373464373464374e-05, |
|
"loss": 0.5206, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 2.251655629139073, |
|
"grad_norm": 7.571850299835205, |
|
"learning_rate": 1.412776412776413e-05, |
|
"loss": 0.8841, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.2649006622516556, |
|
"grad_norm": 9.102130889892578, |
|
"learning_rate": 1.3882063882063885e-05, |
|
"loss": 0.6576, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 2.2781456953642385, |
|
"grad_norm": 8.144769668579102, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 0.7361, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 2.2913907284768213, |
|
"grad_norm": 9.98493480682373, |
|
"learning_rate": 1.339066339066339e-05, |
|
"loss": 1.1676, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 2.304635761589404, |
|
"grad_norm": 10.540783882141113, |
|
"learning_rate": 1.3144963144963146e-05, |
|
"loss": 1.3709, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 2.3178807947019866, |
|
"grad_norm": 10.3671293258667, |
|
"learning_rate": 1.2899262899262901e-05, |
|
"loss": 0.8397, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.3311258278145695, |
|
"grad_norm": 4.67352819442749, |
|
"learning_rate": 1.2653562653562653e-05, |
|
"loss": 0.539, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 2.3443708609271523, |
|
"grad_norm": 9.957860946655273, |
|
"learning_rate": 1.2407862407862408e-05, |
|
"loss": 1.4744, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 2.357615894039735, |
|
"grad_norm": 8.535019874572754, |
|
"learning_rate": 1.2162162162162164e-05, |
|
"loss": 1.3841, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 2.370860927152318, |
|
"grad_norm": 8.11552906036377, |
|
"learning_rate": 1.1916461916461917e-05, |
|
"loss": 1.1301, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 2.384105960264901, |
|
"grad_norm": 7.197366237640381, |
|
"learning_rate": 1.1670761670761671e-05, |
|
"loss": 0.6156, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.3973509933774833, |
|
"grad_norm": 4.6600022315979, |
|
"learning_rate": 1.1425061425061426e-05, |
|
"loss": 0.7303, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 2.410596026490066, |
|
"grad_norm": 6.252689838409424, |
|
"learning_rate": 1.117936117936118e-05, |
|
"loss": 0.9769, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.423841059602649, |
|
"grad_norm": 9.90804672241211, |
|
"learning_rate": 1.0933660933660935e-05, |
|
"loss": 1.093, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 2.437086092715232, |
|
"grad_norm": 9.742344856262207, |
|
"learning_rate": 1.0687960687960689e-05, |
|
"loss": 1.0279, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 2.4503311258278146, |
|
"grad_norm": 9.368986129760742, |
|
"learning_rate": 1.0442260442260443e-05, |
|
"loss": 1.2061, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.4635761589403975, |
|
"grad_norm": 6.378131866455078, |
|
"learning_rate": 1.0196560196560196e-05, |
|
"loss": 1.0066, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 2.47682119205298, |
|
"grad_norm": 7.304665565490723, |
|
"learning_rate": 9.950859950859952e-06, |
|
"loss": 2.2284, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 2.4900662251655628, |
|
"grad_norm": 5.73162317276001, |
|
"learning_rate": 9.705159705159705e-06, |
|
"loss": 0.4927, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 2.5033112582781456, |
|
"grad_norm": 9.693008422851562, |
|
"learning_rate": 9.45945945945946e-06, |
|
"loss": 0.9368, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.5165562913907285, |
|
"grad_norm": 7.258613586425781, |
|
"learning_rate": 9.213759213759214e-06, |
|
"loss": 0.5097, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.5298013245033113, |
|
"grad_norm": 6.431227684020996, |
|
"learning_rate": 8.96805896805897e-06, |
|
"loss": 0.9039, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 2.543046357615894, |
|
"grad_norm": 8.578726768493652, |
|
"learning_rate": 8.722358722358723e-06, |
|
"loss": 0.8253, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 2.556291390728477, |
|
"grad_norm": 5.389899730682373, |
|
"learning_rate": 8.476658476658477e-06, |
|
"loss": 0.4513, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 2.5695364238410594, |
|
"grad_norm": 10.525187492370605, |
|
"learning_rate": 8.230958230958232e-06, |
|
"loss": 1.1571, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 2.5827814569536423, |
|
"grad_norm": 5.4934186935424805, |
|
"learning_rate": 7.985257985257986e-06, |
|
"loss": 0.7821, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.596026490066225, |
|
"grad_norm": 9.986252784729004, |
|
"learning_rate": 7.73955773955774e-06, |
|
"loss": 0.8019, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.609271523178808, |
|
"grad_norm": 9.1427001953125, |
|
"learning_rate": 7.493857493857494e-06, |
|
"loss": 1.349, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 2.622516556291391, |
|
"grad_norm": 8.039468765258789, |
|
"learning_rate": 7.2481572481572485e-06, |
|
"loss": 1.0867, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 2.6357615894039736, |
|
"grad_norm": 9.093656539916992, |
|
"learning_rate": 7.002457002457002e-06, |
|
"loss": 1.5422, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 2.6490066225165565, |
|
"grad_norm": 13.18973445892334, |
|
"learning_rate": 6.7567567567567575e-06, |
|
"loss": 0.4601, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.662251655629139, |
|
"grad_norm": 9.986296653747559, |
|
"learning_rate": 6.511056511056511e-06, |
|
"loss": 1.0286, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 2.6754966887417218, |
|
"grad_norm": 9.373101234436035, |
|
"learning_rate": 6.2653562653562665e-06, |
|
"loss": 0.8767, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 2.6887417218543046, |
|
"grad_norm": 9.866012573242188, |
|
"learning_rate": 6.019656019656019e-06, |
|
"loss": 1.1937, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.7019867549668874, |
|
"grad_norm": 6.277444839477539, |
|
"learning_rate": 5.773955773955774e-06, |
|
"loss": 1.1616, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 2.7152317880794703, |
|
"grad_norm": 5.679011344909668, |
|
"learning_rate": 5.528255528255528e-06, |
|
"loss": 0.7223, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.7284768211920527, |
|
"grad_norm": 4.74644660949707, |
|
"learning_rate": 5.282555282555283e-06, |
|
"loss": 0.5082, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 2.741721854304636, |
|
"grad_norm": 13.121922492980957, |
|
"learning_rate": 5.036855036855037e-06, |
|
"loss": 1.5552, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 2.7549668874172184, |
|
"grad_norm": 8.272148132324219, |
|
"learning_rate": 4.791154791154792e-06, |
|
"loss": 0.9687, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 2.7682119205298013, |
|
"grad_norm": 7.133453369140625, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 0.5568, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 2.781456953642384, |
|
"grad_norm": 5.385309219360352, |
|
"learning_rate": 4.2997542997543e-06, |
|
"loss": 0.5379, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.794701986754967, |
|
"grad_norm": 9.613791465759277, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"loss": 1.3592, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 2.80794701986755, |
|
"grad_norm": 8.493631362915039, |
|
"learning_rate": 3.8083538083538086e-06, |
|
"loss": 1.1874, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 2.821192052980132, |
|
"grad_norm": 5.972334861755371, |
|
"learning_rate": 3.562653562653563e-06, |
|
"loss": 0.8684, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 2.8344370860927155, |
|
"grad_norm": 10.33562183380127, |
|
"learning_rate": 3.3169533169533168e-06, |
|
"loss": 1.0385, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 2.847682119205298, |
|
"grad_norm": 4.6230363845825195, |
|
"learning_rate": 3.0712530712530717e-06, |
|
"loss": 0.8805, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.8609271523178808, |
|
"grad_norm": 16.691070556640625, |
|
"learning_rate": 2.9484029484029485e-06, |
|
"loss": 1.494, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 2.8741721854304636, |
|
"grad_norm": 6.581092834472656, |
|
"learning_rate": 2.702702702702703e-06, |
|
"loss": 0.4563, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.8874172185430464, |
|
"grad_norm": 8.806626319885254, |
|
"learning_rate": 2.457002457002457e-06, |
|
"loss": 1.3625, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 2.9006622516556293, |
|
"grad_norm": 5.90773344039917, |
|
"learning_rate": 2.211302211302211e-06, |
|
"loss": 1.1598, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 2.9139072847682117, |
|
"grad_norm": 7.406730651855469, |
|
"learning_rate": 1.9656019656019657e-06, |
|
"loss": 0.7975, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.9271523178807946, |
|
"grad_norm": 5.467130184173584, |
|
"learning_rate": 1.71990171990172e-06, |
|
"loss": 0.4307, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 2.9403973509933774, |
|
"grad_norm": 7.527857780456543, |
|
"learning_rate": 1.4742014742014743e-06, |
|
"loss": 0.6769, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 2.9536423841059603, |
|
"grad_norm": 11.855218887329102, |
|
"learning_rate": 1.2285012285012285e-06, |
|
"loss": 1.4558, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 2.966887417218543, |
|
"grad_norm": 9.057221412658691, |
|
"learning_rate": 9.828009828009828e-07, |
|
"loss": 1.6682, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 2.980132450331126, |
|
"grad_norm": 8.127178192138672, |
|
"learning_rate": 7.371007371007371e-07, |
|
"loss": 0.9139, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.993377483443709, |
|
"grad_norm": 7.9965128898620605, |
|
"learning_rate": 4.914004914004914e-07, |
|
"loss": 0.7603, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.64, |
|
"eval_f1_macro": 0.28164367547346275, |
|
"eval_f1_micro": 0.64, |
|
"eval_f1_weighted": 0.5917376665887304, |
|
"eval_loss": 1.4913766384124756, |
|
"eval_precision_macro": 0.2705775014459225, |
|
"eval_precision_micro": 0.64, |
|
"eval_precision_weighted": 0.5802396761133604, |
|
"eval_recall_macro": 0.3324350649350649, |
|
"eval_recall_micro": 0.64, |
|
"eval_recall_weighted": 0.64, |
|
"eval_runtime": 0.4059, |
|
"eval_samples_per_second": 246.346, |
|
"eval_steps_per_second": 17.244, |
|
"step": 453 |
|
} |
|
], |
|
"logging_steps": 2, |
|
"max_steps": 453, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 237039835640832.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|