maven_ere_trigger_seq2seq / trainer_state.json
ahmeshaf's picture
Upload 12 files
328170c verified
raw
history blame contribute delete
No virus
176 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 27.51196172248804,
"eval_steps": 1000,
"global_step": 92000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"grad_norm": 3.539609432220459,
"learning_rate": 4.99925228054434e-05,
"loss": 2.134,
"step": 100
},
{
"epoch": 0.06,
"grad_norm": 3.197829246520996,
"learning_rate": 4.997756841633019e-05,
"loss": 0.6178,
"step": 200
},
{
"epoch": 0.09,
"grad_norm": 3.3991429805755615,
"learning_rate": 4.996261402721699e-05,
"loss": 0.5496,
"step": 300
},
{
"epoch": 0.12,
"grad_norm": 3.072633743286133,
"learning_rate": 4.9947659638103784e-05,
"loss": 0.5228,
"step": 400
},
{
"epoch": 0.15,
"grad_norm": 2.4815468788146973,
"learning_rate": 4.993270524899058e-05,
"loss": 0.5102,
"step": 500
},
{
"epoch": 0.18,
"grad_norm": 2.794753313064575,
"learning_rate": 4.991775085987738e-05,
"loss": 0.4746,
"step": 600
},
{
"epoch": 0.21,
"grad_norm": 2.1388251781463623,
"learning_rate": 4.9902796470764176e-05,
"loss": 0.4769,
"step": 700
},
{
"epoch": 0.24,
"grad_norm": 2.518214225769043,
"learning_rate": 4.988784208165096e-05,
"loss": 0.4476,
"step": 800
},
{
"epoch": 0.27,
"grad_norm": 4.257823467254639,
"learning_rate": 4.987288769253776e-05,
"loss": 0.439,
"step": 900
},
{
"epoch": 0.3,
"grad_norm": 2.0235888957977295,
"learning_rate": 4.985793330342456e-05,
"loss": 0.4465,
"step": 1000
},
{
"epoch": 0.3,
"eval_loss": 0.34466782212257385,
"eval_precision": 0.7649398815576958,
"eval_recall": 0.7874318790603159,
"eval_runtime": 321.2695,
"eval_samples_per_second": 41.629,
"eval_steps_per_second": 1.301,
"step": 1000
},
{
"epoch": 0.33,
"grad_norm": 2.372622489929199,
"learning_rate": 4.984297891431135e-05,
"loss": 0.438,
"step": 1100
},
{
"epoch": 0.36,
"grad_norm": 2.184081792831421,
"learning_rate": 4.982802452519815e-05,
"loss": 0.4319,
"step": 1200
},
{
"epoch": 0.39,
"grad_norm": 1.180004358291626,
"learning_rate": 4.981307013608494e-05,
"loss": 0.4153,
"step": 1300
},
{
"epoch": 0.42,
"grad_norm": 1.8515098094940186,
"learning_rate": 4.979811574697174e-05,
"loss": 0.4107,
"step": 1400
},
{
"epoch": 0.45,
"grad_norm": 2.0762712955474854,
"learning_rate": 4.978316135785853e-05,
"loss": 0.4087,
"step": 1500
},
{
"epoch": 0.48,
"grad_norm": 1.6716846227645874,
"learning_rate": 4.9768206968745326e-05,
"loss": 0.4082,
"step": 1600
},
{
"epoch": 0.51,
"grad_norm": 2.9515812397003174,
"learning_rate": 4.9753252579632126e-05,
"loss": 0.398,
"step": 1700
},
{
"epoch": 0.54,
"grad_norm": 1.9658855199813843,
"learning_rate": 4.973829819051892e-05,
"loss": 0.393,
"step": 1800
},
{
"epoch": 0.57,
"grad_norm": 1.9613778591156006,
"learning_rate": 4.972334380140571e-05,
"loss": 0.3904,
"step": 1900
},
{
"epoch": 0.6,
"grad_norm": 2.7774882316589355,
"learning_rate": 4.970838941229251e-05,
"loss": 0.3794,
"step": 2000
},
{
"epoch": 0.6,
"eval_loss": 0.310618132352829,
"eval_precision": 0.7516943243620137,
"eval_recall": 0.8298285045721852,
"eval_runtime": 320.9754,
"eval_samples_per_second": 41.667,
"eval_steps_per_second": 1.302,
"step": 2000
},
{
"epoch": 0.63,
"grad_norm": 1.4382622241973877,
"learning_rate": 4.969343502317931e-05,
"loss": 0.369,
"step": 2100
},
{
"epoch": 0.66,
"grad_norm": 1.813565731048584,
"learning_rate": 4.96784806340661e-05,
"loss": 0.3751,
"step": 2200
},
{
"epoch": 0.69,
"grad_norm": 2.279954195022583,
"learning_rate": 4.9663526244952897e-05,
"loss": 0.3804,
"step": 2300
},
{
"epoch": 0.72,
"grad_norm": 1.9376351833343506,
"learning_rate": 4.9648571855839696e-05,
"loss": 0.3611,
"step": 2400
},
{
"epoch": 0.75,
"grad_norm": 2.2867352962493896,
"learning_rate": 4.963361746672648e-05,
"loss": 0.3739,
"step": 2500
},
{
"epoch": 0.78,
"grad_norm": 2.132394313812256,
"learning_rate": 4.961866307761328e-05,
"loss": 0.3669,
"step": 2600
},
{
"epoch": 0.81,
"grad_norm": 2.0541863441467285,
"learning_rate": 4.9603708688500075e-05,
"loss": 0.366,
"step": 2700
},
{
"epoch": 0.84,
"grad_norm": 2.1414847373962402,
"learning_rate": 4.9588754299386874e-05,
"loss": 0.3535,
"step": 2800
},
{
"epoch": 0.87,
"grad_norm": 1.3949612379074097,
"learning_rate": 4.957379991027367e-05,
"loss": 0.3684,
"step": 2900
},
{
"epoch": 0.9,
"grad_norm": 1.8921570777893066,
"learning_rate": 4.955884552116046e-05,
"loss": 0.3556,
"step": 3000
},
{
"epoch": 0.9,
"eval_loss": 0.290554404258728,
"eval_precision": 0.79493216033703,
"eval_recall": 0.7901105329597586,
"eval_runtime": 307.7262,
"eval_samples_per_second": 43.461,
"eval_steps_per_second": 1.358,
"step": 3000
},
{
"epoch": 0.93,
"grad_norm": 1.6217349767684937,
"learning_rate": 4.954389113204726e-05,
"loss": 0.3566,
"step": 3100
},
{
"epoch": 0.96,
"grad_norm": 1.524946928024292,
"learning_rate": 4.952893674293405e-05,
"loss": 0.3477,
"step": 3200
},
{
"epoch": 0.99,
"grad_norm": 1.6807836294174194,
"learning_rate": 4.9513982353820846e-05,
"loss": 0.3409,
"step": 3300
},
{
"epoch": 1.02,
"grad_norm": 1.5750257968902588,
"learning_rate": 4.9499027964707645e-05,
"loss": 0.3178,
"step": 3400
},
{
"epoch": 1.05,
"grad_norm": 1.43153715133667,
"learning_rate": 4.9484073575594445e-05,
"loss": 0.2888,
"step": 3500
},
{
"epoch": 1.08,
"grad_norm": 1.4886215925216675,
"learning_rate": 4.946911918648123e-05,
"loss": 0.3153,
"step": 3600
},
{
"epoch": 1.11,
"grad_norm": 2.2148983478546143,
"learning_rate": 4.945416479736803e-05,
"loss": 0.3114,
"step": 3700
},
{
"epoch": 1.14,
"grad_norm": 1.3632937669754028,
"learning_rate": 4.9439210408254824e-05,
"loss": 0.3031,
"step": 3800
},
{
"epoch": 1.17,
"grad_norm": 1.8350048065185547,
"learning_rate": 4.9424256019141617e-05,
"loss": 0.292,
"step": 3900
},
{
"epoch": 1.2,
"grad_norm": 1.1402252912521362,
"learning_rate": 4.9409301630028416e-05,
"loss": 0.2983,
"step": 4000
},
{
"epoch": 1.2,
"eval_loss": 0.2781643867492676,
"eval_precision": 0.7788883753177721,
"eval_recall": 0.8301363958249947,
"eval_runtime": 307.2732,
"eval_samples_per_second": 43.525,
"eval_steps_per_second": 1.36,
"step": 4000
},
{
"epoch": 1.23,
"grad_norm": 1.2367932796478271,
"learning_rate": 4.939434724091521e-05,
"loss": 0.2894,
"step": 4100
},
{
"epoch": 1.26,
"grad_norm": 1.4055671691894531,
"learning_rate": 4.937939285180201e-05,
"loss": 0.2847,
"step": 4200
},
{
"epoch": 1.29,
"grad_norm": 1.910565972328186,
"learning_rate": 4.93644384626888e-05,
"loss": 0.2917,
"step": 4300
},
{
"epoch": 1.32,
"grad_norm": 1.9085345268249512,
"learning_rate": 4.9349484073575595e-05,
"loss": 0.2934,
"step": 4400
},
{
"epoch": 1.35,
"grad_norm": 1.5550158023834229,
"learning_rate": 4.9334529684462394e-05,
"loss": 0.2726,
"step": 4500
},
{
"epoch": 1.38,
"grad_norm": 2.1685421466827393,
"learning_rate": 4.931957529534919e-05,
"loss": 0.3077,
"step": 4600
},
{
"epoch": 1.41,
"grad_norm": 1.7528005838394165,
"learning_rate": 4.930462090623598e-05,
"loss": 0.2919,
"step": 4700
},
{
"epoch": 1.44,
"grad_norm": 1.804412841796875,
"learning_rate": 4.928966651712278e-05,
"loss": 0.278,
"step": 4800
},
{
"epoch": 1.47,
"grad_norm": 2.430739164352417,
"learning_rate": 4.927471212800957e-05,
"loss": 0.2901,
"step": 4900
},
{
"epoch": 1.5,
"grad_norm": 1.5466407537460327,
"learning_rate": 4.9259757738896365e-05,
"loss": 0.2886,
"step": 5000
},
{
"epoch": 1.5,
"eval_loss": 0.27095386385917664,
"eval_precision": 0.7892478844902066,
"eval_recall": 0.8212999168693618,
"eval_runtime": 308.5531,
"eval_samples_per_second": 43.344,
"eval_steps_per_second": 1.355,
"step": 5000
},
{
"epoch": 1.53,
"grad_norm": 1.1303741931915283,
"learning_rate": 4.9244803349783165e-05,
"loss": 0.291,
"step": 5100
},
{
"epoch": 1.56,
"grad_norm": 1.3640042543411255,
"learning_rate": 4.922984896066996e-05,
"loss": 0.2897,
"step": 5200
},
{
"epoch": 1.58,
"grad_norm": 1.9915575981140137,
"learning_rate": 4.921489457155675e-05,
"loss": 0.2798,
"step": 5300
},
{
"epoch": 1.61,
"grad_norm": 1.574576735496521,
"learning_rate": 4.919994018244355e-05,
"loss": 0.2856,
"step": 5400
},
{
"epoch": 1.64,
"grad_norm": 1.9231148958206177,
"learning_rate": 4.918498579333034e-05,
"loss": 0.2819,
"step": 5500
},
{
"epoch": 1.67,
"grad_norm": 2.171637773513794,
"learning_rate": 4.917003140421714e-05,
"loss": 0.2892,
"step": 5600
},
{
"epoch": 1.7,
"grad_norm": 1.7447925806045532,
"learning_rate": 4.9155077015103936e-05,
"loss": 0.2837,
"step": 5700
},
{
"epoch": 1.73,
"grad_norm": 2.282715320587158,
"learning_rate": 4.914012262599073e-05,
"loss": 0.2888,
"step": 5800
},
{
"epoch": 1.76,
"grad_norm": 2.041062831878662,
"learning_rate": 4.912516823687753e-05,
"loss": 0.2733,
"step": 5900
},
{
"epoch": 1.79,
"grad_norm": 1.3900405168533325,
"learning_rate": 4.911021384776432e-05,
"loss": 0.2982,
"step": 6000
},
{
"epoch": 1.79,
"eval_loss": 0.24861453473567963,
"eval_precision": 0.7945360585297875,
"eval_recall": 0.8426059915637797,
"eval_runtime": 306.7263,
"eval_samples_per_second": 43.602,
"eval_steps_per_second": 1.363,
"step": 6000
},
{
"epoch": 1.82,
"grad_norm": 2.156783103942871,
"learning_rate": 4.9095259458651114e-05,
"loss": 0.2883,
"step": 6100
},
{
"epoch": 1.85,
"grad_norm": 1.6421504020690918,
"learning_rate": 4.9080305069537914e-05,
"loss": 0.2716,
"step": 6200
},
{
"epoch": 1.88,
"grad_norm": 1.6905546188354492,
"learning_rate": 4.906535068042471e-05,
"loss": 0.2775,
"step": 6300
},
{
"epoch": 1.91,
"grad_norm": 1.1936814785003662,
"learning_rate": 4.90503962913115e-05,
"loss": 0.2571,
"step": 6400
},
{
"epoch": 1.94,
"grad_norm": 1.7146382331848145,
"learning_rate": 4.90354419021983e-05,
"loss": 0.2681,
"step": 6500
},
{
"epoch": 1.97,
"grad_norm": 1.5280200242996216,
"learning_rate": 4.902048751308509e-05,
"loss": 0.2655,
"step": 6600
},
{
"epoch": 2.0,
"grad_norm": 1.4756951332092285,
"learning_rate": 4.9005533123971885e-05,
"loss": 0.2554,
"step": 6700
},
{
"epoch": 2.03,
"grad_norm": 1.5664458274841309,
"learning_rate": 4.8990578734858685e-05,
"loss": 0.2125,
"step": 6800
},
{
"epoch": 2.06,
"grad_norm": 1.447304368019104,
"learning_rate": 4.897562434574548e-05,
"loss": 0.2161,
"step": 6900
},
{
"epoch": 2.09,
"grad_norm": 1.8067011833190918,
"learning_rate": 4.896066995663227e-05,
"loss": 0.213,
"step": 7000
},
{
"epoch": 2.09,
"eval_loss": 0.24976512789726257,
"eval_precision": 0.8138389031705227,
"eval_recall": 0.8187752085963238,
"eval_runtime": 305.8458,
"eval_samples_per_second": 43.728,
"eval_steps_per_second": 1.367,
"step": 7000
},
{
"epoch": 2.12,
"grad_norm": 2.7706127166748047,
"learning_rate": 4.894571556751907e-05,
"loss": 0.2186,
"step": 7100
},
{
"epoch": 2.15,
"grad_norm": 2.394275426864624,
"learning_rate": 4.893076117840586e-05,
"loss": 0.2094,
"step": 7200
},
{
"epoch": 2.18,
"grad_norm": 1.9464359283447266,
"learning_rate": 4.891580678929266e-05,
"loss": 0.2278,
"step": 7300
},
{
"epoch": 2.21,
"grad_norm": 2.1283416748046875,
"learning_rate": 4.8900852400179456e-05,
"loss": 0.2174,
"step": 7400
},
{
"epoch": 2.24,
"grad_norm": 1.7853657007217407,
"learning_rate": 4.888589801106625e-05,
"loss": 0.2184,
"step": 7500
},
{
"epoch": 2.27,
"grad_norm": 1.1081209182739258,
"learning_rate": 4.887094362195305e-05,
"loss": 0.2201,
"step": 7600
},
{
"epoch": 2.3,
"grad_norm": 1.3894284963607788,
"learning_rate": 4.885598923283984e-05,
"loss": 0.2213,
"step": 7700
},
{
"epoch": 2.33,
"grad_norm": 2.0615389347076416,
"learning_rate": 4.8841034843726634e-05,
"loss": 0.2217,
"step": 7800
},
{
"epoch": 2.36,
"grad_norm": 1.6415098905563354,
"learning_rate": 4.8826080454613434e-05,
"loss": 0.2266,
"step": 7900
},
{
"epoch": 2.39,
"grad_norm": 3.293736219406128,
"learning_rate": 4.8811126065500226e-05,
"loss": 0.2117,
"step": 8000
},
{
"epoch": 2.39,
"eval_loss": 0.24216407537460327,
"eval_precision": 0.8107814105275881,
"eval_recall": 0.826133809538471,
"eval_runtime": 307.023,
"eval_samples_per_second": 43.56,
"eval_steps_per_second": 1.361,
"step": 8000
},
{
"epoch": 2.42,
"grad_norm": 1.1580455303192139,
"learning_rate": 4.879617167638702e-05,
"loss": 0.2171,
"step": 8100
},
{
"epoch": 2.45,
"grad_norm": 1.0756213665008545,
"learning_rate": 4.878121728727382e-05,
"loss": 0.2174,
"step": 8200
},
{
"epoch": 2.48,
"grad_norm": 1.871605396270752,
"learning_rate": 4.876626289816061e-05,
"loss": 0.215,
"step": 8300
},
{
"epoch": 2.51,
"grad_norm": 1.8400825262069702,
"learning_rate": 4.8751308509047405e-05,
"loss": 0.2215,
"step": 8400
},
{
"epoch": 2.54,
"grad_norm": 2.0464110374450684,
"learning_rate": 4.8736354119934204e-05,
"loss": 0.2195,
"step": 8500
},
{
"epoch": 2.57,
"grad_norm": 1.2704099416732788,
"learning_rate": 4.8721399730821e-05,
"loss": 0.2266,
"step": 8600
},
{
"epoch": 2.6,
"grad_norm": 0.9448720216751099,
"learning_rate": 4.87064453417078e-05,
"loss": 0.2159,
"step": 8700
},
{
"epoch": 2.63,
"grad_norm": 1.2881120443344116,
"learning_rate": 4.869149095259459e-05,
"loss": 0.2084,
"step": 8800
},
{
"epoch": 2.66,
"grad_norm": 2.0659286975860596,
"learning_rate": 4.867653656348138e-05,
"loss": 0.2134,
"step": 8900
},
{
"epoch": 2.69,
"grad_norm": 1.109397530555725,
"learning_rate": 4.866158217436818e-05,
"loss": 0.2129,
"step": 9000
},
{
"epoch": 2.69,
"eval_loss": 0.22735044360160828,
"eval_precision": 0.8203027060082556,
"eval_recall": 0.8260106530373472,
"eval_runtime": 305.794,
"eval_samples_per_second": 43.735,
"eval_steps_per_second": 1.367,
"step": 9000
},
{
"epoch": 2.72,
"grad_norm": 1.164435625076294,
"learning_rate": 4.8646627785254975e-05,
"loss": 0.2155,
"step": 9100
},
{
"epoch": 2.75,
"grad_norm": 1.5477757453918457,
"learning_rate": 4.863167339614177e-05,
"loss": 0.2137,
"step": 9200
},
{
"epoch": 2.78,
"grad_norm": 1.4342052936553955,
"learning_rate": 4.861671900702857e-05,
"loss": 0.206,
"step": 9300
},
{
"epoch": 2.81,
"grad_norm": 1.3847391605377197,
"learning_rate": 4.860176461791536e-05,
"loss": 0.2077,
"step": 9400
},
{
"epoch": 2.84,
"grad_norm": 2.9082765579223633,
"learning_rate": 4.8586810228802154e-05,
"loss": 0.2126,
"step": 9500
},
{
"epoch": 2.87,
"grad_norm": 1.4943510293960571,
"learning_rate": 4.857185583968895e-05,
"loss": 0.2092,
"step": 9600
},
{
"epoch": 2.9,
"grad_norm": 1.2332855463027954,
"learning_rate": 4.8556901450575746e-05,
"loss": 0.2222,
"step": 9700
},
{
"epoch": 2.93,
"grad_norm": 2.227031946182251,
"learning_rate": 4.854194706146254e-05,
"loss": 0.1969,
"step": 9800
},
{
"epoch": 2.96,
"grad_norm": 1.2515846490859985,
"learning_rate": 4.852699267234934e-05,
"loss": 0.2017,
"step": 9900
},
{
"epoch": 2.99,
"grad_norm": 1.2267186641693115,
"learning_rate": 4.851203828323613e-05,
"loss": 0.2126,
"step": 10000
},
{
"epoch": 2.99,
"eval_loss": 0.20952437818050385,
"eval_precision": 0.8416687769055458,
"eval_recall": 0.818682841220481,
"eval_runtime": 302.8923,
"eval_samples_per_second": 44.154,
"eval_steps_per_second": 1.38,
"step": 10000
},
{
"epoch": 3.02,
"grad_norm": 1.151638150215149,
"learning_rate": 4.849708389412293e-05,
"loss": 0.171,
"step": 10100
},
{
"epoch": 3.05,
"grad_norm": 3.8168528079986572,
"learning_rate": 4.8482129505009724e-05,
"loss": 0.165,
"step": 10200
},
{
"epoch": 3.08,
"grad_norm": 2.3039355278015137,
"learning_rate": 4.846717511589652e-05,
"loss": 0.1675,
"step": 10300
},
{
"epoch": 3.11,
"grad_norm": 1.252301812171936,
"learning_rate": 4.845222072678332e-05,
"loss": 0.1554,
"step": 10400
},
{
"epoch": 3.14,
"grad_norm": 1.2682992219924927,
"learning_rate": 4.843726633767011e-05,
"loss": 0.1756,
"step": 10500
},
{
"epoch": 3.17,
"grad_norm": 1.3934777975082397,
"learning_rate": 4.84223119485569e-05,
"loss": 0.1576,
"step": 10600
},
{
"epoch": 3.2,
"grad_norm": 1.3386119604110718,
"learning_rate": 4.84073575594437e-05,
"loss": 0.1602,
"step": 10700
},
{
"epoch": 3.23,
"grad_norm": 1.6670503616333008,
"learning_rate": 4.8392403170330495e-05,
"loss": 0.1638,
"step": 10800
},
{
"epoch": 3.26,
"grad_norm": 2.5150694847106934,
"learning_rate": 4.837744878121729e-05,
"loss": 0.1653,
"step": 10900
},
{
"epoch": 3.29,
"grad_norm": 2.840406656265259,
"learning_rate": 4.836249439210409e-05,
"loss": 0.1607,
"step": 11000
},
{
"epoch": 3.29,
"eval_loss": 0.22238589823246002,
"eval_precision": 0.8404415146405029,
"eval_recall": 0.8439607130761415,
"eval_runtime": 304.8188,
"eval_samples_per_second": 43.875,
"eval_steps_per_second": 1.371,
"step": 11000
},
{
"epoch": 3.32,
"grad_norm": 1.5171958208084106,
"learning_rate": 4.834754000299088e-05,
"loss": 0.1606,
"step": 11100
},
{
"epoch": 3.35,
"grad_norm": 1.6955703496932983,
"learning_rate": 4.833258561387767e-05,
"loss": 0.1554,
"step": 11200
},
{
"epoch": 3.38,
"grad_norm": 1.893128514289856,
"learning_rate": 4.831763122476447e-05,
"loss": 0.1488,
"step": 11300
},
{
"epoch": 3.41,
"grad_norm": 1.7299461364746094,
"learning_rate": 4.8302676835651266e-05,
"loss": 0.1596,
"step": 11400
},
{
"epoch": 3.44,
"grad_norm": 2.150355339050293,
"learning_rate": 4.8287722446538065e-05,
"loss": 0.1623,
"step": 11500
},
{
"epoch": 3.47,
"grad_norm": 3.2869186401367188,
"learning_rate": 4.827276805742486e-05,
"loss": 0.1622,
"step": 11600
},
{
"epoch": 3.5,
"grad_norm": 1.7936344146728516,
"learning_rate": 4.825781366831165e-05,
"loss": 0.1651,
"step": 11700
},
{
"epoch": 3.53,
"grad_norm": 1.579736590385437,
"learning_rate": 4.824285927919845e-05,
"loss": 0.169,
"step": 11800
},
{
"epoch": 3.56,
"grad_norm": 2.1929283142089844,
"learning_rate": 4.822790489008524e-05,
"loss": 0.1629,
"step": 11900
},
{
"epoch": 3.59,
"grad_norm": 1.7842892408370972,
"learning_rate": 4.821295050097204e-05,
"loss": 0.1621,
"step": 12000
},
{
"epoch": 3.59,
"eval_loss": 0.21504360437393188,
"eval_precision": 0.8350246187102197,
"eval_recall": 0.8563379414390837,
"eval_runtime": 306.2124,
"eval_samples_per_second": 43.676,
"eval_steps_per_second": 1.365,
"step": 12000
},
{
"epoch": 3.62,
"grad_norm": 2.2203197479248047,
"learning_rate": 4.8197996111858836e-05,
"loss": 0.1595,
"step": 12100
},
{
"epoch": 3.65,
"grad_norm": 1.8541319370269775,
"learning_rate": 4.818304172274562e-05,
"loss": 0.1702,
"step": 12200
},
{
"epoch": 3.68,
"grad_norm": 1.3299143314361572,
"learning_rate": 4.816808733363242e-05,
"loss": 0.1651,
"step": 12300
},
{
"epoch": 3.71,
"grad_norm": 1.7831319570541382,
"learning_rate": 4.815313294451922e-05,
"loss": 0.1601,
"step": 12400
},
{
"epoch": 3.74,
"grad_norm": 1.0528268814086914,
"learning_rate": 4.8138178555406015e-05,
"loss": 0.1644,
"step": 12500
},
{
"epoch": 3.77,
"grad_norm": 1.306907057762146,
"learning_rate": 4.812322416629281e-05,
"loss": 0.1556,
"step": 12600
},
{
"epoch": 3.8,
"grad_norm": 1.8565049171447754,
"learning_rate": 4.810826977717961e-05,
"loss": 0.1654,
"step": 12700
},
{
"epoch": 3.83,
"grad_norm": 1.4770090579986572,
"learning_rate": 4.80933153880664e-05,
"loss": 0.1628,
"step": 12800
},
{
"epoch": 3.86,
"grad_norm": 1.9089502096176147,
"learning_rate": 4.807836099895319e-05,
"loss": 0.1632,
"step": 12900
},
{
"epoch": 3.89,
"grad_norm": 1.3788821697235107,
"learning_rate": 4.806340660983999e-05,
"loss": 0.1597,
"step": 13000
},
{
"epoch": 3.89,
"eval_loss": 0.2062728852033615,
"eval_precision": 0.8378547953391097,
"eval_recall": 0.8634194402537024,
"eval_runtime": 304.7295,
"eval_samples_per_second": 43.888,
"eval_steps_per_second": 1.372,
"step": 13000
},
{
"epoch": 3.92,
"grad_norm": 15.79686164855957,
"learning_rate": 4.8048452220726785e-05,
"loss": 0.1637,
"step": 13100
},
{
"epoch": 3.95,
"grad_norm": 1.9472129344940186,
"learning_rate": 4.8033497831613585e-05,
"loss": 0.1666,
"step": 13200
},
{
"epoch": 3.98,
"grad_norm": 2.1338746547698975,
"learning_rate": 4.801854344250037e-05,
"loss": 0.1614,
"step": 13300
},
{
"epoch": 4.01,
"grad_norm": 1.1886940002441406,
"learning_rate": 4.800358905338717e-05,
"loss": 0.1474,
"step": 13400
},
{
"epoch": 4.04,
"grad_norm": 2.4190924167633057,
"learning_rate": 4.798863466427397e-05,
"loss": 0.121,
"step": 13500
},
{
"epoch": 4.07,
"grad_norm": 0.902584433555603,
"learning_rate": 4.797368027516076e-05,
"loss": 0.1192,
"step": 13600
},
{
"epoch": 4.1,
"grad_norm": 2.3466804027557373,
"learning_rate": 4.7958725886047556e-05,
"loss": 0.129,
"step": 13700
},
{
"epoch": 4.13,
"grad_norm": 4.135778427124023,
"learning_rate": 4.7943771496934356e-05,
"loss": 0.1206,
"step": 13800
},
{
"epoch": 4.16,
"grad_norm": 1.6940075159072876,
"learning_rate": 4.792881710782115e-05,
"loss": 0.1313,
"step": 13900
},
{
"epoch": 4.19,
"grad_norm": 1.7989047765731812,
"learning_rate": 4.791386271870794e-05,
"loss": 0.1139,
"step": 14000
},
{
"epoch": 4.19,
"eval_loss": 0.20718763768672943,
"eval_precision": 0.8631126181281592,
"eval_recall": 0.8464238430986176,
"eval_runtime": 304.0256,
"eval_samples_per_second": 43.99,
"eval_steps_per_second": 1.375,
"step": 14000
},
{
"epoch": 4.22,
"grad_norm": 1.9864155054092407,
"learning_rate": 4.789890832959474e-05,
"loss": 0.1222,
"step": 14100
},
{
"epoch": 4.25,
"grad_norm": 2.944260835647583,
"learning_rate": 4.7883953940481534e-05,
"loss": 0.1238,
"step": 14200
},
{
"epoch": 4.28,
"grad_norm": 0.5448206663131714,
"learning_rate": 4.786899955136833e-05,
"loss": 0.1191,
"step": 14300
},
{
"epoch": 4.31,
"grad_norm": 1.2996718883514404,
"learning_rate": 4.785404516225512e-05,
"loss": 0.1208,
"step": 14400
},
{
"epoch": 4.34,
"grad_norm": 2.5177977085113525,
"learning_rate": 4.783909077314192e-05,
"loss": 0.1258,
"step": 14500
},
{
"epoch": 4.37,
"grad_norm": 1.1356126070022583,
"learning_rate": 4.782413638402872e-05,
"loss": 0.1223,
"step": 14600
},
{
"epoch": 4.4,
"grad_norm": 1.2576464414596558,
"learning_rate": 4.7809181994915506e-05,
"loss": 0.124,
"step": 14700
},
{
"epoch": 4.43,
"grad_norm": 0.8868162631988525,
"learning_rate": 4.7794227605802305e-05,
"loss": 0.1246,
"step": 14800
},
{
"epoch": 4.46,
"grad_norm": 2.3075501918792725,
"learning_rate": 4.7779273216689105e-05,
"loss": 0.1216,
"step": 14900
},
{
"epoch": 4.49,
"grad_norm": 1.5548241138458252,
"learning_rate": 4.776431882757589e-05,
"loss": 0.1221,
"step": 15000
},
{
"epoch": 4.49,
"eval_loss": 0.19333235919475555,
"eval_precision": 0.8727586319112239,
"eval_recall": 0.8257335509098187,
"eval_runtime": 301.0242,
"eval_samples_per_second": 44.428,
"eval_steps_per_second": 1.389,
"step": 15000
},
{
"epoch": 4.52,
"grad_norm": 1.0018868446350098,
"learning_rate": 4.774936443846269e-05,
"loss": 0.1237,
"step": 15100
},
{
"epoch": 4.55,
"grad_norm": 1.264910101890564,
"learning_rate": 4.773441004934949e-05,
"loss": 0.1156,
"step": 15200
},
{
"epoch": 4.58,
"grad_norm": 5.281520366668701,
"learning_rate": 4.771945566023628e-05,
"loss": 0.1286,
"step": 15300
},
{
"epoch": 4.61,
"grad_norm": 1.9591494798660278,
"learning_rate": 4.7704501271123076e-05,
"loss": 0.1249,
"step": 15400
},
{
"epoch": 4.64,
"grad_norm": 2.021794080734253,
"learning_rate": 4.768954688200987e-05,
"loss": 0.1233,
"step": 15500
},
{
"epoch": 4.67,
"grad_norm": 2.007873773574829,
"learning_rate": 4.767459249289667e-05,
"loss": 0.1281,
"step": 15600
},
{
"epoch": 4.69,
"grad_norm": 2.0108394622802734,
"learning_rate": 4.765963810378346e-05,
"loss": 0.1302,
"step": 15700
},
{
"epoch": 4.72,
"grad_norm": 1.7474627494812012,
"learning_rate": 4.7644683714670254e-05,
"loss": 0.1164,
"step": 15800
},
{
"epoch": 4.75,
"grad_norm": 0.758482813835144,
"learning_rate": 4.7629729325557054e-05,
"loss": 0.1211,
"step": 15900
},
{
"epoch": 4.78,
"grad_norm": 0.9910192489624023,
"learning_rate": 4.7614774936443854e-05,
"loss": 0.1222,
"step": 16000
},
{
"epoch": 4.78,
"eval_loss": 0.1955721527338028,
"eval_precision": 0.8685029567382508,
"eval_recall": 0.8591705409649312,
"eval_runtime": 303.5505,
"eval_samples_per_second": 44.059,
"eval_steps_per_second": 1.377,
"step": 16000
},
{
"epoch": 4.81,
"grad_norm": 2.4667110443115234,
"learning_rate": 4.759982054733064e-05,
"loss": 0.1214,
"step": 16100
},
{
"epoch": 4.84,
"grad_norm": 2.103156566619873,
"learning_rate": 4.758486615821744e-05,
"loss": 0.1211,
"step": 16200
},
{
"epoch": 4.87,
"grad_norm": 1.3806654214859009,
"learning_rate": 4.756991176910424e-05,
"loss": 0.1152,
"step": 16300
},
{
"epoch": 4.9,
"grad_norm": 2.1174566745758057,
"learning_rate": 4.7554957379991025e-05,
"loss": 0.1246,
"step": 16400
},
{
"epoch": 4.93,
"grad_norm": 2.0334010124206543,
"learning_rate": 4.7540002990877825e-05,
"loss": 0.1189,
"step": 16500
},
{
"epoch": 4.96,
"grad_norm": 2.668717861175537,
"learning_rate": 4.7525048601764625e-05,
"loss": 0.1237,
"step": 16600
},
{
"epoch": 4.99,
"grad_norm": 2.0749363899230957,
"learning_rate": 4.751009421265142e-05,
"loss": 0.1141,
"step": 16700
},
{
"epoch": 5.02,
"grad_norm": 1.893052577972412,
"learning_rate": 4.749513982353821e-05,
"loss": 0.095,
"step": 16800
},
{
"epoch": 5.05,
"grad_norm": 0.6495729684829712,
"learning_rate": 4.7480185434425e-05,
"loss": 0.085,
"step": 16900
},
{
"epoch": 5.08,
"grad_norm": 1.8883150815963745,
"learning_rate": 4.74652310453118e-05,
"loss": 0.0886,
"step": 17000
},
{
"epoch": 5.08,
"eval_loss": 0.2067934274673462,
"eval_precision": 0.880300808187974,
"eval_recall": 0.8685920133009021,
"eval_runtime": 303.377,
"eval_samples_per_second": 44.084,
"eval_steps_per_second": 1.378,
"step": 17000
},
{
"epoch": 5.11,
"grad_norm": 1.110809326171875,
"learning_rate": 4.7450276656198596e-05,
"loss": 0.0895,
"step": 17100
},
{
"epoch": 5.14,
"grad_norm": 1.9441896677017212,
"learning_rate": 4.743532226708539e-05,
"loss": 0.0935,
"step": 17200
},
{
"epoch": 5.17,
"grad_norm": 1.9851264953613281,
"learning_rate": 4.742036787797219e-05,
"loss": 0.0927,
"step": 17300
},
{
"epoch": 5.2,
"grad_norm": 1.2447096109390259,
"learning_rate": 4.740541348885899e-05,
"loss": 0.0911,
"step": 17400
},
{
"epoch": 5.23,
"grad_norm": 1.0151656866073608,
"learning_rate": 4.7390459099745774e-05,
"loss": 0.0932,
"step": 17500
},
{
"epoch": 5.26,
"grad_norm": 0.8265299201011658,
"learning_rate": 4.7375504710632574e-05,
"loss": 0.1006,
"step": 17600
},
{
"epoch": 5.29,
"grad_norm": 2.7819435596466064,
"learning_rate": 4.736055032151937e-05,
"loss": 0.0892,
"step": 17700
},
{
"epoch": 5.32,
"grad_norm": 1.3706836700439453,
"learning_rate": 4.734559593240616e-05,
"loss": 0.0976,
"step": 17800
},
{
"epoch": 5.35,
"grad_norm": 3.606653928756714,
"learning_rate": 4.733064154329296e-05,
"loss": 0.0932,
"step": 17900
},
{
"epoch": 5.38,
"grad_norm": 1.3535112142562866,
"learning_rate": 4.731568715417975e-05,
"loss": 0.0917,
"step": 18000
},
{
"epoch": 5.38,
"eval_loss": 0.1965586394071579,
"eval_precision": 0.8806825297432687,
"eval_recall": 0.8660673050278641,
"eval_runtime": 303.4486,
"eval_samples_per_second": 44.073,
"eval_steps_per_second": 1.377,
"step": 18000
},
{
"epoch": 5.41,
"grad_norm": 1.7558257579803467,
"learning_rate": 4.7300732765066545e-05,
"loss": 0.088,
"step": 18100
},
{
"epoch": 5.44,
"grad_norm": 2.291628837585449,
"learning_rate": 4.7285778375953345e-05,
"loss": 0.0963,
"step": 18200
},
{
"epoch": 5.47,
"grad_norm": 1.4217274188995361,
"learning_rate": 4.727082398684014e-05,
"loss": 0.0969,
"step": 18300
},
{
"epoch": 5.5,
"grad_norm": 1.8852524757385254,
"learning_rate": 4.725586959772694e-05,
"loss": 0.0952,
"step": 18400
},
{
"epoch": 5.53,
"grad_norm": 2.106452465057373,
"learning_rate": 4.724091520861373e-05,
"loss": 0.0966,
"step": 18500
},
{
"epoch": 5.56,
"grad_norm": 1.9277011156082153,
"learning_rate": 4.722596081950052e-05,
"loss": 0.089,
"step": 18600
},
{
"epoch": 5.59,
"grad_norm": 1.2175403833389282,
"learning_rate": 4.721100643038732e-05,
"loss": 0.0931,
"step": 18700
},
{
"epoch": 5.62,
"grad_norm": 2.060368299484253,
"learning_rate": 4.7196052041274115e-05,
"loss": 0.0968,
"step": 18800
},
{
"epoch": 5.65,
"grad_norm": 1.4981082677841187,
"learning_rate": 4.718109765216091e-05,
"loss": 0.0929,
"step": 18900
},
{
"epoch": 5.68,
"grad_norm": 1.6335569620132446,
"learning_rate": 4.716614326304771e-05,
"loss": 0.0938,
"step": 19000
},
{
"epoch": 5.68,
"eval_loss": 0.19031907618045807,
"eval_precision": 0.8913960623881361,
"eval_recall": 0.858708704085717,
"eval_runtime": 301.9634,
"eval_samples_per_second": 44.29,
"eval_steps_per_second": 1.384,
"step": 19000
},
{
"epoch": 5.71,
"grad_norm": 0.46949952840805054,
"learning_rate": 4.71511888739345e-05,
"loss": 0.09,
"step": 19100
},
{
"epoch": 5.74,
"grad_norm": 2.6525633335113525,
"learning_rate": 4.7136234484821294e-05,
"loss": 0.0954,
"step": 19200
},
{
"epoch": 5.77,
"grad_norm": 1.2892892360687256,
"learning_rate": 4.7121280095708093e-05,
"loss": 0.0949,
"step": 19300
},
{
"epoch": 5.8,
"grad_norm": 1.5637331008911133,
"learning_rate": 4.7106325706594886e-05,
"loss": 0.0962,
"step": 19400
},
{
"epoch": 5.83,
"grad_norm": 2.5609443187713623,
"learning_rate": 4.709137131748168e-05,
"loss": 0.0921,
"step": 19500
},
{
"epoch": 5.86,
"grad_norm": 1.4690775871276855,
"learning_rate": 4.707641692836848e-05,
"loss": 0.0955,
"step": 19600
},
{
"epoch": 5.89,
"grad_norm": 1.081965684890747,
"learning_rate": 4.706146253925527e-05,
"loss": 0.0928,
"step": 19700
},
{
"epoch": 5.92,
"grad_norm": 1.6817141771316528,
"learning_rate": 4.704650815014207e-05,
"loss": 0.0963,
"step": 19800
},
{
"epoch": 5.95,
"grad_norm": 2.984762191772461,
"learning_rate": 4.7031553761028864e-05,
"loss": 0.095,
"step": 19900
},
{
"epoch": 5.98,
"grad_norm": 2.1594882011413574,
"learning_rate": 4.701659937191566e-05,
"loss": 0.0985,
"step": 20000
},
{
"epoch": 5.98,
"eval_loss": 0.18151727318763733,
"eval_precision": 0.9042639298086573,
"eval_recall": 0.859940269096955,
"eval_runtime": 302.8985,
"eval_samples_per_second": 44.153,
"eval_steps_per_second": 1.38,
"step": 20000
},
{
"epoch": 6.01,
"grad_norm": 2.0218722820281982,
"learning_rate": 4.700164498280246e-05,
"loss": 0.0886,
"step": 20100
},
{
"epoch": 6.04,
"grad_norm": 1.3569700717926025,
"learning_rate": 4.698669059368925e-05,
"loss": 0.0711,
"step": 20200
},
{
"epoch": 6.07,
"grad_norm": 1.5697298049926758,
"learning_rate": 4.697173620457604e-05,
"loss": 0.0724,
"step": 20300
},
{
"epoch": 6.1,
"grad_norm": 1.7853014469146729,
"learning_rate": 4.695678181546284e-05,
"loss": 0.0747,
"step": 20400
},
{
"epoch": 6.13,
"grad_norm": 0.7531015872955322,
"learning_rate": 4.6941827426349635e-05,
"loss": 0.074,
"step": 20500
},
{
"epoch": 6.16,
"grad_norm": 1.3895870447158813,
"learning_rate": 4.692687303723643e-05,
"loss": 0.0683,
"step": 20600
},
{
"epoch": 6.19,
"grad_norm": 2.084857225418091,
"learning_rate": 4.691191864812323e-05,
"loss": 0.0741,
"step": 20700
},
{
"epoch": 6.22,
"grad_norm": 0.9525838494300842,
"learning_rate": 4.689696425901002e-05,
"loss": 0.0647,
"step": 20800
},
{
"epoch": 6.25,
"grad_norm": 2.0475118160247803,
"learning_rate": 4.6882009869896813e-05,
"loss": 0.0746,
"step": 20900
},
{
"epoch": 6.28,
"grad_norm": 1.0650370121002197,
"learning_rate": 4.686705548078361e-05,
"loss": 0.0696,
"step": 21000
},
{
"epoch": 6.28,
"eval_loss": 0.19116894900798798,
"eval_precision": 0.9016753284483037,
"eval_recall": 0.8600326364727978,
"eval_runtime": 303.289,
"eval_samples_per_second": 44.097,
"eval_steps_per_second": 1.378,
"step": 21000
},
{
"epoch": 6.31,
"grad_norm": 1.5736846923828125,
"learning_rate": 4.6852101091670406e-05,
"loss": 0.0685,
"step": 21100
},
{
"epoch": 6.34,
"grad_norm": 0.7526031136512756,
"learning_rate": 4.6837146702557206e-05,
"loss": 0.0816,
"step": 21200
},
{
"epoch": 6.37,
"grad_norm": 1.284680724143982,
"learning_rate": 4.6822192313444e-05,
"loss": 0.0676,
"step": 21300
},
{
"epoch": 6.4,
"grad_norm": 4.207923889160156,
"learning_rate": 4.680723792433079e-05,
"loss": 0.0679,
"step": 21400
},
{
"epoch": 6.43,
"grad_norm": 1.3670810461044312,
"learning_rate": 4.679228353521759e-05,
"loss": 0.0721,
"step": 21500
},
{
"epoch": 6.46,
"grad_norm": 1.8094091415405273,
"learning_rate": 4.6777329146104384e-05,
"loss": 0.0673,
"step": 21600
},
{
"epoch": 6.49,
"grad_norm": 2.057133436203003,
"learning_rate": 4.676237475699118e-05,
"loss": 0.0711,
"step": 21700
},
{
"epoch": 6.52,
"grad_norm": 1.9356772899627686,
"learning_rate": 4.6747420367877976e-05,
"loss": 0.0713,
"step": 21800
},
{
"epoch": 6.55,
"grad_norm": 0.4188990592956543,
"learning_rate": 4.673246597876477e-05,
"loss": 0.0772,
"step": 21900
},
{
"epoch": 6.58,
"grad_norm": 0.9256879091262817,
"learning_rate": 4.671751158965156e-05,
"loss": 0.0715,
"step": 22000
},
{
"epoch": 6.58,
"eval_loss": 0.19474047422409058,
"eval_precision": 0.9012208304190246,
"eval_recall": 0.8727793343391115,
"eval_runtime": 305.0313,
"eval_samples_per_second": 43.845,
"eval_steps_per_second": 1.37,
"step": 22000
},
{
"epoch": 6.61,
"grad_norm": 0.890701949596405,
"learning_rate": 4.670255720053836e-05,
"loss": 0.0712,
"step": 22100
},
{
"epoch": 6.64,
"grad_norm": 1.6164826154708862,
"learning_rate": 4.6687602811425155e-05,
"loss": 0.0772,
"step": 22200
},
{
"epoch": 6.67,
"grad_norm": 1.2075903415679932,
"learning_rate": 4.667264842231195e-05,
"loss": 0.0734,
"step": 22300
},
{
"epoch": 6.7,
"grad_norm": 0.9141576886177063,
"learning_rate": 4.665769403319875e-05,
"loss": 0.0803,
"step": 22400
},
{
"epoch": 6.73,
"grad_norm": 3.0547311305999756,
"learning_rate": 4.664273964408554e-05,
"loss": 0.0688,
"step": 22500
},
{
"epoch": 6.76,
"grad_norm": 1.1152849197387695,
"learning_rate": 4.662778525497234e-05,
"loss": 0.0703,
"step": 22600
},
{
"epoch": 6.79,
"grad_norm": 2.150590181350708,
"learning_rate": 4.661283086585913e-05,
"loss": 0.0745,
"step": 22700
},
{
"epoch": 6.82,
"grad_norm": 1.4829721450805664,
"learning_rate": 4.6597876476745926e-05,
"loss": 0.0738,
"step": 22800
},
{
"epoch": 6.85,
"grad_norm": 0.6545503735542297,
"learning_rate": 4.6582922087632725e-05,
"loss": 0.0764,
"step": 22900
},
{
"epoch": 6.88,
"grad_norm": 1.2322636842727661,
"learning_rate": 4.656796769851952e-05,
"loss": 0.0765,
"step": 23000
},
{
"epoch": 6.88,
"eval_loss": 0.18639414012432098,
"eval_precision": 0.9072111489223789,
"eval_recall": 0.861849194864374,
"eval_runtime": 301.5834,
"eval_samples_per_second": 44.346,
"eval_steps_per_second": 1.386,
"step": 23000
},
{
"epoch": 6.91,
"grad_norm": 1.8931362628936768,
"learning_rate": 4.655301330940631e-05,
"loss": 0.0783,
"step": 23100
},
{
"epoch": 6.94,
"grad_norm": 0.7884649038314819,
"learning_rate": 4.653805892029311e-05,
"loss": 0.0718,
"step": 23200
},
{
"epoch": 6.97,
"grad_norm": 0.6341440081596375,
"learning_rate": 4.6523104531179904e-05,
"loss": 0.0698,
"step": 23300
},
{
"epoch": 7.0,
"grad_norm": 0.9098210334777832,
"learning_rate": 4.6508150142066697e-05,
"loss": 0.071,
"step": 23400
},
{
"epoch": 7.03,
"grad_norm": 3.0700671672821045,
"learning_rate": 4.6493195752953496e-05,
"loss": 0.0552,
"step": 23500
},
{
"epoch": 7.06,
"grad_norm": 1.5736912488937378,
"learning_rate": 4.647824136384029e-05,
"loss": 0.055,
"step": 23600
},
{
"epoch": 7.09,
"grad_norm": 0.9347396492958069,
"learning_rate": 4.646328697472708e-05,
"loss": 0.0592,
"step": 23700
},
{
"epoch": 7.12,
"grad_norm": 1.7453091144561768,
"learning_rate": 4.644833258561388e-05,
"loss": 0.0623,
"step": 23800
},
{
"epoch": 7.15,
"grad_norm": 1.1539710760116577,
"learning_rate": 4.6433378196500674e-05,
"loss": 0.0558,
"step": 23900
},
{
"epoch": 7.18,
"grad_norm": 0.7530619502067566,
"learning_rate": 4.641842380738747e-05,
"loss": 0.0546,
"step": 24000
},
{
"epoch": 7.18,
"eval_loss": 0.2078467607498169,
"eval_precision": 0.908101688386724,
"eval_recall": 0.8710551433233782,
"eval_runtime": 302.902,
"eval_samples_per_second": 44.153,
"eval_steps_per_second": 1.38,
"step": 24000
},
{
"epoch": 7.21,
"grad_norm": 1.6339865922927856,
"learning_rate": 4.640346941827427e-05,
"loss": 0.0579,
"step": 24100
},
{
"epoch": 7.24,
"grad_norm": 2.397862434387207,
"learning_rate": 4.638851502916106e-05,
"loss": 0.054,
"step": 24200
},
{
"epoch": 7.27,
"grad_norm": 2.5979652404785156,
"learning_rate": 4.637356064004786e-05,
"loss": 0.0582,
"step": 24300
},
{
"epoch": 7.3,
"grad_norm": 1.4249415397644043,
"learning_rate": 4.635860625093465e-05,
"loss": 0.0611,
"step": 24400
},
{
"epoch": 7.33,
"grad_norm": 1.1104274988174438,
"learning_rate": 4.6343651861821445e-05,
"loss": 0.0603,
"step": 24500
},
{
"epoch": 7.36,
"grad_norm": 1.039832353591919,
"learning_rate": 4.6328697472708245e-05,
"loss": 0.06,
"step": 24600
},
{
"epoch": 7.39,
"grad_norm": 1.1284308433532715,
"learning_rate": 4.631374308359504e-05,
"loss": 0.0528,
"step": 24700
},
{
"epoch": 7.42,
"grad_norm": 3.3189823627471924,
"learning_rate": 4.629878869448183e-05,
"loss": 0.0634,
"step": 24800
},
{
"epoch": 7.45,
"grad_norm": 2.0465550422668457,
"learning_rate": 4.628383430536863e-05,
"loss": 0.0599,
"step": 24900
},
{
"epoch": 7.48,
"grad_norm": 1.93597412109375,
"learning_rate": 4.626887991625542e-05,
"loss": 0.0588,
"step": 25000
},
{
"epoch": 7.48,
"eval_loss": 0.20041726529598236,
"eval_precision": 0.9101642057026477,
"eval_recall": 0.8805997721604729,
"eval_runtime": 302.521,
"eval_samples_per_second": 44.209,
"eval_steps_per_second": 1.382,
"step": 25000
},
{
"epoch": 7.51,
"grad_norm": 2.2025020122528076,
"learning_rate": 4.6253925527142216e-05,
"loss": 0.0557,
"step": 25100
},
{
"epoch": 7.54,
"grad_norm": 2.4900927543640137,
"learning_rate": 4.6238971138029016e-05,
"loss": 0.0613,
"step": 25200
},
{
"epoch": 7.57,
"grad_norm": 1.2546288967132568,
"learning_rate": 4.622401674891581e-05,
"loss": 0.0609,
"step": 25300
},
{
"epoch": 7.6,
"grad_norm": 1.3969674110412598,
"learning_rate": 4.62090623598026e-05,
"loss": 0.0617,
"step": 25400
},
{
"epoch": 7.63,
"grad_norm": 0.2969658374786377,
"learning_rate": 4.61941079706894e-05,
"loss": 0.0602,
"step": 25500
},
{
"epoch": 7.66,
"grad_norm": 0.7388882040977478,
"learning_rate": 4.6179153581576194e-05,
"loss": 0.0593,
"step": 25600
},
{
"epoch": 7.69,
"grad_norm": 0.609923779964447,
"learning_rate": 4.6164199192462994e-05,
"loss": 0.0596,
"step": 25700
},
{
"epoch": 7.72,
"grad_norm": 2.3986215591430664,
"learning_rate": 4.614924480334979e-05,
"loss": 0.0651,
"step": 25800
},
{
"epoch": 7.75,
"grad_norm": 1.1203041076660156,
"learning_rate": 4.613429041423658e-05,
"loss": 0.0649,
"step": 25900
},
{
"epoch": 7.78,
"grad_norm": 0.7929214835166931,
"learning_rate": 4.611933602512338e-05,
"loss": 0.0648,
"step": 26000
},
{
"epoch": 7.78,
"eval_loss": 0.19321496784687042,
"eval_precision": 0.9163062916598927,
"eval_recall": 0.8676683395424736,
"eval_runtime": 301.2643,
"eval_samples_per_second": 44.393,
"eval_steps_per_second": 1.387,
"step": 26000
},
{
"epoch": 7.81,
"grad_norm": 0.5828276872634888,
"learning_rate": 4.610438163601017e-05,
"loss": 0.058,
"step": 26100
},
{
"epoch": 7.83,
"grad_norm": 0.44025149941444397,
"learning_rate": 4.6089427246896965e-05,
"loss": 0.0598,
"step": 26200
},
{
"epoch": 7.86,
"grad_norm": 0.7976229786872864,
"learning_rate": 4.6074472857783765e-05,
"loss": 0.0655,
"step": 26300
},
{
"epoch": 7.89,
"grad_norm": 2.6843769550323486,
"learning_rate": 4.605951846867056e-05,
"loss": 0.0588,
"step": 26400
},
{
"epoch": 7.92,
"grad_norm": 1.1365008354187012,
"learning_rate": 4.604456407955735e-05,
"loss": 0.0563,
"step": 26500
},
{
"epoch": 7.95,
"grad_norm": 2.463488817214966,
"learning_rate": 4.602960969044415e-05,
"loss": 0.0581,
"step": 26600
},
{
"epoch": 7.98,
"grad_norm": 0.47716620564460754,
"learning_rate": 4.601465530133094e-05,
"loss": 0.0595,
"step": 26700
},
{
"epoch": 8.01,
"grad_norm": 1.3218754529953003,
"learning_rate": 4.5999700912217736e-05,
"loss": 0.0554,
"step": 26800
},
{
"epoch": 8.04,
"grad_norm": 1.0640392303466797,
"learning_rate": 4.5984746523104536e-05,
"loss": 0.0409,
"step": 26900
},
{
"epoch": 8.07,
"grad_norm": 0.7323993444442749,
"learning_rate": 4.596979213399133e-05,
"loss": 0.0463,
"step": 27000
},
{
"epoch": 8.07,
"eval_loss": 0.21357020735740662,
"eval_precision": 0.9223724947042529,
"eval_recall": 0.8714246128267495,
"eval_runtime": 301.9271,
"eval_samples_per_second": 44.295,
"eval_steps_per_second": 1.384,
"step": 27000
},
{
"epoch": 8.1,
"grad_norm": 2.1960983276367188,
"learning_rate": 4.595483774487813e-05,
"loss": 0.0424,
"step": 27100
},
{
"epoch": 8.13,
"grad_norm": 2.5061357021331787,
"learning_rate": 4.593988335576492e-05,
"loss": 0.0436,
"step": 27200
},
{
"epoch": 8.16,
"grad_norm": 0.5249370336532593,
"learning_rate": 4.5924928966651714e-05,
"loss": 0.0537,
"step": 27300
},
{
"epoch": 8.19,
"grad_norm": 1.0211517810821533,
"learning_rate": 4.5909974577538514e-05,
"loss": 0.0448,
"step": 27400
},
{
"epoch": 8.22,
"grad_norm": 2.860835552215576,
"learning_rate": 4.58950201884253e-05,
"loss": 0.0474,
"step": 27500
},
{
"epoch": 8.25,
"grad_norm": 2.019699811935425,
"learning_rate": 4.58800657993121e-05,
"loss": 0.0482,
"step": 27600
},
{
"epoch": 8.28,
"grad_norm": 0.9144898653030396,
"learning_rate": 4.58651114101989e-05,
"loss": 0.045,
"step": 27700
},
{
"epoch": 8.31,
"grad_norm": 1.656792402267456,
"learning_rate": 4.585015702108569e-05,
"loss": 0.0475,
"step": 27800
},
{
"epoch": 8.34,
"grad_norm": 1.1702663898468018,
"learning_rate": 4.5835202631972485e-05,
"loss": 0.0445,
"step": 27900
},
{
"epoch": 8.37,
"grad_norm": 2.0331854820251465,
"learning_rate": 4.5820248242859284e-05,
"loss": 0.0429,
"step": 28000
},
{
"epoch": 8.37,
"eval_loss": 0.22609786689281464,
"eval_precision": 0.9198246970868781,
"eval_recall": 0.8788447920194588,
"eval_runtime": 302.1631,
"eval_samples_per_second": 44.261,
"eval_steps_per_second": 1.383,
"step": 28000
},
{
"epoch": 8.4,
"grad_norm": 5.98319673538208,
"learning_rate": 4.580529385374608e-05,
"loss": 0.0429,
"step": 28100
},
{
"epoch": 8.43,
"grad_norm": 1.0793452262878418,
"learning_rate": 4.579033946463287e-05,
"loss": 0.0525,
"step": 28200
},
{
"epoch": 8.46,
"grad_norm": 1.4804214239120483,
"learning_rate": 4.577538507551967e-05,
"loss": 0.0459,
"step": 28300
},
{
"epoch": 8.49,
"grad_norm": 0.9862244129180908,
"learning_rate": 4.576043068640646e-05,
"loss": 0.0534,
"step": 28400
},
{
"epoch": 8.52,
"grad_norm": 1.26304030418396,
"learning_rate": 4.574547629729326e-05,
"loss": 0.048,
"step": 28500
},
{
"epoch": 8.55,
"grad_norm": 0.4214903712272644,
"learning_rate": 4.573052190818005e-05,
"loss": 0.0547,
"step": 28600
},
{
"epoch": 8.58,
"grad_norm": 0.9271091222763062,
"learning_rate": 4.571556751906685e-05,
"loss": 0.0537,
"step": 28700
},
{
"epoch": 8.61,
"grad_norm": 0.8437818884849548,
"learning_rate": 4.570061312995365e-05,
"loss": 0.0537,
"step": 28800
},
{
"epoch": 8.64,
"grad_norm": 0.8551807999610901,
"learning_rate": 4.5685658740840434e-05,
"loss": 0.0461,
"step": 28900
},
{
"epoch": 8.67,
"grad_norm": 1.8268975019454956,
"learning_rate": 4.5670704351727234e-05,
"loss": 0.046,
"step": 29000
},
{
"epoch": 8.67,
"eval_loss": 0.20938238501548767,
"eval_precision": 0.9151901573163308,
"eval_recall": 0.8794605745250778,
"eval_runtime": 302.034,
"eval_samples_per_second": 44.28,
"eval_steps_per_second": 1.384,
"step": 29000
},
{
"epoch": 8.7,
"grad_norm": 0.08975500613451004,
"learning_rate": 4.565574996261403e-05,
"loss": 0.0493,
"step": 29100
},
{
"epoch": 8.73,
"grad_norm": 2.3698606491088867,
"learning_rate": 4.564079557350082e-05,
"loss": 0.0506,
"step": 29200
},
{
"epoch": 8.76,
"grad_norm": 1.1118419170379639,
"learning_rate": 4.562584118438762e-05,
"loss": 0.0445,
"step": 29300
},
{
"epoch": 8.79,
"grad_norm": 1.8186097145080566,
"learning_rate": 4.561088679527442e-05,
"loss": 0.0471,
"step": 29400
},
{
"epoch": 8.82,
"grad_norm": 1.4056422710418701,
"learning_rate": 4.559593240616121e-05,
"loss": 0.0513,
"step": 29500
},
{
"epoch": 8.85,
"grad_norm": 1.5597076416015625,
"learning_rate": 4.5580978017048004e-05,
"loss": 0.0452,
"step": 29600
},
{
"epoch": 8.88,
"grad_norm": 0.8287553191184998,
"learning_rate": 4.5566023627934804e-05,
"loss": 0.0523,
"step": 29700
},
{
"epoch": 8.91,
"grad_norm": 0.6897550821304321,
"learning_rate": 4.55510692388216e-05,
"loss": 0.0466,
"step": 29800
},
{
"epoch": 8.94,
"grad_norm": 0.7071977853775024,
"learning_rate": 4.553611484970839e-05,
"loss": 0.0434,
"step": 29900
},
{
"epoch": 8.97,
"grad_norm": 0.6574975252151489,
"learning_rate": 4.552116046059518e-05,
"loss": 0.0495,
"step": 30000
},
{
"epoch": 8.97,
"eval_loss": 0.20542754232883453,
"eval_precision": 0.9183409556852231,
"eval_recall": 0.8964561716801626,
"eval_runtime": 302.3305,
"eval_samples_per_second": 44.236,
"eval_steps_per_second": 1.383,
"step": 30000
},
{
"epoch": 9.0,
"grad_norm": 1.3489534854888916,
"learning_rate": 4.550620607148198e-05,
"loss": 0.0499,
"step": 30100
},
{
"epoch": 9.03,
"grad_norm": 1.0300263166427612,
"learning_rate": 4.549125168236878e-05,
"loss": 0.0353,
"step": 30200
},
{
"epoch": 9.06,
"grad_norm": 0.4393318295478821,
"learning_rate": 4.547629729325557e-05,
"loss": 0.0352,
"step": 30300
},
{
"epoch": 9.09,
"grad_norm": 0.4519498944282532,
"learning_rate": 4.546134290414237e-05,
"loss": 0.0342,
"step": 30400
},
{
"epoch": 9.12,
"grad_norm": 0.9631327986717224,
"learning_rate": 4.544638851502917e-05,
"loss": 0.0364,
"step": 30500
},
{
"epoch": 9.15,
"grad_norm": 2.7282943725585938,
"learning_rate": 4.5431434125915954e-05,
"loss": 0.0354,
"step": 30600
},
{
"epoch": 9.18,
"grad_norm": 0.5908452272415161,
"learning_rate": 4.541647973680275e-05,
"loss": 0.0356,
"step": 30700
},
{
"epoch": 9.21,
"grad_norm": 2.3660802841186523,
"learning_rate": 4.540152534768955e-05,
"loss": 0.0413,
"step": 30800
},
{
"epoch": 9.24,
"grad_norm": 1.7346217632293701,
"learning_rate": 4.5386570958576346e-05,
"loss": 0.036,
"step": 30900
},
{
"epoch": 9.27,
"grad_norm": 1.0829362869262695,
"learning_rate": 4.537161656946314e-05,
"loss": 0.0376,
"step": 31000
},
{
"epoch": 9.27,
"eval_loss": 0.226752370595932,
"eval_precision": 0.925325841962565,
"eval_recall": 0.8721635518334924,
"eval_runtime": 302.3165,
"eval_samples_per_second": 44.238,
"eval_steps_per_second": 1.383,
"step": 31000
},
{
"epoch": 9.3,
"grad_norm": 1.2249701023101807,
"learning_rate": 4.535666218034993e-05,
"loss": 0.039,
"step": 31100
},
{
"epoch": 9.33,
"grad_norm": 2.201986789703369,
"learning_rate": 4.534170779123673e-05,
"loss": 0.0384,
"step": 31200
},
{
"epoch": 9.36,
"grad_norm": 0.31157541275024414,
"learning_rate": 4.5326753402123524e-05,
"loss": 0.0318,
"step": 31300
},
{
"epoch": 9.39,
"grad_norm": 0.7502834796905518,
"learning_rate": 4.531179901301032e-05,
"loss": 0.0397,
"step": 31400
},
{
"epoch": 9.42,
"grad_norm": 0.3627040684223175,
"learning_rate": 4.529684462389712e-05,
"loss": 0.0389,
"step": 31500
},
{
"epoch": 9.45,
"grad_norm": 2.008009672164917,
"learning_rate": 4.5281890234783916e-05,
"loss": 0.042,
"step": 31600
},
{
"epoch": 9.48,
"grad_norm": 2.5352540016174316,
"learning_rate": 4.52669358456707e-05,
"loss": 0.0407,
"step": 31700
},
{
"epoch": 9.51,
"grad_norm": 0.543992280960083,
"learning_rate": 4.52519814565575e-05,
"loss": 0.0309,
"step": 31800
},
{
"epoch": 9.54,
"grad_norm": 1.3150848150253296,
"learning_rate": 4.52370270674443e-05,
"loss": 0.0369,
"step": 31900
},
{
"epoch": 9.57,
"grad_norm": 1.6026105880737305,
"learning_rate": 4.522207267833109e-05,
"loss": 0.0418,
"step": 32000
},
{
"epoch": 9.57,
"eval_loss": 0.21585828065872192,
"eval_precision": 0.9208557844690967,
"eval_recall": 0.8945164567874627,
"eval_runtime": 303.0508,
"eval_samples_per_second": 44.131,
"eval_steps_per_second": 1.379,
"step": 32000
},
{
"epoch": 9.6,
"grad_norm": 1.8489359617233276,
"learning_rate": 4.520711828921789e-05,
"loss": 0.0427,
"step": 32100
},
{
"epoch": 9.63,
"grad_norm": 2.4979922771453857,
"learning_rate": 4.519216390010468e-05,
"loss": 0.0337,
"step": 32200
},
{
"epoch": 9.66,
"grad_norm": 0.3452712595462799,
"learning_rate": 4.517720951099148e-05,
"loss": 0.0347,
"step": 32300
},
{
"epoch": 9.69,
"grad_norm": 1.081455945968628,
"learning_rate": 4.516225512187827e-05,
"loss": 0.047,
"step": 32400
},
{
"epoch": 9.72,
"grad_norm": 2.3087069988250732,
"learning_rate": 4.5147300732765066e-05,
"loss": 0.0404,
"step": 32500
},
{
"epoch": 9.75,
"grad_norm": 1.901135802268982,
"learning_rate": 4.5132346343651865e-05,
"loss": 0.0394,
"step": 32600
},
{
"epoch": 9.78,
"grad_norm": 1.2389637231826782,
"learning_rate": 4.511739195453866e-05,
"loss": 0.0376,
"step": 32700
},
{
"epoch": 9.81,
"grad_norm": 0.619143545627594,
"learning_rate": 4.510243756542545e-05,
"loss": 0.0414,
"step": 32800
},
{
"epoch": 9.84,
"grad_norm": 1.3270721435546875,
"learning_rate": 4.508748317631225e-05,
"loss": 0.0405,
"step": 32900
},
{
"epoch": 9.87,
"grad_norm": 2.503606081008911,
"learning_rate": 4.507252878719905e-05,
"loss": 0.0493,
"step": 33000
},
{
"epoch": 9.87,
"eval_loss": 0.20709815621376038,
"eval_precision": 0.9246134231259603,
"eval_recall": 0.8708088303211305,
"eval_runtime": 301.957,
"eval_samples_per_second": 44.291,
"eval_steps_per_second": 1.384,
"step": 33000
},
{
"epoch": 9.9,
"grad_norm": 0.6343371868133545,
"learning_rate": 4.505757439808584e-05,
"loss": 0.0365,
"step": 33100
},
{
"epoch": 9.93,
"grad_norm": 0.3116106688976288,
"learning_rate": 4.5042620008972636e-05,
"loss": 0.0358,
"step": 33200
},
{
"epoch": 9.96,
"grad_norm": 0.7307326197624207,
"learning_rate": 4.5027665619859436e-05,
"loss": 0.0411,
"step": 33300
},
{
"epoch": 9.99,
"grad_norm": 2.104717493057251,
"learning_rate": 4.501271123074622e-05,
"loss": 0.0401,
"step": 33400
},
{
"epoch": 10.02,
"grad_norm": 3.8659448623657227,
"learning_rate": 4.499775684163302e-05,
"loss": 0.0348,
"step": 33500
},
{
"epoch": 10.05,
"grad_norm": 1.0324366092681885,
"learning_rate": 4.4982802452519815e-05,
"loss": 0.0344,
"step": 33600
},
{
"epoch": 10.08,
"grad_norm": 1.0838052034378052,
"learning_rate": 4.4967848063406614e-05,
"loss": 0.0327,
"step": 33700
},
{
"epoch": 10.11,
"grad_norm": 1.8709659576416016,
"learning_rate": 4.495289367429341e-05,
"loss": 0.0267,
"step": 33800
},
{
"epoch": 10.14,
"grad_norm": 0.4261041283607483,
"learning_rate": 4.49379392851802e-05,
"loss": 0.0305,
"step": 33900
},
{
"epoch": 10.17,
"grad_norm": 0.16497644782066345,
"learning_rate": 4.4922984896067e-05,
"loss": 0.0276,
"step": 34000
},
{
"epoch": 10.17,
"eval_loss": 0.2343963235616684,
"eval_precision": 0.9252133285746731,
"eval_recall": 0.8779826965115921,
"eval_runtime": 301.9423,
"eval_samples_per_second": 44.293,
"eval_steps_per_second": 1.384,
"step": 34000
},
{
"epoch": 10.2,
"grad_norm": 2.9655115604400635,
"learning_rate": 4.490803050695379e-05,
"loss": 0.0268,
"step": 34100
},
{
"epoch": 10.23,
"grad_norm": 1.536979079246521,
"learning_rate": 4.4893076117840586e-05,
"loss": 0.0299,
"step": 34200
},
{
"epoch": 10.26,
"grad_norm": 2.8167715072631836,
"learning_rate": 4.4878121728727385e-05,
"loss": 0.0325,
"step": 34300
},
{
"epoch": 10.29,
"grad_norm": 2.1207668781280518,
"learning_rate": 4.4863167339614185e-05,
"loss": 0.029,
"step": 34400
},
{
"epoch": 10.32,
"grad_norm": 2.277759552001953,
"learning_rate": 4.484821295050097e-05,
"loss": 0.0308,
"step": 34500
},
{
"epoch": 10.35,
"grad_norm": 1.226417899131775,
"learning_rate": 4.483325856138777e-05,
"loss": 0.0299,
"step": 34600
},
{
"epoch": 10.38,
"grad_norm": 0.63482266664505,
"learning_rate": 4.4818304172274563e-05,
"loss": 0.0337,
"step": 34700
},
{
"epoch": 10.41,
"grad_norm": 1.8453493118286133,
"learning_rate": 4.4803349783161356e-05,
"loss": 0.0346,
"step": 34800
},
{
"epoch": 10.44,
"grad_norm": 0.40149375796318054,
"learning_rate": 4.4788395394048156e-05,
"loss": 0.03,
"step": 34900
},
{
"epoch": 10.47,
"grad_norm": 0.3980793058872223,
"learning_rate": 4.477344100493495e-05,
"loss": 0.035,
"step": 35000
},
{
"epoch": 10.47,
"eval_loss": 0.22229593992233276,
"eval_precision": 0.9262946269334285,
"eval_recall": 0.8795221527756396,
"eval_runtime": 302.9773,
"eval_samples_per_second": 44.142,
"eval_steps_per_second": 1.38,
"step": 35000
},
{
"epoch": 10.5,
"grad_norm": 0.629266083240509,
"learning_rate": 4.475848661582174e-05,
"loss": 0.0363,
"step": 35100
},
{
"epoch": 10.53,
"grad_norm": 1.134805679321289,
"learning_rate": 4.474353222670854e-05,
"loss": 0.0343,
"step": 35200
},
{
"epoch": 10.56,
"grad_norm": 1.9168953895568848,
"learning_rate": 4.4728577837595334e-05,
"loss": 0.0333,
"step": 35300
},
{
"epoch": 10.59,
"grad_norm": 0.7437408566474915,
"learning_rate": 4.4713623448482134e-05,
"loss": 0.0377,
"step": 35400
},
{
"epoch": 10.62,
"grad_norm": 0.8649216890335083,
"learning_rate": 4.469866905936893e-05,
"loss": 0.0387,
"step": 35500
},
{
"epoch": 10.65,
"grad_norm": 1.9679126739501953,
"learning_rate": 4.468371467025572e-05,
"loss": 0.0324,
"step": 35600
},
{
"epoch": 10.68,
"grad_norm": 1.0343681573867798,
"learning_rate": 4.466876028114252e-05,
"loss": 0.0371,
"step": 35700
},
{
"epoch": 10.71,
"grad_norm": 0.3291555941104889,
"learning_rate": 4.465380589202931e-05,
"loss": 0.0339,
"step": 35800
},
{
"epoch": 10.74,
"grad_norm": 1.2407808303833008,
"learning_rate": 4.4638851502916105e-05,
"loss": 0.0376,
"step": 35900
},
{
"epoch": 10.77,
"grad_norm": 1.2906955480575562,
"learning_rate": 4.4623897113802905e-05,
"loss": 0.0348,
"step": 36000
},
{
"epoch": 10.77,
"eval_loss": 0.22172214090824127,
"eval_precision": 0.9251365945617791,
"eval_recall": 0.8914683333846486,
"eval_runtime": 302.63,
"eval_samples_per_second": 44.193,
"eval_steps_per_second": 1.381,
"step": 36000
},
{
"epoch": 10.8,
"grad_norm": 0.9678496718406677,
"learning_rate": 4.46089427246897e-05,
"loss": 0.0354,
"step": 36100
},
{
"epoch": 10.83,
"grad_norm": 1.92240571975708,
"learning_rate": 4.459398833557649e-05,
"loss": 0.0324,
"step": 36200
},
{
"epoch": 10.86,
"grad_norm": 2.5916824340820312,
"learning_rate": 4.457903394646329e-05,
"loss": 0.034,
"step": 36300
},
{
"epoch": 10.89,
"grad_norm": 1.4677050113677979,
"learning_rate": 4.456407955735008e-05,
"loss": 0.0304,
"step": 36400
},
{
"epoch": 10.92,
"grad_norm": 1.1423336267471313,
"learning_rate": 4.4549125168236876e-05,
"loss": 0.0315,
"step": 36500
},
{
"epoch": 10.94,
"grad_norm": 1.0664762258529663,
"learning_rate": 4.4534170779123676e-05,
"loss": 0.0371,
"step": 36600
},
{
"epoch": 10.97,
"grad_norm": 1.344557762145996,
"learning_rate": 4.451921639001047e-05,
"loss": 0.0334,
"step": 36700
},
{
"epoch": 11.0,
"grad_norm": 2.944450616836548,
"learning_rate": 4.450426200089727e-05,
"loss": 0.0312,
"step": 36800
},
{
"epoch": 11.03,
"grad_norm": 1.02321195602417,
"learning_rate": 4.448930761178406e-05,
"loss": 0.0243,
"step": 36900
},
{
"epoch": 11.06,
"grad_norm": 1.4520535469055176,
"learning_rate": 4.4474353222670854e-05,
"loss": 0.0263,
"step": 37000
},
{
"epoch": 11.06,
"eval_loss": 0.23973342776298523,
"eval_precision": 0.928783958602846,
"eval_recall": 0.8842020998183442,
"eval_runtime": 302.1259,
"eval_samples_per_second": 44.266,
"eval_steps_per_second": 1.384,
"step": 37000
},
{
"epoch": 11.09,
"grad_norm": 0.9927899837493896,
"learning_rate": 4.4459398833557654e-05,
"loss": 0.0251,
"step": 37100
},
{
"epoch": 11.12,
"grad_norm": 0.7255445122718811,
"learning_rate": 4.4444444444444447e-05,
"loss": 0.023,
"step": 37200
},
{
"epoch": 11.15,
"grad_norm": 1.2551404237747192,
"learning_rate": 4.442949005533124e-05,
"loss": 0.0282,
"step": 37300
},
{
"epoch": 11.18,
"grad_norm": 1.8652236461639404,
"learning_rate": 4.441453566621804e-05,
"loss": 0.0265,
"step": 37400
},
{
"epoch": 11.21,
"grad_norm": 0.29598140716552734,
"learning_rate": 4.439958127710483e-05,
"loss": 0.0231,
"step": 37500
},
{
"epoch": 11.24,
"grad_norm": 0.517977774143219,
"learning_rate": 4.4384626887991625e-05,
"loss": 0.0266,
"step": 37600
},
{
"epoch": 11.27,
"grad_norm": 1.3159215450286865,
"learning_rate": 4.4369672498878425e-05,
"loss": 0.0246,
"step": 37700
},
{
"epoch": 11.3,
"grad_norm": 1.8311362266540527,
"learning_rate": 4.435471810976522e-05,
"loss": 0.0325,
"step": 37800
},
{
"epoch": 11.33,
"grad_norm": 2.8861258029937744,
"learning_rate": 4.433976372065201e-05,
"loss": 0.0303,
"step": 37900
},
{
"epoch": 11.36,
"grad_norm": 0.6612695455551147,
"learning_rate": 4.432480933153881e-05,
"loss": 0.0284,
"step": 38000
},
{
"epoch": 11.36,
"eval_loss": 0.23250487446784973,
"eval_precision": 0.9248716302952503,
"eval_recall": 0.8873118014717202,
"eval_runtime": 302.5481,
"eval_samples_per_second": 44.205,
"eval_steps_per_second": 1.382,
"step": 38000
},
{
"epoch": 11.39,
"grad_norm": 0.8181266784667969,
"learning_rate": 4.43098549424256e-05,
"loss": 0.0251,
"step": 38100
},
{
"epoch": 11.42,
"grad_norm": 0.48834991455078125,
"learning_rate": 4.42949005533124e-05,
"loss": 0.0313,
"step": 38200
},
{
"epoch": 11.45,
"grad_norm": 0.4897523820400238,
"learning_rate": 4.4279946164199195e-05,
"loss": 0.0328,
"step": 38300
},
{
"epoch": 11.48,
"grad_norm": 0.7222294807434082,
"learning_rate": 4.426499177508599e-05,
"loss": 0.0298,
"step": 38400
},
{
"epoch": 11.51,
"grad_norm": 0.07086914777755737,
"learning_rate": 4.425003738597279e-05,
"loss": 0.032,
"step": 38500
},
{
"epoch": 11.54,
"grad_norm": 1.4812002182006836,
"learning_rate": 4.423508299685958e-05,
"loss": 0.0282,
"step": 38600
},
{
"epoch": 11.57,
"grad_norm": 1.302590012550354,
"learning_rate": 4.4220128607746374e-05,
"loss": 0.027,
"step": 38700
},
{
"epoch": 11.6,
"grad_norm": 1.9532426595687866,
"learning_rate": 4.420517421863317e-05,
"loss": 0.0304,
"step": 38800
},
{
"epoch": 11.63,
"grad_norm": 2.029754638671875,
"learning_rate": 4.4190219829519966e-05,
"loss": 0.0301,
"step": 38900
},
{
"epoch": 11.66,
"grad_norm": 1.320448398590088,
"learning_rate": 4.417526544040676e-05,
"loss": 0.0277,
"step": 39000
},
{
"epoch": 11.66,
"eval_loss": 0.241913303732872,
"eval_precision": 0.9234215627085253,
"eval_recall": 0.8947627697897103,
"eval_runtime": 303.3685,
"eval_samples_per_second": 44.085,
"eval_steps_per_second": 1.378,
"step": 39000
},
{
"epoch": 11.69,
"grad_norm": 1.8267722129821777,
"learning_rate": 4.416031105129356e-05,
"loss": 0.0249,
"step": 39100
},
{
"epoch": 11.72,
"grad_norm": 0.7122277021408081,
"learning_rate": 4.414535666218035e-05,
"loss": 0.0323,
"step": 39200
},
{
"epoch": 11.75,
"grad_norm": 0.5691227316856384,
"learning_rate": 4.4130402273067145e-05,
"loss": 0.0325,
"step": 39300
},
{
"epoch": 11.78,
"grad_norm": 0.40894216299057007,
"learning_rate": 4.4115447883953944e-05,
"loss": 0.0301,
"step": 39400
},
{
"epoch": 11.81,
"grad_norm": 2.4805972576141357,
"learning_rate": 4.410049349484074e-05,
"loss": 0.0277,
"step": 39500
},
{
"epoch": 11.84,
"grad_norm": 1.2774219512939453,
"learning_rate": 4.408553910572754e-05,
"loss": 0.0278,
"step": 39600
},
{
"epoch": 11.87,
"grad_norm": 1.267562985420227,
"learning_rate": 4.407058471661433e-05,
"loss": 0.0286,
"step": 39700
},
{
"epoch": 11.9,
"grad_norm": 0.6910821795463562,
"learning_rate": 4.405563032750112e-05,
"loss": 0.0344,
"step": 39800
},
{
"epoch": 11.93,
"grad_norm": 0.3539283275604248,
"learning_rate": 4.404067593838792e-05,
"loss": 0.0298,
"step": 39900
},
{
"epoch": 11.96,
"grad_norm": 1.7098407745361328,
"learning_rate": 4.4025721549274715e-05,
"loss": 0.0318,
"step": 40000
},
{
"epoch": 11.96,
"eval_loss": 0.23493793606758118,
"eval_precision": 0.9253437490076529,
"eval_recall": 0.8971951106869054,
"eval_runtime": 302.3541,
"eval_samples_per_second": 44.233,
"eval_steps_per_second": 1.382,
"step": 40000
},
{
"epoch": 11.99,
"grad_norm": 2.1748311519622803,
"learning_rate": 4.401076716016151e-05,
"loss": 0.0312,
"step": 40100
},
{
"epoch": 12.02,
"grad_norm": 0.8426460027694702,
"learning_rate": 4.399581277104831e-05,
"loss": 0.0262,
"step": 40200
},
{
"epoch": 12.05,
"grad_norm": 0.3200826048851013,
"learning_rate": 4.39808583819351e-05,
"loss": 0.0237,
"step": 40300
},
{
"epoch": 12.08,
"grad_norm": 0.2708234488964081,
"learning_rate": 4.3965903992821893e-05,
"loss": 0.0229,
"step": 40400
},
{
"epoch": 12.11,
"grad_norm": 1.4237157106399536,
"learning_rate": 4.395094960370869e-05,
"loss": 0.0198,
"step": 40500
},
{
"epoch": 12.14,
"grad_norm": 0.06805676221847534,
"learning_rate": 4.3935995214595486e-05,
"loss": 0.026,
"step": 40600
},
{
"epoch": 12.17,
"grad_norm": 1.2842926979064941,
"learning_rate": 4.392104082548228e-05,
"loss": 0.0241,
"step": 40700
},
{
"epoch": 12.2,
"grad_norm": 1.5190855264663696,
"learning_rate": 4.390608643636908e-05,
"loss": 0.0232,
"step": 40800
},
{
"epoch": 12.23,
"grad_norm": 1.8280004262924194,
"learning_rate": 4.389113204725587e-05,
"loss": 0.0241,
"step": 40900
},
{
"epoch": 12.26,
"grad_norm": 0.19059352576732635,
"learning_rate": 4.3876177658142664e-05,
"loss": 0.0238,
"step": 41000
},
{
"epoch": 12.26,
"eval_loss": 0.24695585668087006,
"eval_precision": 0.9256610729722858,
"eval_recall": 0.8967332738076911,
"eval_runtime": 302.2734,
"eval_samples_per_second": 44.245,
"eval_steps_per_second": 1.383,
"step": 41000
},
{
"epoch": 12.29,
"grad_norm": 0.40746474266052246,
"learning_rate": 4.3861223269029464e-05,
"loss": 0.0232,
"step": 41100
},
{
"epoch": 12.32,
"grad_norm": 1.2412996292114258,
"learning_rate": 4.384626887991626e-05,
"loss": 0.0215,
"step": 41200
},
{
"epoch": 12.35,
"grad_norm": 0.2166558802127838,
"learning_rate": 4.3831314490803056e-05,
"loss": 0.0237,
"step": 41300
},
{
"epoch": 12.38,
"grad_norm": 0.719872236251831,
"learning_rate": 4.381636010168985e-05,
"loss": 0.0253,
"step": 41400
},
{
"epoch": 12.41,
"grad_norm": 1.5946626663208008,
"learning_rate": 4.380140571257664e-05,
"loss": 0.0235,
"step": 41500
},
{
"epoch": 12.44,
"grad_norm": 1.0119950771331787,
"learning_rate": 4.378645132346344e-05,
"loss": 0.0257,
"step": 41600
},
{
"epoch": 12.47,
"grad_norm": 0.9327923059463501,
"learning_rate": 4.377149693435023e-05,
"loss": 0.0243,
"step": 41700
},
{
"epoch": 12.5,
"grad_norm": 0.41256028413772583,
"learning_rate": 4.375654254523703e-05,
"loss": 0.0272,
"step": 41800
},
{
"epoch": 12.53,
"grad_norm": 0.1845785677433014,
"learning_rate": 4.374158815612383e-05,
"loss": 0.029,
"step": 41900
},
{
"epoch": 12.56,
"grad_norm": 1.754239559173584,
"learning_rate": 4.372663376701062e-05,
"loss": 0.0252,
"step": 42000
},
{
"epoch": 12.56,
"eval_loss": 0.2473253309726715,
"eval_precision": 0.9269791733010636,
"eval_recall": 0.8962406478031959,
"eval_runtime": 304.5592,
"eval_samples_per_second": 43.913,
"eval_steps_per_second": 1.372,
"step": 42000
},
{
"epoch": 12.59,
"grad_norm": 0.5748271346092224,
"learning_rate": 4.371167937789741e-05,
"loss": 0.0281,
"step": 42100
},
{
"epoch": 12.62,
"grad_norm": 0.36274582147598267,
"learning_rate": 4.369672498878421e-05,
"loss": 0.0248,
"step": 42200
},
{
"epoch": 12.65,
"grad_norm": 0.6130300164222717,
"learning_rate": 4.3681770599671006e-05,
"loss": 0.0269,
"step": 42300
},
{
"epoch": 12.68,
"grad_norm": 1.2477418184280396,
"learning_rate": 4.36668162105578e-05,
"loss": 0.0259,
"step": 42400
},
{
"epoch": 12.71,
"grad_norm": 0.8152483701705933,
"learning_rate": 4.36518618214446e-05,
"loss": 0.0263,
"step": 42500
},
{
"epoch": 12.74,
"grad_norm": 0.04731460288167,
"learning_rate": 4.363690743233139e-05,
"loss": 0.024,
"step": 42600
},
{
"epoch": 12.77,
"grad_norm": 0.7886996865272522,
"learning_rate": 4.362195304321819e-05,
"loss": 0.0245,
"step": 42700
},
{
"epoch": 12.8,
"grad_norm": 2.1900315284729004,
"learning_rate": 4.360699865410498e-05,
"loss": 0.0292,
"step": 42800
},
{
"epoch": 12.83,
"grad_norm": 0.45924192667007446,
"learning_rate": 4.3592044264991777e-05,
"loss": 0.0261,
"step": 42900
},
{
"epoch": 12.86,
"grad_norm": 0.07307754456996918,
"learning_rate": 4.3577089875878576e-05,
"loss": 0.0248,
"step": 43000
},
{
"epoch": 12.86,
"eval_loss": 0.24504822492599487,
"eval_precision": 0.9273960876319711,
"eval_recall": 0.9006127035930909,
"eval_runtime": 303.9567,
"eval_samples_per_second": 44.0,
"eval_steps_per_second": 1.375,
"step": 43000
},
{
"epoch": 12.89,
"grad_norm": 0.4676400423049927,
"learning_rate": 4.356213548676536e-05,
"loss": 0.0232,
"step": 43100
},
{
"epoch": 12.92,
"grad_norm": 0.2993585765361786,
"learning_rate": 4.354718109765216e-05,
"loss": 0.0237,
"step": 43200
},
{
"epoch": 12.95,
"grad_norm": 1.226276159286499,
"learning_rate": 4.353222670853896e-05,
"loss": 0.0256,
"step": 43300
},
{
"epoch": 12.98,
"grad_norm": 1.5110477209091187,
"learning_rate": 4.3517272319425754e-05,
"loss": 0.0285,
"step": 43400
},
{
"epoch": 13.01,
"grad_norm": 1.6162513494491577,
"learning_rate": 4.350231793031255e-05,
"loss": 0.0219,
"step": 43500
},
{
"epoch": 13.04,
"grad_norm": 0.1792839914560318,
"learning_rate": 4.348736354119935e-05,
"loss": 0.0191,
"step": 43600
},
{
"epoch": 13.07,
"grad_norm": 1.9044649600982666,
"learning_rate": 4.347240915208614e-05,
"loss": 0.017,
"step": 43700
},
{
"epoch": 13.1,
"grad_norm": 0.5899202823638916,
"learning_rate": 4.345745476297293e-05,
"loss": 0.0241,
"step": 43800
},
{
"epoch": 13.13,
"grad_norm": 0.6521077752113342,
"learning_rate": 4.344250037385973e-05,
"loss": 0.0216,
"step": 43900
},
{
"epoch": 13.16,
"grad_norm": 0.7596339583396912,
"learning_rate": 4.3427545984746525e-05,
"loss": 0.0181,
"step": 44000
},
{
"epoch": 13.16,
"eval_loss": 0.2613174319267273,
"eval_precision": 0.9276514907592247,
"eval_recall": 0.8870654884694725,
"eval_runtime": 304.3764,
"eval_samples_per_second": 43.939,
"eval_steps_per_second": 1.373,
"step": 44000
},
{
"epoch": 13.19,
"grad_norm": 1.0404387712478638,
"learning_rate": 4.3412591595633325e-05,
"loss": 0.0247,
"step": 44100
},
{
"epoch": 13.22,
"grad_norm": 1.7849115133285522,
"learning_rate": 4.339763720652011e-05,
"loss": 0.0188,
"step": 44200
},
{
"epoch": 13.25,
"grad_norm": 1.0972092151641846,
"learning_rate": 4.338268281740691e-05,
"loss": 0.0255,
"step": 44300
},
{
"epoch": 13.28,
"grad_norm": 0.7391771078109741,
"learning_rate": 4.336772842829371e-05,
"loss": 0.0225,
"step": 44400
},
{
"epoch": 13.31,
"grad_norm": 1.5010148286819458,
"learning_rate": 4.3352774039180497e-05,
"loss": 0.0217,
"step": 44500
},
{
"epoch": 13.34,
"grad_norm": 0.7189137935638428,
"learning_rate": 4.3337819650067296e-05,
"loss": 0.0211,
"step": 44600
},
{
"epoch": 13.37,
"grad_norm": 1.003636121749878,
"learning_rate": 4.3322865260954096e-05,
"loss": 0.0236,
"step": 44700
},
{
"epoch": 13.4,
"grad_norm": 0.914703369140625,
"learning_rate": 4.330791087184089e-05,
"loss": 0.0224,
"step": 44800
},
{
"epoch": 13.43,
"grad_norm": 0.1861487776041031,
"learning_rate": 4.329295648272768e-05,
"loss": 0.0251,
"step": 44900
},
{
"epoch": 13.46,
"grad_norm": 0.7734150886535645,
"learning_rate": 4.327800209361448e-05,
"loss": 0.0254,
"step": 45000
},
{
"epoch": 13.46,
"eval_loss": 0.2583397924900055,
"eval_precision": 0.9213451745124829,
"eval_recall": 0.9135441362110902,
"eval_runtime": 305.1941,
"eval_samples_per_second": 43.821,
"eval_steps_per_second": 1.37,
"step": 45000
},
{
"epoch": 13.49,
"grad_norm": 0.7596560716629028,
"learning_rate": 4.3263047704501274e-05,
"loss": 0.0246,
"step": 45100
},
{
"epoch": 13.52,
"grad_norm": 1.4200429916381836,
"learning_rate": 4.324809331538807e-05,
"loss": 0.0174,
"step": 45200
},
{
"epoch": 13.55,
"grad_norm": 2.7082788944244385,
"learning_rate": 4.323313892627486e-05,
"loss": 0.026,
"step": 45300
},
{
"epoch": 13.58,
"grad_norm": 1.2132717370986938,
"learning_rate": 4.321818453716166e-05,
"loss": 0.0228,
"step": 45400
},
{
"epoch": 13.61,
"grad_norm": 3.768927812576294,
"learning_rate": 4.320323014804846e-05,
"loss": 0.0236,
"step": 45500
},
{
"epoch": 13.64,
"grad_norm": 1.5163260698318481,
"learning_rate": 4.3188275758935245e-05,
"loss": 0.0189,
"step": 45600
},
{
"epoch": 13.67,
"grad_norm": 0.7969369888305664,
"learning_rate": 4.3173321369822045e-05,
"loss": 0.0245,
"step": 45700
},
{
"epoch": 13.7,
"grad_norm": 1.445375680923462,
"learning_rate": 4.3158366980708845e-05,
"loss": 0.0232,
"step": 45800
},
{
"epoch": 13.73,
"grad_norm": 0.04813400283455849,
"learning_rate": 4.314341259159563e-05,
"loss": 0.0215,
"step": 45900
},
{
"epoch": 13.76,
"grad_norm": 2.0303447246551514,
"learning_rate": 4.312845820248243e-05,
"loss": 0.0206,
"step": 46000
},
{
"epoch": 13.76,
"eval_loss": 0.2769757807254791,
"eval_precision": 0.9277020832674738,
"eval_recall": 0.9035376704947813,
"eval_runtime": 304.0355,
"eval_samples_per_second": 43.988,
"eval_steps_per_second": 1.375,
"step": 46000
},
{
"epoch": 13.79,
"grad_norm": 0.9254265427589417,
"learning_rate": 4.311350381336923e-05,
"loss": 0.0203,
"step": 46100
},
{
"epoch": 13.82,
"grad_norm": 2.1310763359069824,
"learning_rate": 4.309854942425602e-05,
"loss": 0.0206,
"step": 46200
},
{
"epoch": 13.85,
"grad_norm": 0.5353107452392578,
"learning_rate": 4.3083595035142816e-05,
"loss": 0.0206,
"step": 46300
},
{
"epoch": 13.88,
"grad_norm": 0.9395775198936462,
"learning_rate": 4.306864064602961e-05,
"loss": 0.0304,
"step": 46400
},
{
"epoch": 13.91,
"grad_norm": 0.056145694106817245,
"learning_rate": 4.305368625691641e-05,
"loss": 0.0237,
"step": 46500
},
{
"epoch": 13.94,
"grad_norm": 0.03264997899532318,
"learning_rate": 4.30387318678032e-05,
"loss": 0.0244,
"step": 46600
},
{
"epoch": 13.97,
"grad_norm": 1.6055926084518433,
"learning_rate": 4.3023777478689994e-05,
"loss": 0.0224,
"step": 46700
},
{
"epoch": 14.0,
"grad_norm": 1.4891152381896973,
"learning_rate": 4.3008823089576794e-05,
"loss": 0.021,
"step": 46800
},
{
"epoch": 14.03,
"grad_norm": 0.3057061731815338,
"learning_rate": 4.299386870046359e-05,
"loss": 0.0173,
"step": 46900
},
{
"epoch": 14.06,
"grad_norm": 1.0254565477371216,
"learning_rate": 4.297891431135038e-05,
"loss": 0.017,
"step": 47000
},
{
"epoch": 14.06,
"eval_loss": 0.2714207172393799,
"eval_precision": 0.9283886660138359,
"eval_recall": 0.9048923920071431,
"eval_runtime": 302.2817,
"eval_samples_per_second": 44.244,
"eval_steps_per_second": 1.383,
"step": 47000
},
{
"epoch": 14.08,
"grad_norm": 0.6178631782531738,
"learning_rate": 4.296395992223718e-05,
"loss": 0.021,
"step": 47100
},
{
"epoch": 14.11,
"grad_norm": 3.516096353530884,
"learning_rate": 4.294900553312398e-05,
"loss": 0.0181,
"step": 47200
},
{
"epoch": 14.14,
"grad_norm": 0.20362690091133118,
"learning_rate": 4.2934051144010765e-05,
"loss": 0.0193,
"step": 47300
},
{
"epoch": 14.17,
"grad_norm": 2.5930867195129395,
"learning_rate": 4.2919096754897565e-05,
"loss": 0.0176,
"step": 47400
},
{
"epoch": 14.2,
"grad_norm": 1.4823873043060303,
"learning_rate": 4.2904142365784364e-05,
"loss": 0.0173,
"step": 47500
},
{
"epoch": 14.23,
"grad_norm": 0.5278753042221069,
"learning_rate": 4.288918797667115e-05,
"loss": 0.0212,
"step": 47600
},
{
"epoch": 14.26,
"grad_norm": 1.855218529701233,
"learning_rate": 4.287423358755795e-05,
"loss": 0.0199,
"step": 47700
},
{
"epoch": 14.29,
"grad_norm": 0.31464433670043945,
"learning_rate": 4.285927919844474e-05,
"loss": 0.0241,
"step": 47800
},
{
"epoch": 14.32,
"grad_norm": 0.2182936817407608,
"learning_rate": 4.284432480933154e-05,
"loss": 0.0172,
"step": 47900
},
{
"epoch": 14.35,
"grad_norm": 1.2800421714782715,
"learning_rate": 4.2829370420218336e-05,
"loss": 0.0188,
"step": 48000
},
{
"epoch": 14.35,
"eval_loss": 0.26452192664146423,
"eval_precision": 0.9272217673363986,
"eval_recall": 0.9065242156470334,
"eval_runtime": 302.9199,
"eval_samples_per_second": 44.15,
"eval_steps_per_second": 1.38,
"step": 48000
},
{
"epoch": 14.38,
"grad_norm": 3.320737361907959,
"learning_rate": 4.281441603110513e-05,
"loss": 0.0198,
"step": 48100
},
{
"epoch": 14.41,
"grad_norm": 0.8519121408462524,
"learning_rate": 4.279946164199193e-05,
"loss": 0.0182,
"step": 48200
},
{
"epoch": 14.44,
"grad_norm": 0.4318147599697113,
"learning_rate": 4.278450725287872e-05,
"loss": 0.0178,
"step": 48300
},
{
"epoch": 14.47,
"grad_norm": 0.047759074717760086,
"learning_rate": 4.2769552863765514e-05,
"loss": 0.021,
"step": 48400
},
{
"epoch": 14.5,
"grad_norm": 1.6022422313690186,
"learning_rate": 4.2754598474652314e-05,
"loss": 0.0144,
"step": 48500
},
{
"epoch": 14.53,
"grad_norm": 0.7104184031486511,
"learning_rate": 4.273964408553911e-05,
"loss": 0.0207,
"step": 48600
},
{
"epoch": 14.56,
"grad_norm": 1.5093780755996704,
"learning_rate": 4.27246896964259e-05,
"loss": 0.0205,
"step": 48700
},
{
"epoch": 14.59,
"grad_norm": 0.7566470503807068,
"learning_rate": 4.27097353073127e-05,
"loss": 0.0187,
"step": 48800
},
{
"epoch": 14.62,
"grad_norm": 1.222693920135498,
"learning_rate": 4.269478091819949e-05,
"loss": 0.0199,
"step": 48900
},
{
"epoch": 14.65,
"grad_norm": 1.5546650886535645,
"learning_rate": 4.2679826529086285e-05,
"loss": 0.0188,
"step": 49000
},
{
"epoch": 14.65,
"eval_loss": 0.2760772109031677,
"eval_precision": 0.9305101058710299,
"eval_recall": 0.8930077896486961,
"eval_runtime": 301.8588,
"eval_samples_per_second": 44.305,
"eval_steps_per_second": 1.385,
"step": 49000
},
{
"epoch": 14.68,
"grad_norm": 0.6152912378311157,
"learning_rate": 4.2664872139973084e-05,
"loss": 0.0199,
"step": 49100
},
{
"epoch": 14.71,
"grad_norm": 0.8479551672935486,
"learning_rate": 4.264991775085988e-05,
"loss": 0.0236,
"step": 49200
},
{
"epoch": 14.74,
"grad_norm": 2.0793190002441406,
"learning_rate": 4.263496336174668e-05,
"loss": 0.0257,
"step": 49300
},
{
"epoch": 14.77,
"grad_norm": 0.9795339107513428,
"learning_rate": 4.262000897263347e-05,
"loss": 0.019,
"step": 49400
},
{
"epoch": 14.8,
"grad_norm": 0.49018004536628723,
"learning_rate": 4.260505458352026e-05,
"loss": 0.0207,
"step": 49500
},
{
"epoch": 14.83,
"grad_norm": 0.22400274872779846,
"learning_rate": 4.259010019440706e-05,
"loss": 0.0212,
"step": 49600
},
{
"epoch": 14.86,
"grad_norm": 0.8345464468002319,
"learning_rate": 4.2575145805293855e-05,
"loss": 0.0182,
"step": 49700
},
{
"epoch": 14.89,
"grad_norm": 0.2443341612815857,
"learning_rate": 4.256019141618065e-05,
"loss": 0.0177,
"step": 49800
},
{
"epoch": 14.92,
"grad_norm": 0.697216272354126,
"learning_rate": 4.254523702706745e-05,
"loss": 0.0216,
"step": 49900
},
{
"epoch": 14.95,
"grad_norm": 0.5050187706947327,
"learning_rate": 4.253028263795424e-05,
"loss": 0.0166,
"step": 50000
},
{
"epoch": 14.95,
"eval_loss": 0.282767653465271,
"eval_precision": 0.9254008757836374,
"eval_recall": 0.9044305551279288,
"eval_runtime": 303.3682,
"eval_samples_per_second": 44.085,
"eval_steps_per_second": 1.378,
"step": 50000
},
{
"epoch": 14.98,
"grad_norm": 0.4018344283103943,
"learning_rate": 4.2515328248841034e-05,
"loss": 0.02,
"step": 50100
},
{
"epoch": 15.01,
"grad_norm": 2.2681732177734375,
"learning_rate": 4.250037385972783e-05,
"loss": 0.0169,
"step": 50200
},
{
"epoch": 15.04,
"grad_norm": 0.18065716326236725,
"learning_rate": 4.2485419470614626e-05,
"loss": 0.0163,
"step": 50300
},
{
"epoch": 15.07,
"grad_norm": 1.0265353918075562,
"learning_rate": 4.247046508150142e-05,
"loss": 0.0201,
"step": 50400
},
{
"epoch": 15.1,
"grad_norm": 1.7455101013183594,
"learning_rate": 4.245551069238822e-05,
"loss": 0.0174,
"step": 50500
},
{
"epoch": 15.13,
"grad_norm": 0.03697839379310608,
"learning_rate": 4.244055630327501e-05,
"loss": 0.021,
"step": 50600
},
{
"epoch": 15.16,
"grad_norm": 0.10842275619506836,
"learning_rate": 4.242560191416181e-05,
"loss": 0.0196,
"step": 50700
},
{
"epoch": 15.19,
"grad_norm": 0.6541497111320496,
"learning_rate": 4.2410647525048604e-05,
"loss": 0.019,
"step": 50800
},
{
"epoch": 15.22,
"grad_norm": 1.3006408214569092,
"learning_rate": 4.23956931359354e-05,
"loss": 0.0178,
"step": 50900
},
{
"epoch": 15.25,
"grad_norm": 0.6021150350570679,
"learning_rate": 4.23807387468222e-05,
"loss": 0.0199,
"step": 51000
},
{
"epoch": 15.25,
"eval_loss": 0.2640076279640198,
"eval_precision": 0.9301819557882123,
"eval_recall": 0.9081868284122048,
"eval_runtime": 302.9987,
"eval_samples_per_second": 44.139,
"eval_steps_per_second": 1.38,
"step": 51000
},
{
"epoch": 15.28,
"grad_norm": 0.8783787488937378,
"learning_rate": 4.236578435770899e-05,
"loss": 0.0175,
"step": 51100
},
{
"epoch": 15.31,
"grad_norm": 0.18405625224113464,
"learning_rate": 4.235082996859578e-05,
"loss": 0.0152,
"step": 51200
},
{
"epoch": 15.34,
"grad_norm": 0.03877532109618187,
"learning_rate": 4.233587557948258e-05,
"loss": 0.0174,
"step": 51300
},
{
"epoch": 15.37,
"grad_norm": 0.3079793155193329,
"learning_rate": 4.2320921190369375e-05,
"loss": 0.015,
"step": 51400
},
{
"epoch": 15.4,
"grad_norm": 0.9296764731407166,
"learning_rate": 4.230596680125617e-05,
"loss": 0.0177,
"step": 51500
},
{
"epoch": 15.43,
"grad_norm": 0.7762422561645508,
"learning_rate": 4.229101241214297e-05,
"loss": 0.0195,
"step": 51600
},
{
"epoch": 15.46,
"grad_norm": 2.472615957260132,
"learning_rate": 4.227605802302976e-05,
"loss": 0.0195,
"step": 51700
},
{
"epoch": 15.49,
"grad_norm": 2.8045852184295654,
"learning_rate": 4.226110363391655e-05,
"loss": 0.0201,
"step": 51800
},
{
"epoch": 15.52,
"grad_norm": 0.053874421864748,
"learning_rate": 4.224614924480335e-05,
"loss": 0.018,
"step": 51900
},
{
"epoch": 15.55,
"grad_norm": 0.3398553729057312,
"learning_rate": 4.2231194855690146e-05,
"loss": 0.0167,
"step": 52000
},
{
"epoch": 15.55,
"eval_loss": 0.2754287123680115,
"eval_precision": 0.927292017724521,
"eval_recall": 0.914929646848733,
"eval_runtime": 302.6973,
"eval_samples_per_second": 44.183,
"eval_steps_per_second": 1.381,
"step": 52000
},
{
"epoch": 15.58,
"grad_norm": 1.1841187477111816,
"learning_rate": 4.2216240466576945e-05,
"loss": 0.0157,
"step": 52100
},
{
"epoch": 15.61,
"grad_norm": 1.0184565782546997,
"learning_rate": 4.220128607746374e-05,
"loss": 0.0145,
"step": 52200
},
{
"epoch": 15.64,
"grad_norm": 0.6707783937454224,
"learning_rate": 4.218633168835053e-05,
"loss": 0.0215,
"step": 52300
},
{
"epoch": 15.67,
"grad_norm": 0.8084210157394409,
"learning_rate": 4.217137729923733e-05,
"loss": 0.0185,
"step": 52400
},
{
"epoch": 15.7,
"grad_norm": 0.24998579919338226,
"learning_rate": 4.2156422910124124e-05,
"loss": 0.0192,
"step": 52500
},
{
"epoch": 15.73,
"grad_norm": 0.11048603802919388,
"learning_rate": 4.214146852101092e-05,
"loss": 0.0177,
"step": 52600
},
{
"epoch": 15.76,
"grad_norm": 0.8540931940078735,
"learning_rate": 4.2126514131897716e-05,
"loss": 0.018,
"step": 52700
},
{
"epoch": 15.79,
"grad_norm": 0.3726775646209717,
"learning_rate": 4.211155974278451e-05,
"loss": 0.0181,
"step": 52800
},
{
"epoch": 15.82,
"grad_norm": 0.13543102145195007,
"learning_rate": 4.20966053536713e-05,
"loss": 0.0201,
"step": 52900
},
{
"epoch": 15.85,
"grad_norm": 0.3862367570400238,
"learning_rate": 4.20816509645581e-05,
"loss": 0.0184,
"step": 53000
},
{
"epoch": 15.85,
"eval_loss": 0.2746909558773041,
"eval_precision": 0.9290507850298093,
"eval_recall": 0.9164075248622187,
"eval_runtime": 304.9585,
"eval_samples_per_second": 43.855,
"eval_steps_per_second": 1.371,
"step": 53000
},
{
"epoch": 15.88,
"grad_norm": 0.5059983730316162,
"learning_rate": 4.2066696575444895e-05,
"loss": 0.0182,
"step": 53100
},
{
"epoch": 15.91,
"grad_norm": 0.45346036553382874,
"learning_rate": 4.205174218633169e-05,
"loss": 0.0208,
"step": 53200
},
{
"epoch": 15.94,
"grad_norm": 1.0658683776855469,
"learning_rate": 4.203678779721849e-05,
"loss": 0.0149,
"step": 53300
},
{
"epoch": 15.97,
"grad_norm": 0.2168959081172943,
"learning_rate": 4.202183340810528e-05,
"loss": 0.0191,
"step": 53400
},
{
"epoch": 16.0,
"grad_norm": 1.7620713710784912,
"learning_rate": 4.200687901899207e-05,
"loss": 0.0219,
"step": 53500
},
{
"epoch": 16.03,
"grad_norm": 0.33198004961013794,
"learning_rate": 4.199192462987887e-05,
"loss": 0.014,
"step": 53600
},
{
"epoch": 16.06,
"grad_norm": 3.614070415496826,
"learning_rate": 4.1976970240765665e-05,
"loss": 0.0132,
"step": 53700
},
{
"epoch": 16.09,
"grad_norm": 0.7846044898033142,
"learning_rate": 4.1962015851652465e-05,
"loss": 0.014,
"step": 53800
},
{
"epoch": 16.12,
"grad_norm": 1.2382973432540894,
"learning_rate": 4.194706146253926e-05,
"loss": 0.0198,
"step": 53900
},
{
"epoch": 16.15,
"grad_norm": 1.7487576007843018,
"learning_rate": 4.193210707342605e-05,
"loss": 0.0156,
"step": 54000
},
{
"epoch": 16.15,
"eval_loss": 0.27493321895599365,
"eval_precision": 0.926791958041958,
"eval_recall": 0.9140367622155855,
"eval_runtime": 304.8434,
"eval_samples_per_second": 43.872,
"eval_steps_per_second": 1.371,
"step": 54000
},
{
"epoch": 16.18,
"grad_norm": 2.473257541656494,
"learning_rate": 4.191715268431285e-05,
"loss": 0.0144,
"step": 54100
},
{
"epoch": 16.21,
"grad_norm": 1.7735458612442017,
"learning_rate": 4.1902198295199643e-05,
"loss": 0.0128,
"step": 54200
},
{
"epoch": 16.24,
"grad_norm": 0.09201900660991669,
"learning_rate": 4.1887243906086436e-05,
"loss": 0.0121,
"step": 54300
},
{
"epoch": 16.27,
"grad_norm": 4.265335559844971,
"learning_rate": 4.1872289516973236e-05,
"loss": 0.0193,
"step": 54400
},
{
"epoch": 16.3,
"grad_norm": 0.05550719425082207,
"learning_rate": 4.185733512786003e-05,
"loss": 0.0191,
"step": 54500
},
{
"epoch": 16.33,
"grad_norm": 1.2244312763214111,
"learning_rate": 4.184238073874682e-05,
"loss": 0.0144,
"step": 54600
},
{
"epoch": 16.36,
"grad_norm": 0.11609119921922684,
"learning_rate": 4.182742634963362e-05,
"loss": 0.0195,
"step": 54700
},
{
"epoch": 16.39,
"grad_norm": 0.7442992329597473,
"learning_rate": 4.1812471960520414e-05,
"loss": 0.0161,
"step": 54800
},
{
"epoch": 16.42,
"grad_norm": 1.913397192955017,
"learning_rate": 4.179751757140721e-05,
"loss": 0.017,
"step": 54900
},
{
"epoch": 16.45,
"grad_norm": 1.5975757837295532,
"learning_rate": 4.178256318229401e-05,
"loss": 0.0131,
"step": 55000
},
{
"epoch": 16.45,
"eval_loss": 0.28440138697624207,
"eval_precision": 0.9323552610821896,
"eval_recall": 0.9098494411773762,
"eval_runtime": 302.3846,
"eval_samples_per_second": 44.228,
"eval_steps_per_second": 1.382,
"step": 55000
},
{
"epoch": 16.48,
"grad_norm": 0.02616269886493683,
"learning_rate": 4.17676087931808e-05,
"loss": 0.0166,
"step": 55100
},
{
"epoch": 16.51,
"grad_norm": 0.270749032497406,
"learning_rate": 4.17526544040676e-05,
"loss": 0.0167,
"step": 55200
},
{
"epoch": 16.54,
"grad_norm": 0.8699542880058289,
"learning_rate": 4.173770001495439e-05,
"loss": 0.0178,
"step": 55300
},
{
"epoch": 16.57,
"grad_norm": 0.15558452904224396,
"learning_rate": 4.1722745625841185e-05,
"loss": 0.0155,
"step": 55400
},
{
"epoch": 16.6,
"grad_norm": 1.3881036043167114,
"learning_rate": 4.1707791236727985e-05,
"loss": 0.0162,
"step": 55500
},
{
"epoch": 16.63,
"grad_norm": 1.0590258836746216,
"learning_rate": 4.169283684761478e-05,
"loss": 0.019,
"step": 55600
},
{
"epoch": 16.66,
"grad_norm": 0.6527047157287598,
"learning_rate": 4.167788245850157e-05,
"loss": 0.0162,
"step": 55700
},
{
"epoch": 16.69,
"grad_norm": 0.7468928694725037,
"learning_rate": 4.166292806938837e-05,
"loss": 0.0187,
"step": 55800
},
{
"epoch": 16.72,
"grad_norm": 1.1580772399902344,
"learning_rate": 4.164797368027516e-05,
"loss": 0.0152,
"step": 55900
},
{
"epoch": 16.75,
"grad_norm": 0.27484288811683655,
"learning_rate": 4.1633019291161956e-05,
"loss": 0.018,
"step": 56000
},
{
"epoch": 16.75,
"eval_loss": 0.2911526560783386,
"eval_precision": 0.9246059786783004,
"eval_recall": 0.9265987253302134,
"eval_runtime": 304.1503,
"eval_samples_per_second": 43.972,
"eval_steps_per_second": 1.374,
"step": 56000
},
{
"epoch": 16.78,
"grad_norm": 0.12976956367492676,
"learning_rate": 4.1618064902048756e-05,
"loss": 0.0185,
"step": 56100
},
{
"epoch": 16.81,
"grad_norm": 0.37897953391075134,
"learning_rate": 4.160311051293555e-05,
"loss": 0.0152,
"step": 56200
},
{
"epoch": 16.84,
"grad_norm": 0.07681228220462799,
"learning_rate": 4.158815612382234e-05,
"loss": 0.0163,
"step": 56300
},
{
"epoch": 16.87,
"grad_norm": 0.5966798663139343,
"learning_rate": 4.157320173470914e-05,
"loss": 0.014,
"step": 56400
},
{
"epoch": 16.9,
"grad_norm": 0.29120373725891113,
"learning_rate": 4.1558247345595934e-05,
"loss": 0.018,
"step": 56500
},
{
"epoch": 16.93,
"grad_norm": 0.4325448274612427,
"learning_rate": 4.1543292956482734e-05,
"loss": 0.0145,
"step": 56600
},
{
"epoch": 16.96,
"grad_norm": 1.473797082901001,
"learning_rate": 4.1528338567369527e-05,
"loss": 0.0164,
"step": 56700
},
{
"epoch": 16.99,
"grad_norm": 0.963238537311554,
"learning_rate": 4.151338417825632e-05,
"loss": 0.0168,
"step": 56800
},
{
"epoch": 17.02,
"grad_norm": 1.2749171257019043,
"learning_rate": 4.149842978914312e-05,
"loss": 0.0172,
"step": 56900
},
{
"epoch": 17.05,
"grad_norm": 0.1201496422290802,
"learning_rate": 4.148347540002991e-05,
"loss": 0.0132,
"step": 57000
},
{
"epoch": 17.05,
"eval_loss": 0.2895963788032532,
"eval_precision": 0.9242246747641655,
"eval_recall": 0.9230579759229041,
"eval_runtime": 304.3955,
"eval_samples_per_second": 43.936,
"eval_steps_per_second": 1.373,
"step": 57000
},
{
"epoch": 17.08,
"grad_norm": 0.0923817902803421,
"learning_rate": 4.1468521010916705e-05,
"loss": 0.0155,
"step": 57100
},
{
"epoch": 17.11,
"grad_norm": 0.17687027156352997,
"learning_rate": 4.1453566621803505e-05,
"loss": 0.0142,
"step": 57200
},
{
"epoch": 17.14,
"grad_norm": 0.5095121264457703,
"learning_rate": 4.14386122326903e-05,
"loss": 0.0122,
"step": 57300
},
{
"epoch": 17.17,
"grad_norm": 0.14807282388210297,
"learning_rate": 4.142365784357709e-05,
"loss": 0.0122,
"step": 57400
},
{
"epoch": 17.19,
"grad_norm": 0.22806455194950104,
"learning_rate": 4.140870345446389e-05,
"loss": 0.0126,
"step": 57500
},
{
"epoch": 17.22,
"grad_norm": 0.1654992550611496,
"learning_rate": 4.139374906535068e-05,
"loss": 0.012,
"step": 57600
},
{
"epoch": 17.25,
"grad_norm": 1.1821808815002441,
"learning_rate": 4.1378794676237476e-05,
"loss": 0.0154,
"step": 57700
},
{
"epoch": 17.28,
"grad_norm": 0.33708083629608154,
"learning_rate": 4.1363840287124275e-05,
"loss": 0.0118,
"step": 57800
},
{
"epoch": 17.31,
"grad_norm": 0.2778627276420593,
"learning_rate": 4.134888589801107e-05,
"loss": 0.0153,
"step": 57900
},
{
"epoch": 17.34,
"grad_norm": 0.4350825250148773,
"learning_rate": 4.133393150889787e-05,
"loss": 0.0131,
"step": 58000
},
{
"epoch": 17.34,
"eval_loss": 0.2985839247703552,
"eval_precision": 0.9294326572576876,
"eval_recall": 0.9185011853813233,
"eval_runtime": 303.6403,
"eval_samples_per_second": 44.046,
"eval_steps_per_second": 1.377,
"step": 58000
},
{
"epoch": 17.37,
"grad_norm": 1.0241811275482178,
"learning_rate": 4.131897711978466e-05,
"loss": 0.0152,
"step": 58100
},
{
"epoch": 17.4,
"grad_norm": 0.705042839050293,
"learning_rate": 4.1304022730671454e-05,
"loss": 0.0165,
"step": 58200
},
{
"epoch": 17.43,
"grad_norm": 0.9130484461784363,
"learning_rate": 4.128906834155825e-05,
"loss": 0.0143,
"step": 58300
},
{
"epoch": 17.46,
"grad_norm": 0.0633108988404274,
"learning_rate": 4.127411395244504e-05,
"loss": 0.0147,
"step": 58400
},
{
"epoch": 17.49,
"grad_norm": 1.2173391580581665,
"learning_rate": 4.125915956333184e-05,
"loss": 0.0134,
"step": 58500
},
{
"epoch": 17.52,
"grad_norm": 2.9922380447387695,
"learning_rate": 4.124420517421864e-05,
"loss": 0.0145,
"step": 58600
},
{
"epoch": 17.55,
"grad_norm": 0.015288499183952808,
"learning_rate": 4.1229250785105425e-05,
"loss": 0.0169,
"step": 58700
},
{
"epoch": 17.58,
"grad_norm": 1.87058424949646,
"learning_rate": 4.1214296395992225e-05,
"loss": 0.0158,
"step": 58800
},
{
"epoch": 17.61,
"grad_norm": 0.31113335490226746,
"learning_rate": 4.1199342006879024e-05,
"loss": 0.0151,
"step": 58900
},
{
"epoch": 17.64,
"grad_norm": 0.8044542670249939,
"learning_rate": 4.118438761776582e-05,
"loss": 0.0143,
"step": 59000
},
{
"epoch": 17.64,
"eval_loss": 0.2973649501800537,
"eval_precision": 0.9298240060774879,
"eval_recall": 0.9044305551279288,
"eval_runtime": 302.1441,
"eval_samples_per_second": 44.264,
"eval_steps_per_second": 1.383,
"step": 59000
},
{
"epoch": 17.67,
"grad_norm": 0.08827254921197891,
"learning_rate": 4.116943322865261e-05,
"loss": 0.0157,
"step": 59100
},
{
"epoch": 17.7,
"grad_norm": 1.8845312595367432,
"learning_rate": 4.115447883953941e-05,
"loss": 0.0155,
"step": 59200
},
{
"epoch": 17.73,
"grad_norm": 0.49602124094963074,
"learning_rate": 4.11395244504262e-05,
"loss": 0.0162,
"step": 59300
},
{
"epoch": 17.76,
"grad_norm": 0.3592805564403534,
"learning_rate": 4.1124570061312995e-05,
"loss": 0.0149,
"step": 59400
},
{
"epoch": 17.79,
"grad_norm": 1.320101261138916,
"learning_rate": 4.110961567219979e-05,
"loss": 0.0156,
"step": 59500
},
{
"epoch": 17.82,
"grad_norm": 0.4389740526676178,
"learning_rate": 4.109466128308659e-05,
"loss": 0.0151,
"step": 59600
},
{
"epoch": 17.85,
"grad_norm": 1.6578569412231445,
"learning_rate": 4.107970689397339e-05,
"loss": 0.0166,
"step": 59700
},
{
"epoch": 17.88,
"grad_norm": 1.7992475032806396,
"learning_rate": 4.1064752504860174e-05,
"loss": 0.0148,
"step": 59800
},
{
"epoch": 17.91,
"grad_norm": 0.026478100568056107,
"learning_rate": 4.1049798115746973e-05,
"loss": 0.0158,
"step": 59900
},
{
"epoch": 17.94,
"grad_norm": 2.8473379611968994,
"learning_rate": 4.103484372663377e-05,
"loss": 0.0159,
"step": 60000
},
{
"epoch": 17.94,
"eval_loss": 0.2935677468776703,
"eval_precision": 0.9302795129030222,
"eval_recall": 0.9079097262846763,
"eval_runtime": 302.5843,
"eval_samples_per_second": 44.199,
"eval_steps_per_second": 1.381,
"step": 60000
},
{
"epoch": 17.97,
"grad_norm": 2.1734695434570312,
"learning_rate": 4.101988933752056e-05,
"loss": 0.0183,
"step": 60100
},
{
"epoch": 18.0,
"grad_norm": 0.14518772065639496,
"learning_rate": 4.100493494840736e-05,
"loss": 0.0172,
"step": 60200
},
{
"epoch": 18.03,
"grad_norm": 0.3986850380897522,
"learning_rate": 4.098998055929416e-05,
"loss": 0.0101,
"step": 60300
},
{
"epoch": 18.06,
"grad_norm": 1.78749680519104,
"learning_rate": 4.097502617018095e-05,
"loss": 0.0123,
"step": 60400
},
{
"epoch": 18.09,
"grad_norm": 0.43207836151123047,
"learning_rate": 4.0960071781067744e-05,
"loss": 0.0132,
"step": 60500
},
{
"epoch": 18.12,
"grad_norm": 0.11268942803144455,
"learning_rate": 4.0945117391954544e-05,
"loss": 0.0131,
"step": 60600
},
{
"epoch": 18.15,
"grad_norm": 0.5929433107376099,
"learning_rate": 4.093016300284134e-05,
"loss": 0.0118,
"step": 60700
},
{
"epoch": 18.18,
"grad_norm": 0.012462102808058262,
"learning_rate": 4.091520861372813e-05,
"loss": 0.0114,
"step": 60800
},
{
"epoch": 18.21,
"grad_norm": 0.03992025554180145,
"learning_rate": 4.090025422461492e-05,
"loss": 0.0123,
"step": 60900
},
{
"epoch": 18.24,
"grad_norm": 0.2556318938732147,
"learning_rate": 4.088529983550172e-05,
"loss": 0.0163,
"step": 61000
},
{
"epoch": 18.24,
"eval_loss": 0.3005661070346832,
"eval_precision": 0.930046845034112,
"eval_recall": 0.9108039040610856,
"eval_runtime": 303.0262,
"eval_samples_per_second": 44.135,
"eval_steps_per_second": 1.379,
"step": 61000
},
{
"epoch": 18.27,
"grad_norm": 0.0933234691619873,
"learning_rate": 4.087034544638852e-05,
"loss": 0.0139,
"step": 61100
},
{
"epoch": 18.3,
"grad_norm": 4.561667442321777,
"learning_rate": 4.085539105727531e-05,
"loss": 0.015,
"step": 61200
},
{
"epoch": 18.33,
"grad_norm": 1.8393715620040894,
"learning_rate": 4.084043666816211e-05,
"loss": 0.0113,
"step": 61300
},
{
"epoch": 18.36,
"grad_norm": 0.5815320611000061,
"learning_rate": 4.082548227904891e-05,
"loss": 0.0158,
"step": 61400
},
{
"epoch": 18.39,
"grad_norm": 0.9265565872192383,
"learning_rate": 4.0810527889935693e-05,
"loss": 0.0165,
"step": 61500
},
{
"epoch": 18.42,
"grad_norm": 0.029577825218439102,
"learning_rate": 4.079557350082249e-05,
"loss": 0.0151,
"step": 61600
},
{
"epoch": 18.45,
"grad_norm": 0.13609355688095093,
"learning_rate": 4.078061911170929e-05,
"loss": 0.0147,
"step": 61700
},
{
"epoch": 18.48,
"grad_norm": 0.2505282461643219,
"learning_rate": 4.0765664722596086e-05,
"loss": 0.0117,
"step": 61800
},
{
"epoch": 18.51,
"grad_norm": 0.49616509675979614,
"learning_rate": 4.075071033348288e-05,
"loss": 0.0136,
"step": 61900
},
{
"epoch": 18.54,
"grad_norm": 1.4143670797348022,
"learning_rate": 4.073575594436967e-05,
"loss": 0.0199,
"step": 62000
},
{
"epoch": 18.54,
"eval_loss": 0.28239989280700684,
"eval_precision": 0.9322552865754473,
"eval_recall": 0.89993534283691,
"eval_runtime": 303.1737,
"eval_samples_per_second": 44.113,
"eval_steps_per_second": 1.379,
"step": 62000
},
{
"epoch": 18.57,
"grad_norm": 2.5461013317108154,
"learning_rate": 4.072080155525647e-05,
"loss": 0.0122,
"step": 62100
},
{
"epoch": 18.6,
"grad_norm": 0.3786807358264923,
"learning_rate": 4.0705847166143264e-05,
"loss": 0.0122,
"step": 62200
},
{
"epoch": 18.63,
"grad_norm": 1.546884536743164,
"learning_rate": 4.069089277703006e-05,
"loss": 0.0133,
"step": 62300
},
{
"epoch": 18.66,
"grad_norm": 0.04791215434670448,
"learning_rate": 4.0675938387916856e-05,
"loss": 0.0118,
"step": 62400
},
{
"epoch": 18.69,
"grad_norm": 0.12534143030643463,
"learning_rate": 4.0660983998803656e-05,
"loss": 0.0145,
"step": 62500
},
{
"epoch": 18.72,
"grad_norm": 1.358917474746704,
"learning_rate": 4.064602960969044e-05,
"loss": 0.0152,
"step": 62600
},
{
"epoch": 18.75,
"grad_norm": 0.10757000744342804,
"learning_rate": 4.063107522057724e-05,
"loss": 0.0155,
"step": 62700
},
{
"epoch": 18.78,
"grad_norm": 2.365614652633667,
"learning_rate": 4.061612083146404e-05,
"loss": 0.0156,
"step": 62800
},
{
"epoch": 18.81,
"grad_norm": 0.4936872124671936,
"learning_rate": 4.060116644235083e-05,
"loss": 0.0132,
"step": 62900
},
{
"epoch": 18.84,
"grad_norm": 0.022019200026988983,
"learning_rate": 4.058621205323763e-05,
"loss": 0.0124,
"step": 63000
},
{
"epoch": 18.84,
"eval_loss": 0.30277740955352783,
"eval_precision": 0.930499515185637,
"eval_recall": 0.9159456879830044,
"eval_runtime": 304.0566,
"eval_samples_per_second": 43.985,
"eval_steps_per_second": 1.375,
"step": 63000
},
{
"epoch": 18.87,
"grad_norm": 0.3624964654445648,
"learning_rate": 4.057125766412442e-05,
"loss": 0.0155,
"step": 63100
},
{
"epoch": 18.9,
"grad_norm": 1.7629303932189941,
"learning_rate": 4.055630327501122e-05,
"loss": 0.0139,
"step": 63200
},
{
"epoch": 18.93,
"grad_norm": 0.18042436242103577,
"learning_rate": 4.054134888589801e-05,
"loss": 0.0179,
"step": 63300
},
{
"epoch": 18.96,
"grad_norm": 0.20951129496097565,
"learning_rate": 4.0526394496784806e-05,
"loss": 0.0172,
"step": 63400
},
{
"epoch": 18.99,
"grad_norm": 0.8891457915306091,
"learning_rate": 4.0511440107671605e-05,
"loss": 0.0126,
"step": 63500
},
{
"epoch": 19.02,
"grad_norm": 0.22427305579185486,
"learning_rate": 4.04964857185584e-05,
"loss": 0.0112,
"step": 63600
},
{
"epoch": 19.05,
"grad_norm": 0.25893327593803406,
"learning_rate": 4.048153132944519e-05,
"loss": 0.0123,
"step": 63700
},
{
"epoch": 19.08,
"grad_norm": 1.579196810722351,
"learning_rate": 4.046657694033199e-05,
"loss": 0.0117,
"step": 63800
},
{
"epoch": 19.11,
"grad_norm": 1.801465630531311,
"learning_rate": 4.045162255121879e-05,
"loss": 0.0113,
"step": 63900
},
{
"epoch": 19.14,
"grad_norm": 3.969907522201538,
"learning_rate": 4.0436668162105577e-05,
"loss": 0.0132,
"step": 64000
},
{
"epoch": 19.14,
"eval_loss": 0.3150152266025543,
"eval_precision": 0.9289555972482801,
"eval_recall": 0.9146833338464854,
"eval_runtime": 304.0309,
"eval_samples_per_second": 43.989,
"eval_steps_per_second": 1.375,
"step": 64000
},
{
"epoch": 19.17,
"grad_norm": 1.5782831907272339,
"learning_rate": 4.0421713772992376e-05,
"loss": 0.0106,
"step": 64100
},
{
"epoch": 19.2,
"grad_norm": 1.0305448770523071,
"learning_rate": 4.0406759383879176e-05,
"loss": 0.0115,
"step": 64200
},
{
"epoch": 19.23,
"grad_norm": 0.8879725337028503,
"learning_rate": 4.039180499476596e-05,
"loss": 0.0108,
"step": 64300
},
{
"epoch": 19.26,
"grad_norm": 1.0525989532470703,
"learning_rate": 4.037685060565276e-05,
"loss": 0.0113,
"step": 64400
},
{
"epoch": 19.29,
"grad_norm": 0.19859521090984344,
"learning_rate": 4.0361896216539554e-05,
"loss": 0.011,
"step": 64500
},
{
"epoch": 19.32,
"grad_norm": 1.628808856010437,
"learning_rate": 4.034694182742635e-05,
"loss": 0.0126,
"step": 64600
},
{
"epoch": 19.35,
"grad_norm": 0.45845118165016174,
"learning_rate": 4.033198743831315e-05,
"loss": 0.0117,
"step": 64700
},
{
"epoch": 19.38,
"grad_norm": 0.02105000615119934,
"learning_rate": 4.031703304919994e-05,
"loss": 0.0103,
"step": 64800
},
{
"epoch": 19.41,
"grad_norm": 1.2173235416412354,
"learning_rate": 4.030207866008674e-05,
"loss": 0.013,
"step": 64900
},
{
"epoch": 19.44,
"grad_norm": 1.0716986656188965,
"learning_rate": 4.028712427097353e-05,
"loss": 0.0136,
"step": 65000
},
{
"epoch": 19.44,
"eval_loss": 0.30169057846069336,
"eval_precision": 0.9307780320366132,
"eval_recall": 0.9016903229779242,
"eval_runtime": 303.9363,
"eval_samples_per_second": 44.003,
"eval_steps_per_second": 1.375,
"step": 65000
},
{
"epoch": 19.47,
"grad_norm": 0.060731422156095505,
"learning_rate": 4.0272169881860325e-05,
"loss": 0.0103,
"step": 65100
},
{
"epoch": 19.5,
"grad_norm": 1.8369615077972412,
"learning_rate": 4.0257215492747125e-05,
"loss": 0.0149,
"step": 65200
},
{
"epoch": 19.53,
"grad_norm": 0.5922613143920898,
"learning_rate": 4.024226110363392e-05,
"loss": 0.0137,
"step": 65300
},
{
"epoch": 19.56,
"grad_norm": 1.1230493783950806,
"learning_rate": 4.022730671452071e-05,
"loss": 0.016,
"step": 65400
},
{
"epoch": 19.59,
"grad_norm": 0.9484757781028748,
"learning_rate": 4.021235232540751e-05,
"loss": 0.0126,
"step": 65500
},
{
"epoch": 19.62,
"grad_norm": 0.40328437089920044,
"learning_rate": 4.01973979362943e-05,
"loss": 0.014,
"step": 65600
},
{
"epoch": 19.65,
"grad_norm": 1.251897931098938,
"learning_rate": 4.0182443547181096e-05,
"loss": 0.0152,
"step": 65700
},
{
"epoch": 19.68,
"grad_norm": 0.06640147417783737,
"learning_rate": 4.0167489158067896e-05,
"loss": 0.0119,
"step": 65800
},
{
"epoch": 19.71,
"grad_norm": 0.08419325947761536,
"learning_rate": 4.015253476895469e-05,
"loss": 0.0104,
"step": 65900
},
{
"epoch": 19.74,
"grad_norm": 0.8898499011993408,
"learning_rate": 4.013758037984148e-05,
"loss": 0.013,
"step": 66000
},
{
"epoch": 19.74,
"eval_loss": 0.30586904287338257,
"eval_precision": 0.9286385564814235,
"eval_recall": 0.9127128298285045,
"eval_runtime": 303.8354,
"eval_samples_per_second": 44.017,
"eval_steps_per_second": 1.376,
"step": 66000
},
{
"epoch": 19.77,
"grad_norm": 0.8399672508239746,
"learning_rate": 4.012262599072828e-05,
"loss": 0.0156,
"step": 66100
},
{
"epoch": 19.8,
"grad_norm": 1.188772201538086,
"learning_rate": 4.0107671601615074e-05,
"loss": 0.0133,
"step": 66200
},
{
"epoch": 19.83,
"grad_norm": 0.3390734791755676,
"learning_rate": 4.0092717212501874e-05,
"loss": 0.011,
"step": 66300
},
{
"epoch": 19.86,
"grad_norm": 2.0773940086364746,
"learning_rate": 4.007776282338867e-05,
"loss": 0.0109,
"step": 66400
},
{
"epoch": 19.89,
"grad_norm": 1.667506456375122,
"learning_rate": 4.006280843427546e-05,
"loss": 0.0121,
"step": 66500
},
{
"epoch": 19.92,
"grad_norm": 0.036488935351371765,
"learning_rate": 4.004785404516226e-05,
"loss": 0.0121,
"step": 66600
},
{
"epoch": 19.95,
"grad_norm": 0.9762794375419617,
"learning_rate": 4.003289965604905e-05,
"loss": 0.0138,
"step": 66700
},
{
"epoch": 19.98,
"grad_norm": 1.04608952999115,
"learning_rate": 4.0017945266935845e-05,
"loss": 0.0117,
"step": 66800
},
{
"epoch": 20.01,
"grad_norm": 5.332238674163818,
"learning_rate": 4.0002990877822645e-05,
"loss": 0.0137,
"step": 66900
},
{
"epoch": 20.04,
"grad_norm": 0.01725686341524124,
"learning_rate": 3.998803648870944e-05,
"loss": 0.0131,
"step": 67000
},
{
"epoch": 20.04,
"eval_loss": 0.2912316620349884,
"eval_precision": 0.9311961240797836,
"eval_recall": 0.9113273191908617,
"eval_runtime": 303.1004,
"eval_samples_per_second": 44.124,
"eval_steps_per_second": 1.379,
"step": 67000
},
{
"epoch": 20.07,
"grad_norm": 0.0427197702229023,
"learning_rate": 3.997308209959623e-05,
"loss": 0.0077,
"step": 67100
},
{
"epoch": 20.1,
"grad_norm": 0.017879147082567215,
"learning_rate": 3.995812771048303e-05,
"loss": 0.0104,
"step": 67200
},
{
"epoch": 20.13,
"grad_norm": 0.07891906797885895,
"learning_rate": 3.994317332136982e-05,
"loss": 0.0141,
"step": 67300
},
{
"epoch": 20.16,
"grad_norm": 0.16812817752361298,
"learning_rate": 3.9928218932256616e-05,
"loss": 0.0097,
"step": 67400
},
{
"epoch": 20.19,
"grad_norm": 3.0790505409240723,
"learning_rate": 3.9913264543143416e-05,
"loss": 0.0106,
"step": 67500
},
{
"epoch": 20.22,
"grad_norm": 0.41399437189102173,
"learning_rate": 3.989831015403021e-05,
"loss": 0.0089,
"step": 67600
},
{
"epoch": 20.25,
"grad_norm": 0.4379628300666809,
"learning_rate": 3.988335576491701e-05,
"loss": 0.0086,
"step": 67700
},
{
"epoch": 20.28,
"grad_norm": 0.011956513859331608,
"learning_rate": 3.98684013758038e-05,
"loss": 0.0133,
"step": 67800
},
{
"epoch": 20.31,
"grad_norm": 2.477144718170166,
"learning_rate": 3.9853446986690594e-05,
"loss": 0.0091,
"step": 67900
},
{
"epoch": 20.33,
"grad_norm": 2.790292739868164,
"learning_rate": 3.9838492597577394e-05,
"loss": 0.0128,
"step": 68000
},
{
"epoch": 20.33,
"eval_loss": 0.3076106309890747,
"eval_precision": 0.9304780813715294,
"eval_recall": 0.9090489239200714,
"eval_runtime": 303.9942,
"eval_samples_per_second": 43.994,
"eval_steps_per_second": 1.375,
"step": 68000
},
{
"epoch": 20.36,
"grad_norm": 1.441587209701538,
"learning_rate": 3.9823538208464186e-05,
"loss": 0.0159,
"step": 68100
},
{
"epoch": 20.39,
"grad_norm": 1.7005335092544556,
"learning_rate": 3.980858381935098e-05,
"loss": 0.01,
"step": 68200
},
{
"epoch": 20.42,
"grad_norm": 0.30774638056755066,
"learning_rate": 3.979362943023778e-05,
"loss": 0.0124,
"step": 68300
},
{
"epoch": 20.45,
"grad_norm": 0.04803008586168289,
"learning_rate": 3.977867504112457e-05,
"loss": 0.0112,
"step": 68400
},
{
"epoch": 20.48,
"grad_norm": 3.551407814025879,
"learning_rate": 3.9763720652011365e-05,
"loss": 0.012,
"step": 68500
},
{
"epoch": 20.51,
"grad_norm": 0.037427909672260284,
"learning_rate": 3.9748766262898164e-05,
"loss": 0.0138,
"step": 68600
},
{
"epoch": 20.54,
"grad_norm": 0.0066105336882174015,
"learning_rate": 3.973381187378496e-05,
"loss": 0.0114,
"step": 68700
},
{
"epoch": 20.57,
"grad_norm": 0.05352969095110893,
"learning_rate": 3.971885748467175e-05,
"loss": 0.0106,
"step": 68800
},
{
"epoch": 20.6,
"grad_norm": 1.097419023513794,
"learning_rate": 3.970390309555855e-05,
"loss": 0.0113,
"step": 68900
},
{
"epoch": 20.63,
"grad_norm": 2.4684622287750244,
"learning_rate": 3.968894870644534e-05,
"loss": 0.0104,
"step": 69000
},
{
"epoch": 20.63,
"eval_loss": 0.3140137493610382,
"eval_precision": 0.9268018018018018,
"eval_recall": 0.9122202038240094,
"eval_runtime": 304.685,
"eval_samples_per_second": 43.895,
"eval_steps_per_second": 1.372,
"step": 69000
},
{
"epoch": 20.66,
"grad_norm": 0.03651382029056549,
"learning_rate": 3.967399431733214e-05,
"loss": 0.0086,
"step": 69100
},
{
"epoch": 20.69,
"grad_norm": 0.35381224751472473,
"learning_rate": 3.9659039928218935e-05,
"loss": 0.013,
"step": 69200
},
{
"epoch": 20.72,
"grad_norm": 0.06933160871267319,
"learning_rate": 3.964408553910573e-05,
"loss": 0.0106,
"step": 69300
},
{
"epoch": 20.75,
"grad_norm": 0.4022979140281677,
"learning_rate": 3.962913114999253e-05,
"loss": 0.013,
"step": 69400
},
{
"epoch": 20.78,
"grad_norm": 0.03529789671301842,
"learning_rate": 3.961417676087932e-05,
"loss": 0.0156,
"step": 69500
},
{
"epoch": 20.81,
"grad_norm": 0.7010594606399536,
"learning_rate": 3.9599222371766114e-05,
"loss": 0.0144,
"step": 69600
},
{
"epoch": 20.84,
"grad_norm": 0.37523359060287476,
"learning_rate": 3.958426798265291e-05,
"loss": 0.0127,
"step": 69700
},
{
"epoch": 20.87,
"grad_norm": 0.1500304788351059,
"learning_rate": 3.9569313593539706e-05,
"loss": 0.0151,
"step": 69800
},
{
"epoch": 20.9,
"grad_norm": 1.1849136352539062,
"learning_rate": 3.95543592044265e-05,
"loss": 0.0092,
"step": 69900
},
{
"epoch": 20.93,
"grad_norm": 0.37061455845832825,
"learning_rate": 3.95394048153133e-05,
"loss": 0.0125,
"step": 70000
},
{
"epoch": 20.93,
"eval_loss": 0.2996491491794586,
"eval_precision": 0.9277798530693563,
"eval_recall": 0.9176390898734567,
"eval_runtime": 305.225,
"eval_samples_per_second": 43.817,
"eval_steps_per_second": 1.369,
"step": 70000
},
{
"epoch": 20.96,
"grad_norm": 1.1082910299301147,
"learning_rate": 3.952445042620009e-05,
"loss": 0.0135,
"step": 70100
},
{
"epoch": 20.99,
"grad_norm": 0.21670883893966675,
"learning_rate": 3.9509496037086884e-05,
"loss": 0.0147,
"step": 70200
},
{
"epoch": 21.02,
"grad_norm": 1.7163949012756348,
"learning_rate": 3.9494541647973684e-05,
"loss": 0.0074,
"step": 70300
},
{
"epoch": 21.05,
"grad_norm": 0.49197930097579956,
"learning_rate": 3.947958725886048e-05,
"loss": 0.009,
"step": 70400
},
{
"epoch": 21.08,
"grad_norm": 0.20454080402851105,
"learning_rate": 3.946463286974727e-05,
"loss": 0.0106,
"step": 70500
},
{
"epoch": 21.11,
"grad_norm": 1.1480427980422974,
"learning_rate": 3.944967848063407e-05,
"loss": 0.0082,
"step": 70600
},
{
"epoch": 21.14,
"grad_norm": 0.012445613741874695,
"learning_rate": 3.943472409152086e-05,
"loss": 0.0124,
"step": 70700
},
{
"epoch": 21.17,
"grad_norm": 1.2859218120574951,
"learning_rate": 3.941976970240766e-05,
"loss": 0.0114,
"step": 70800
},
{
"epoch": 21.2,
"grad_norm": 1.9639800786972046,
"learning_rate": 3.9404815313294455e-05,
"loss": 0.0094,
"step": 70900
},
{
"epoch": 21.23,
"grad_norm": 0.5322540402412415,
"learning_rate": 3.938986092418125e-05,
"loss": 0.0127,
"step": 71000
},
{
"epoch": 21.23,
"eval_loss": 0.31439679861068726,
"eval_precision": 0.9300875853255618,
"eval_recall": 0.918747498383571,
"eval_runtime": 305.1026,
"eval_samples_per_second": 43.834,
"eval_steps_per_second": 1.37,
"step": 71000
},
{
"epoch": 21.26,
"grad_norm": 0.7698822021484375,
"learning_rate": 3.937490653506805e-05,
"loss": 0.0091,
"step": 71100
},
{
"epoch": 21.29,
"grad_norm": 0.058869846165180206,
"learning_rate": 3.935995214595484e-05,
"loss": 0.0116,
"step": 71200
},
{
"epoch": 21.32,
"grad_norm": 0.040317438542842865,
"learning_rate": 3.934499775684163e-05,
"loss": 0.0082,
"step": 71300
},
{
"epoch": 21.35,
"grad_norm": 0.3180629014968872,
"learning_rate": 3.933004336772843e-05,
"loss": 0.0086,
"step": 71400
},
{
"epoch": 21.38,
"grad_norm": 0.14002850651741028,
"learning_rate": 3.9315088978615226e-05,
"loss": 0.0083,
"step": 71500
},
{
"epoch": 21.41,
"grad_norm": 0.535882830619812,
"learning_rate": 3.930013458950202e-05,
"loss": 0.0083,
"step": 71600
},
{
"epoch": 21.44,
"grad_norm": 0.8898109793663025,
"learning_rate": 3.928518020038882e-05,
"loss": 0.0111,
"step": 71700
},
{
"epoch": 21.47,
"grad_norm": 7.178394317626953,
"learning_rate": 3.927022581127561e-05,
"loss": 0.0111,
"step": 71800
},
{
"epoch": 21.5,
"grad_norm": 0.03290112316608429,
"learning_rate": 3.9255271422162404e-05,
"loss": 0.0102,
"step": 71900
},
{
"epoch": 21.53,
"grad_norm": 0.013704554177820683,
"learning_rate": 3.9240317033049204e-05,
"loss": 0.0131,
"step": 72000
},
{
"epoch": 21.53,
"eval_loss": 0.30643701553344727,
"eval_precision": 0.9271496444430644,
"eval_recall": 0.9192709135133471,
"eval_runtime": 304.1697,
"eval_samples_per_second": 43.969,
"eval_steps_per_second": 1.374,
"step": 72000
},
{
"epoch": 21.56,
"grad_norm": 0.8118484020233154,
"learning_rate": 3.9225362643936e-05,
"loss": 0.0109,
"step": 72100
},
{
"epoch": 21.59,
"grad_norm": 0.8789449334144592,
"learning_rate": 3.9210408254822796e-05,
"loss": 0.0111,
"step": 72200
},
{
"epoch": 21.62,
"grad_norm": 1.8666021823883057,
"learning_rate": 3.919545386570959e-05,
"loss": 0.0112,
"step": 72300
},
{
"epoch": 21.65,
"grad_norm": 0.33622369170188904,
"learning_rate": 3.918049947659638e-05,
"loss": 0.0121,
"step": 72400
},
{
"epoch": 21.68,
"grad_norm": 1.5097126960754395,
"learning_rate": 3.916554508748318e-05,
"loss": 0.0104,
"step": 72500
},
{
"epoch": 21.71,
"grad_norm": 1.3149192333221436,
"learning_rate": 3.915059069836997e-05,
"loss": 0.01,
"step": 72600
},
{
"epoch": 21.74,
"grad_norm": 1.1172950267791748,
"learning_rate": 3.913563630925677e-05,
"loss": 0.0159,
"step": 72700
},
{
"epoch": 21.77,
"grad_norm": 0.7861026525497437,
"learning_rate": 3.912068192014357e-05,
"loss": 0.0102,
"step": 72800
},
{
"epoch": 21.8,
"grad_norm": 0.9385488033294678,
"learning_rate": 3.910572753103036e-05,
"loss": 0.0103,
"step": 72900
},
{
"epoch": 21.83,
"grad_norm": 0.2858407199382782,
"learning_rate": 3.909077314191715e-05,
"loss": 0.0095,
"step": 73000
},
{
"epoch": 21.83,
"eval_loss": 0.3220088481903076,
"eval_precision": 0.9313063063063063,
"eval_recall": 0.89119123125712,
"eval_runtime": 301.1978,
"eval_samples_per_second": 44.403,
"eval_steps_per_second": 1.388,
"step": 73000
},
{
"epoch": 21.86,
"grad_norm": 2.1585566997528076,
"learning_rate": 3.907581875280395e-05,
"loss": 0.0107,
"step": 73100
},
{
"epoch": 21.89,
"grad_norm": 0.21467708051204681,
"learning_rate": 3.9060864363690745e-05,
"loss": 0.0092,
"step": 73200
},
{
"epoch": 21.92,
"grad_norm": 0.0250945333391428,
"learning_rate": 3.904590997457754e-05,
"loss": 0.0095,
"step": 73300
},
{
"epoch": 21.95,
"grad_norm": 0.08200676739215851,
"learning_rate": 3.903095558546434e-05,
"loss": 0.0127,
"step": 73400
},
{
"epoch": 21.98,
"grad_norm": 7.951723098754883,
"learning_rate": 3.901600119635113e-05,
"loss": 0.0118,
"step": 73500
},
{
"epoch": 22.01,
"grad_norm": 0.042703770101070404,
"learning_rate": 3.900104680723793e-05,
"loss": 0.0086,
"step": 73600
},
{
"epoch": 22.04,
"grad_norm": 0.13317295908927917,
"learning_rate": 3.898609241812472e-05,
"loss": 0.0117,
"step": 73700
},
{
"epoch": 22.07,
"grad_norm": 0.09529834240674973,
"learning_rate": 3.8971138029011516e-05,
"loss": 0.0077,
"step": 73800
},
{
"epoch": 22.1,
"grad_norm": 1.2312837839126587,
"learning_rate": 3.8956183639898316e-05,
"loss": 0.01,
"step": 73900
},
{
"epoch": 22.13,
"grad_norm": 0.20264630019664764,
"learning_rate": 3.89412292507851e-05,
"loss": 0.0079,
"step": 74000
},
{
"epoch": 22.13,
"eval_loss": 0.3207722306251526,
"eval_precision": 0.9257851445663011,
"eval_recall": 0.9148680685981712,
"eval_runtime": 304.4363,
"eval_samples_per_second": 43.93,
"eval_steps_per_second": 1.373,
"step": 74000
},
{
"epoch": 22.16,
"grad_norm": 0.007298531476408243,
"learning_rate": 3.89262748616719e-05,
"loss": 0.0083,
"step": 74100
},
{
"epoch": 22.19,
"grad_norm": 0.030803361907601357,
"learning_rate": 3.89113204725587e-05,
"loss": 0.0128,
"step": 74200
},
{
"epoch": 22.22,
"grad_norm": 0.04404568299651146,
"learning_rate": 3.8896366083445494e-05,
"loss": 0.0094,
"step": 74300
},
{
"epoch": 22.25,
"grad_norm": 0.14884673058986664,
"learning_rate": 3.888141169433229e-05,
"loss": 0.0081,
"step": 74400
},
{
"epoch": 22.28,
"grad_norm": 0.07467024773359299,
"learning_rate": 3.886645730521909e-05,
"loss": 0.0144,
"step": 74500
},
{
"epoch": 22.31,
"grad_norm": 0.6713554859161377,
"learning_rate": 3.885150291610588e-05,
"loss": 0.0136,
"step": 74600
},
{
"epoch": 22.34,
"grad_norm": 0.16354040801525116,
"learning_rate": 3.883654852699267e-05,
"loss": 0.0109,
"step": 74700
},
{
"epoch": 22.37,
"grad_norm": 1.4964691400527954,
"learning_rate": 3.882159413787947e-05,
"loss": 0.0116,
"step": 74800
},
{
"epoch": 22.4,
"grad_norm": 1.4973292350769043,
"learning_rate": 3.8806639748766265e-05,
"loss": 0.008,
"step": 74900
},
{
"epoch": 22.43,
"grad_norm": 0.17059992253780365,
"learning_rate": 3.8791685359653065e-05,
"loss": 0.0111,
"step": 75000
},
{
"epoch": 22.43,
"eval_loss": 0.30246666073799133,
"eval_precision": 0.9313384217417686,
"eval_recall": 0.8979032605683672,
"eval_runtime": 301.8023,
"eval_samples_per_second": 44.314,
"eval_steps_per_second": 1.385,
"step": 75000
},
{
"epoch": 22.46,
"grad_norm": 0.05614122748374939,
"learning_rate": 3.877673097053985e-05,
"loss": 0.0101,
"step": 75100
},
{
"epoch": 22.49,
"grad_norm": 0.23737676441669464,
"learning_rate": 3.876177658142665e-05,
"loss": 0.0111,
"step": 75200
},
{
"epoch": 22.52,
"grad_norm": 0.11609382182359695,
"learning_rate": 3.874682219231345e-05,
"loss": 0.0129,
"step": 75300
},
{
"epoch": 22.55,
"grad_norm": 0.006964783184230328,
"learning_rate": 3.8731867803200236e-05,
"loss": 0.014,
"step": 75400
},
{
"epoch": 22.58,
"grad_norm": 0.6018117070198059,
"learning_rate": 3.8716913414087036e-05,
"loss": 0.0092,
"step": 75500
},
{
"epoch": 22.61,
"grad_norm": 1.5463790893554688,
"learning_rate": 3.8701959024973836e-05,
"loss": 0.0129,
"step": 75600
},
{
"epoch": 22.64,
"grad_norm": 0.3491170108318329,
"learning_rate": 3.868700463586062e-05,
"loss": 0.0124,
"step": 75700
},
{
"epoch": 22.67,
"grad_norm": 0.3379780650138855,
"learning_rate": 3.867205024674742e-05,
"loss": 0.0105,
"step": 75800
},
{
"epoch": 22.7,
"grad_norm": 0.6625536680221558,
"learning_rate": 3.865709585763422e-05,
"loss": 0.0101,
"step": 75900
},
{
"epoch": 22.73,
"grad_norm": 0.5047014951705933,
"learning_rate": 3.8642141468521014e-05,
"loss": 0.0116,
"step": 76000
},
{
"epoch": 22.73,
"eval_loss": 0.309579074382782,
"eval_precision": 0.9289195145420119,
"eval_recall": 0.9214261522830136,
"eval_runtime": 306.5207,
"eval_samples_per_second": 43.632,
"eval_steps_per_second": 1.364,
"step": 76000
},
{
"epoch": 22.76,
"grad_norm": 2.8879668712615967,
"learning_rate": 3.862718707940781e-05,
"loss": 0.0084,
"step": 76100
},
{
"epoch": 22.79,
"grad_norm": 1.4628148078918457,
"learning_rate": 3.86122326902946e-05,
"loss": 0.0091,
"step": 76200
},
{
"epoch": 22.82,
"grad_norm": 0.01455759722739458,
"learning_rate": 3.85972783011814e-05,
"loss": 0.0087,
"step": 76300
},
{
"epoch": 22.85,
"grad_norm": 0.005665886681526899,
"learning_rate": 3.858232391206819e-05,
"loss": 0.0117,
"step": 76400
},
{
"epoch": 22.88,
"grad_norm": 0.5273276567459106,
"learning_rate": 3.8567369522954985e-05,
"loss": 0.009,
"step": 76500
},
{
"epoch": 22.91,
"grad_norm": 0.06718481332063675,
"learning_rate": 3.8552415133841785e-05,
"loss": 0.0118,
"step": 76600
},
{
"epoch": 22.94,
"grad_norm": 0.30258700251579285,
"learning_rate": 3.8537460744728585e-05,
"loss": 0.0109,
"step": 76700
},
{
"epoch": 22.97,
"grad_norm": 2.678166627883911,
"learning_rate": 3.852250635561537e-05,
"loss": 0.015,
"step": 76800
},
{
"epoch": 23.0,
"grad_norm": 0.15017007291316986,
"learning_rate": 3.850755196650217e-05,
"loss": 0.0104,
"step": 76900
},
{
"epoch": 23.03,
"grad_norm": 0.3501853048801422,
"learning_rate": 3.849259757738897e-05,
"loss": 0.0096,
"step": 77000
},
{
"epoch": 23.03,
"eval_loss": 0.2935163080692291,
"eval_precision": 0.9276991482965932,
"eval_recall": 0.9121894146987284,
"eval_runtime": 303.8246,
"eval_samples_per_second": 44.019,
"eval_steps_per_second": 1.376,
"step": 77000
},
{
"epoch": 23.06,
"grad_norm": 0.729576587677002,
"learning_rate": 3.8477643188275756e-05,
"loss": 0.0076,
"step": 77100
},
{
"epoch": 23.09,
"grad_norm": 0.03431198373436928,
"learning_rate": 3.8462688799162556e-05,
"loss": 0.0068,
"step": 77200
},
{
"epoch": 23.12,
"grad_norm": 0.022281186655163765,
"learning_rate": 3.844773441004935e-05,
"loss": 0.0099,
"step": 77300
},
{
"epoch": 23.15,
"grad_norm": 0.06289653480052948,
"learning_rate": 3.843278002093615e-05,
"loss": 0.0088,
"step": 77400
},
{
"epoch": 23.18,
"grad_norm": 1.1686757802963257,
"learning_rate": 3.841782563182294e-05,
"loss": 0.0113,
"step": 77500
},
{
"epoch": 23.21,
"grad_norm": 0.6460024118423462,
"learning_rate": 3.8402871242709734e-05,
"loss": 0.0098,
"step": 77600
},
{
"epoch": 23.24,
"grad_norm": 0.04333605244755745,
"learning_rate": 3.8387916853596534e-05,
"loss": 0.0078,
"step": 77700
},
{
"epoch": 23.27,
"grad_norm": 1.6560355424880981,
"learning_rate": 3.8372962464483327e-05,
"loss": 0.0069,
"step": 77800
},
{
"epoch": 23.3,
"grad_norm": 1.7110439538955688,
"learning_rate": 3.835800807537012e-05,
"loss": 0.0079,
"step": 77900
},
{
"epoch": 23.33,
"grad_norm": 0.34755662083625793,
"learning_rate": 3.834305368625692e-05,
"loss": 0.0117,
"step": 78000
},
{
"epoch": 23.33,
"eval_loss": 0.31362003087997437,
"eval_precision": 0.9317794739166089,
"eval_recall": 0.9096031281751286,
"eval_runtime": 302.9137,
"eval_samples_per_second": 44.151,
"eval_steps_per_second": 1.38,
"step": 78000
},
{
"epoch": 23.36,
"grad_norm": 0.07322967052459717,
"learning_rate": 3.832809929714372e-05,
"loss": 0.0086,
"step": 78100
},
{
"epoch": 23.39,
"grad_norm": 0.1620834916830063,
"learning_rate": 3.8313144908030505e-05,
"loss": 0.0105,
"step": 78200
},
{
"epoch": 23.42,
"grad_norm": 1.0541850328445435,
"learning_rate": 3.8298190518917305e-05,
"loss": 0.011,
"step": 78300
},
{
"epoch": 23.44,
"grad_norm": 0.008509721606969833,
"learning_rate": 3.8283236129804104e-05,
"loss": 0.009,
"step": 78400
},
{
"epoch": 23.47,
"grad_norm": 0.2723921537399292,
"learning_rate": 3.826828174069089e-05,
"loss": 0.0089,
"step": 78500
},
{
"epoch": 23.5,
"grad_norm": 0.7700883150100708,
"learning_rate": 3.825332735157769e-05,
"loss": 0.0084,
"step": 78600
},
{
"epoch": 23.53,
"grad_norm": 0.7245194911956787,
"learning_rate": 3.823837296246448e-05,
"loss": 0.0068,
"step": 78700
},
{
"epoch": 23.56,
"grad_norm": 1.283056378364563,
"learning_rate": 3.822341857335128e-05,
"loss": 0.0108,
"step": 78800
},
{
"epoch": 23.59,
"grad_norm": 0.016398323699831963,
"learning_rate": 3.8208464184238075e-05,
"loss": 0.0104,
"step": 78900
},
{
"epoch": 23.62,
"grad_norm": 0.32268649339675903,
"learning_rate": 3.819350979512487e-05,
"loss": 0.0085,
"step": 79000
},
{
"epoch": 23.62,
"eval_loss": 0.30707934498786926,
"eval_precision": 0.9256538985992314,
"eval_recall": 0.9196403830167185,
"eval_runtime": 304.8987,
"eval_samples_per_second": 43.864,
"eval_steps_per_second": 1.371,
"step": 79000
},
{
"epoch": 23.65,
"grad_norm": 0.1340191662311554,
"learning_rate": 3.817855540601167e-05,
"loss": 0.0132,
"step": 79100
},
{
"epoch": 23.68,
"grad_norm": 1.2741714715957642,
"learning_rate": 3.816360101689846e-05,
"loss": 0.0086,
"step": 79200
},
{
"epoch": 23.71,
"grad_norm": 3.2270684242248535,
"learning_rate": 3.8148646627785254e-05,
"loss": 0.012,
"step": 79300
},
{
"epoch": 23.74,
"grad_norm": 0.0873398706316948,
"learning_rate": 3.813369223867205e-05,
"loss": 0.0071,
"step": 79400
},
{
"epoch": 23.77,
"grad_norm": 0.36740046739578247,
"learning_rate": 3.811873784955885e-05,
"loss": 0.0082,
"step": 79500
},
{
"epoch": 23.8,
"grad_norm": 0.7461920976638794,
"learning_rate": 3.810378346044564e-05,
"loss": 0.0133,
"step": 79600
},
{
"epoch": 23.83,
"grad_norm": 1.0577598810195923,
"learning_rate": 3.808882907133244e-05,
"loss": 0.0118,
"step": 79700
},
{
"epoch": 23.86,
"grad_norm": 1.9472182989120483,
"learning_rate": 3.807387468221923e-05,
"loss": 0.0116,
"step": 79800
},
{
"epoch": 23.89,
"grad_norm": 1.6104402542114258,
"learning_rate": 3.8058920293106025e-05,
"loss": 0.0114,
"step": 79900
},
{
"epoch": 23.92,
"grad_norm": 0.03251710161566734,
"learning_rate": 3.8043965903992824e-05,
"loss": 0.0091,
"step": 80000
},
{
"epoch": 23.92,
"eval_loss": 0.3046566843986511,
"eval_precision": 0.9268397735663303,
"eval_recall": 0.9275531882139229,
"eval_runtime": 305.7377,
"eval_samples_per_second": 43.743,
"eval_steps_per_second": 1.367,
"step": 80000
},
{
"epoch": 23.95,
"grad_norm": 0.8245527744293213,
"learning_rate": 3.802901151487962e-05,
"loss": 0.0067,
"step": 80100
},
{
"epoch": 23.98,
"grad_norm": 2.3082966804504395,
"learning_rate": 3.801405712576642e-05,
"loss": 0.0103,
"step": 80200
},
{
"epoch": 24.01,
"grad_norm": 0.05168503150343895,
"learning_rate": 3.799910273665321e-05,
"loss": 0.0086,
"step": 80300
},
{
"epoch": 24.04,
"grad_norm": 0.3247091770172119,
"learning_rate": 3.798414834754e-05,
"loss": 0.0082,
"step": 80400
},
{
"epoch": 24.07,
"grad_norm": 0.30284127593040466,
"learning_rate": 3.79691939584268e-05,
"loss": 0.0065,
"step": 80500
},
{
"epoch": 24.1,
"grad_norm": 0.041343070566654205,
"learning_rate": 3.7954239569313595e-05,
"loss": 0.0072,
"step": 80600
},
{
"epoch": 24.13,
"grad_norm": 0.5980477929115295,
"learning_rate": 3.793928518020039e-05,
"loss": 0.0088,
"step": 80700
},
{
"epoch": 24.16,
"grad_norm": 0.0064304666593670845,
"learning_rate": 3.792433079108719e-05,
"loss": 0.0094,
"step": 80800
},
{
"epoch": 24.19,
"grad_norm": 0.6040250062942505,
"learning_rate": 3.790937640197398e-05,
"loss": 0.0079,
"step": 80900
},
{
"epoch": 24.22,
"grad_norm": 0.3337300419807434,
"learning_rate": 3.7894422012860773e-05,
"loss": 0.0086,
"step": 81000
},
{
"epoch": 24.22,
"eval_loss": 0.3350207209587097,
"eval_precision": 0.9268361054008597,
"eval_recall": 0.916192000985252,
"eval_runtime": 304.7162,
"eval_samples_per_second": 43.89,
"eval_steps_per_second": 1.372,
"step": 81000
},
{
"epoch": 24.25,
"grad_norm": 0.710114061832428,
"learning_rate": 3.787946762374757e-05,
"loss": 0.008,
"step": 81100
},
{
"epoch": 24.28,
"grad_norm": 0.03623099625110626,
"learning_rate": 3.7864513234634366e-05,
"loss": 0.0131,
"step": 81200
},
{
"epoch": 24.31,
"grad_norm": 0.09887418150901794,
"learning_rate": 3.784955884552116e-05,
"loss": 0.0086,
"step": 81300
},
{
"epoch": 24.34,
"grad_norm": 0.6916789412498474,
"learning_rate": 3.783460445640796e-05,
"loss": 0.0101,
"step": 81400
},
{
"epoch": 24.37,
"grad_norm": 1.4278247356414795,
"learning_rate": 3.781965006729475e-05,
"loss": 0.0107,
"step": 81500
},
{
"epoch": 24.4,
"grad_norm": 0.16397880017757416,
"learning_rate": 3.7804695678181544e-05,
"loss": 0.008,
"step": 81600
},
{
"epoch": 24.43,
"grad_norm": 0.08632964640855789,
"learning_rate": 3.7789741289068344e-05,
"loss": 0.0078,
"step": 81700
},
{
"epoch": 24.46,
"grad_norm": 2.2472782135009766,
"learning_rate": 3.777478689995514e-05,
"loss": 0.011,
"step": 81800
},
{
"epoch": 24.49,
"grad_norm": 0.14701958000659943,
"learning_rate": 3.7759832510841936e-05,
"loss": 0.0096,
"step": 81900
},
{
"epoch": 24.52,
"grad_norm": 0.051196735352277756,
"learning_rate": 3.774487812172873e-05,
"loss": 0.0111,
"step": 82000
},
{
"epoch": 24.52,
"eval_loss": 0.30252349376678467,
"eval_precision": 0.928390712570056,
"eval_recall": 0.8925459527694818,
"eval_runtime": 302.8814,
"eval_samples_per_second": 44.156,
"eval_steps_per_second": 1.38,
"step": 82000
},
{
"epoch": 24.55,
"grad_norm": 0.013324776664376259,
"learning_rate": 3.772992373261552e-05,
"loss": 0.0075,
"step": 82100
},
{
"epoch": 24.58,
"grad_norm": 0.10291430354118347,
"learning_rate": 3.771496934350232e-05,
"loss": 0.0099,
"step": 82200
},
{
"epoch": 24.61,
"grad_norm": 0.07137342542409897,
"learning_rate": 3.7700014954389115e-05,
"loss": 0.012,
"step": 82300
},
{
"epoch": 24.64,
"grad_norm": 0.3020240068435669,
"learning_rate": 3.768506056527591e-05,
"loss": 0.0087,
"step": 82400
},
{
"epoch": 24.67,
"grad_norm": 1.067194938659668,
"learning_rate": 3.767010617616271e-05,
"loss": 0.0096,
"step": 82500
},
{
"epoch": 24.7,
"grad_norm": 0.014255263842642307,
"learning_rate": 3.76551517870495e-05,
"loss": 0.007,
"step": 82600
},
{
"epoch": 24.73,
"grad_norm": 0.02688017673790455,
"learning_rate": 3.764019739793629e-05,
"loss": 0.0089,
"step": 82700
},
{
"epoch": 24.76,
"grad_norm": 0.3376453220844269,
"learning_rate": 3.762524300882309e-05,
"loss": 0.0066,
"step": 82800
},
{
"epoch": 24.79,
"grad_norm": 0.10389913618564606,
"learning_rate": 3.7610288619709886e-05,
"loss": 0.0066,
"step": 82900
},
{
"epoch": 24.82,
"grad_norm": 0.7046878337860107,
"learning_rate": 3.759533423059668e-05,
"loss": 0.01,
"step": 83000
},
{
"epoch": 24.82,
"eval_loss": 0.3185621201992035,
"eval_precision": 0.9291735873891379,
"eval_recall": 0.9128667754549094,
"eval_runtime": 303.4192,
"eval_samples_per_second": 44.078,
"eval_steps_per_second": 1.378,
"step": 83000
},
{
"epoch": 24.85,
"grad_norm": 0.4447859227657318,
"learning_rate": 3.758037984148348e-05,
"loss": 0.0085,
"step": 83100
},
{
"epoch": 24.88,
"grad_norm": 2.2701525688171387,
"learning_rate": 3.756542545237027e-05,
"loss": 0.0114,
"step": 83200
},
{
"epoch": 24.91,
"grad_norm": 0.05526027828454971,
"learning_rate": 3.755047106325707e-05,
"loss": 0.012,
"step": 83300
},
{
"epoch": 24.94,
"grad_norm": 0.8909191489219666,
"learning_rate": 3.7535516674143864e-05,
"loss": 0.0097,
"step": 83400
},
{
"epoch": 24.97,
"grad_norm": 0.004659523721784353,
"learning_rate": 3.7520562285030656e-05,
"loss": 0.0085,
"step": 83500
},
{
"epoch": 25.0,
"grad_norm": 0.05222604423761368,
"learning_rate": 3.7505607895917456e-05,
"loss": 0.0088,
"step": 83600
},
{
"epoch": 25.03,
"grad_norm": 0.014093970879912376,
"learning_rate": 3.749065350680425e-05,
"loss": 0.0085,
"step": 83700
},
{
"epoch": 25.06,
"grad_norm": 0.0026446671690791845,
"learning_rate": 3.747569911769104e-05,
"loss": 0.005,
"step": 83800
},
{
"epoch": 25.09,
"grad_norm": 0.1448344588279724,
"learning_rate": 3.746074472857784e-05,
"loss": 0.0064,
"step": 83900
},
{
"epoch": 25.12,
"grad_norm": 0.295718789100647,
"learning_rate": 3.7445790339464634e-05,
"loss": 0.0067,
"step": 84000
},
{
"epoch": 25.12,
"eval_loss": 0.32626327872276306,
"eval_precision": 0.9313109964567663,
"eval_recall": 0.9225653499184088,
"eval_runtime": 304.7239,
"eval_samples_per_second": 43.889,
"eval_steps_per_second": 1.372,
"step": 84000
},
{
"epoch": 25.15,
"grad_norm": 0.028157589957118034,
"learning_rate": 3.743083595035143e-05,
"loss": 0.0094,
"step": 84100
},
{
"epoch": 25.18,
"grad_norm": 0.002226242097094655,
"learning_rate": 3.741588156123823e-05,
"loss": 0.0072,
"step": 84200
},
{
"epoch": 25.21,
"grad_norm": 0.7868858575820923,
"learning_rate": 3.740092717212502e-05,
"loss": 0.0103,
"step": 84300
},
{
"epoch": 25.24,
"grad_norm": 0.031047280877828598,
"learning_rate": 3.738597278301181e-05,
"loss": 0.01,
"step": 84400
},
{
"epoch": 25.27,
"grad_norm": 0.30554434657096863,
"learning_rate": 3.737101839389861e-05,
"loss": 0.0076,
"step": 84500
},
{
"epoch": 25.3,
"grad_norm": 1.2695821523666382,
"learning_rate": 3.7356064004785405e-05,
"loss": 0.0092,
"step": 84600
},
{
"epoch": 25.33,
"grad_norm": 0.039061836898326874,
"learning_rate": 3.7341109615672205e-05,
"loss": 0.0129,
"step": 84700
},
{
"epoch": 25.36,
"grad_norm": 1.0094258785247803,
"learning_rate": 3.7326155226559e-05,
"loss": 0.012,
"step": 84800
},
{
"epoch": 25.39,
"grad_norm": 0.16602523624897003,
"learning_rate": 3.731120083744579e-05,
"loss": 0.0072,
"step": 84900
},
{
"epoch": 25.42,
"grad_norm": 0.6232153177261353,
"learning_rate": 3.729624644833259e-05,
"loss": 0.0094,
"step": 85000
},
{
"epoch": 25.42,
"eval_loss": 0.32043251395225525,
"eval_precision": 0.9310592123725484,
"eval_recall": 0.91936328088919,
"eval_runtime": 304.0822,
"eval_samples_per_second": 43.982,
"eval_steps_per_second": 1.375,
"step": 85000
},
{
"epoch": 25.45,
"grad_norm": 1.6009403467178345,
"learning_rate": 3.728129205921938e-05,
"loss": 0.0103,
"step": 85100
},
{
"epoch": 25.48,
"grad_norm": 0.6107264757156372,
"learning_rate": 3.7266337670106176e-05,
"loss": 0.0079,
"step": 85200
},
{
"epoch": 25.51,
"grad_norm": 0.44173404574394226,
"learning_rate": 3.7251383280992976e-05,
"loss": 0.0065,
"step": 85300
},
{
"epoch": 25.54,
"grad_norm": 0.9073717594146729,
"learning_rate": 3.723642889187977e-05,
"loss": 0.0071,
"step": 85400
},
{
"epoch": 25.57,
"grad_norm": 0.3392820656299591,
"learning_rate": 3.722147450276656e-05,
"loss": 0.0101,
"step": 85500
},
{
"epoch": 25.6,
"grad_norm": 0.07929588109254837,
"learning_rate": 3.720652011365336e-05,
"loss": 0.0083,
"step": 85600
},
{
"epoch": 25.63,
"grad_norm": 0.35071372985839844,
"learning_rate": 3.7191565724540154e-05,
"loss": 0.0121,
"step": 85700
},
{
"epoch": 25.66,
"grad_norm": 0.20559339225292206,
"learning_rate": 3.717661133542695e-05,
"loss": 0.0073,
"step": 85800
},
{
"epoch": 25.69,
"grad_norm": 0.045159224420785904,
"learning_rate": 3.716165694631375e-05,
"loss": 0.0087,
"step": 85900
},
{
"epoch": 25.72,
"grad_norm": 0.10148915648460388,
"learning_rate": 3.714670255720054e-05,
"loss": 0.0119,
"step": 86000
},
{
"epoch": 25.72,
"eval_loss": 0.31306663155555725,
"eval_precision": 0.9333648989898989,
"eval_recall": 0.9104036454324332,
"eval_runtime": 304.164,
"eval_samples_per_second": 43.97,
"eval_steps_per_second": 1.374,
"step": 86000
},
{
"epoch": 25.75,
"grad_norm": 0.18669423460960388,
"learning_rate": 3.713174816808734e-05,
"loss": 0.0063,
"step": 86100
},
{
"epoch": 25.78,
"grad_norm": 0.10197019577026367,
"learning_rate": 3.711679377897413e-05,
"loss": 0.0083,
"step": 86200
},
{
"epoch": 25.81,
"grad_norm": 0.0219405684620142,
"learning_rate": 3.7101839389860925e-05,
"loss": 0.0088,
"step": 86300
},
{
"epoch": 25.84,
"grad_norm": 0.941899836063385,
"learning_rate": 3.7086885000747725e-05,
"loss": 0.006,
"step": 86400
},
{
"epoch": 25.87,
"grad_norm": 0.042357202619314194,
"learning_rate": 3.707193061163452e-05,
"loss": 0.0107,
"step": 86500
},
{
"epoch": 25.9,
"grad_norm": 0.04090040549635887,
"learning_rate": 3.705697622252131e-05,
"loss": 0.0076,
"step": 86600
},
{
"epoch": 25.93,
"grad_norm": 1.0006482601165771,
"learning_rate": 3.704202183340811e-05,
"loss": 0.0081,
"step": 86700
},
{
"epoch": 25.96,
"grad_norm": 0.01344706118106842,
"learning_rate": 3.70270674442949e-05,
"loss": 0.0061,
"step": 86800
},
{
"epoch": 25.99,
"grad_norm": 0.039950937032699585,
"learning_rate": 3.7012113055181696e-05,
"loss": 0.0095,
"step": 86900
},
{
"epoch": 26.02,
"grad_norm": 0.007412883453071117,
"learning_rate": 3.6997158666068496e-05,
"loss": 0.0061,
"step": 87000
},
{
"epoch": 26.02,
"eval_loss": 0.3440411686897278,
"eval_precision": 0.9280669958127618,
"eval_recall": 0.9144370208442378,
"eval_runtime": 304.1449,
"eval_samples_per_second": 43.972,
"eval_steps_per_second": 1.374,
"step": 87000
},
{
"epoch": 26.05,
"grad_norm": 0.045031215995550156,
"learning_rate": 3.698220427695529e-05,
"loss": 0.0083,
"step": 87100
},
{
"epoch": 26.08,
"grad_norm": 0.5366631150245667,
"learning_rate": 3.696724988784208e-05,
"loss": 0.0069,
"step": 87200
},
{
"epoch": 26.11,
"grad_norm": 0.24467185139656067,
"learning_rate": 3.695229549872888e-05,
"loss": 0.0065,
"step": 87300
},
{
"epoch": 26.14,
"grad_norm": 0.7528616786003113,
"learning_rate": 3.6937341109615674e-05,
"loss": 0.0087,
"step": 87400
},
{
"epoch": 26.17,
"grad_norm": 0.15506117045879364,
"learning_rate": 3.692238672050247e-05,
"loss": 0.0072,
"step": 87500
},
{
"epoch": 26.2,
"grad_norm": 0.2464226335287094,
"learning_rate": 3.6907432331389266e-05,
"loss": 0.0053,
"step": 87600
},
{
"epoch": 26.23,
"grad_norm": 0.15138311684131622,
"learning_rate": 3.689247794227606e-05,
"loss": 0.0063,
"step": 87700
},
{
"epoch": 26.26,
"grad_norm": 0.07477385550737381,
"learning_rate": 3.687752355316286e-05,
"loss": 0.0076,
"step": 87800
},
{
"epoch": 26.29,
"grad_norm": 0.661697268486023,
"learning_rate": 3.686256916404965e-05,
"loss": 0.0078,
"step": 87900
},
{
"epoch": 26.32,
"grad_norm": 0.16399236023426056,
"learning_rate": 3.6847614774936445e-05,
"loss": 0.0085,
"step": 88000
},
{
"epoch": 26.32,
"eval_loss": 0.326471209526062,
"eval_precision": 0.9298322483725588,
"eval_recall": 0.9147449120970473,
"eval_runtime": 305.1957,
"eval_samples_per_second": 43.821,
"eval_steps_per_second": 1.37,
"step": 88000
},
{
"epoch": 26.35,
"grad_norm": 0.5788341164588928,
"learning_rate": 3.6832660385823244e-05,
"loss": 0.0097,
"step": 88100
},
{
"epoch": 26.38,
"grad_norm": 0.38478532433509827,
"learning_rate": 3.681770599671003e-05,
"loss": 0.0083,
"step": 88200
},
{
"epoch": 26.41,
"grad_norm": 1.8616811037063599,
"learning_rate": 3.680275160759683e-05,
"loss": 0.0082,
"step": 88300
},
{
"epoch": 26.44,
"grad_norm": 0.005648652091622353,
"learning_rate": 3.678779721848363e-05,
"loss": 0.0074,
"step": 88400
},
{
"epoch": 26.47,
"grad_norm": 0.013662021607160568,
"learning_rate": 3.677284282937042e-05,
"loss": 0.0054,
"step": 88500
},
{
"epoch": 26.5,
"grad_norm": 0.21754692494869232,
"learning_rate": 3.6757888440257216e-05,
"loss": 0.0115,
"step": 88600
},
{
"epoch": 26.53,
"grad_norm": 0.0358903631567955,
"learning_rate": 3.6742934051144015e-05,
"loss": 0.0097,
"step": 88700
},
{
"epoch": 26.56,
"grad_norm": 0.9966431856155396,
"learning_rate": 3.672797966203081e-05,
"loss": 0.0074,
"step": 88800
},
{
"epoch": 26.58,
"grad_norm": 0.7227293848991394,
"learning_rate": 3.67130252729176e-05,
"loss": 0.0088,
"step": 88900
},
{
"epoch": 26.61,
"grad_norm": 1.3261148929595947,
"learning_rate": 3.66980708838044e-05,
"loss": 0.0072,
"step": 89000
},
{
"epoch": 26.61,
"eval_loss": 0.3263101279735565,
"eval_precision": 0.9263782601905357,
"eval_recall": 0.9131438775824379,
"eval_runtime": 306.4472,
"eval_samples_per_second": 43.642,
"eval_steps_per_second": 1.364,
"step": 89000
},
{
"epoch": 26.64,
"grad_norm": 0.11170350760221481,
"learning_rate": 3.6683116494691194e-05,
"loss": 0.0092,
"step": 89100
},
{
"epoch": 26.67,
"grad_norm": 1.529340147972107,
"learning_rate": 3.666816210557799e-05,
"loss": 0.0089,
"step": 89200
},
{
"epoch": 26.7,
"grad_norm": 0.01682981289923191,
"learning_rate": 3.665320771646478e-05,
"loss": 0.0093,
"step": 89300
},
{
"epoch": 26.73,
"grad_norm": 0.3299085199832916,
"learning_rate": 3.663825332735158e-05,
"loss": 0.0063,
"step": 89400
},
{
"epoch": 26.76,
"grad_norm": 1.9823254346847534,
"learning_rate": 3.662329893823838e-05,
"loss": 0.0091,
"step": 89500
},
{
"epoch": 26.79,
"grad_norm": 0.07487453520298004,
"learning_rate": 3.6608344549125165e-05,
"loss": 0.009,
"step": 89600
},
{
"epoch": 26.82,
"grad_norm": 0.015319288708269596,
"learning_rate": 3.6593390160011964e-05,
"loss": 0.0078,
"step": 89700
},
{
"epoch": 26.85,
"grad_norm": 0.004087815526872873,
"learning_rate": 3.6578435770898764e-05,
"loss": 0.0069,
"step": 89800
},
{
"epoch": 26.88,
"grad_norm": 0.00753753213211894,
"learning_rate": 3.656348138178556e-05,
"loss": 0.0057,
"step": 89900
},
{
"epoch": 26.91,
"grad_norm": 0.012257667258381844,
"learning_rate": 3.654852699267235e-05,
"loss": 0.0095,
"step": 90000
},
{
"epoch": 26.91,
"eval_loss": 0.3233014643192291,
"eval_precision": 0.9329517062525696,
"eval_recall": 0.9082484066627667,
"eval_runtime": 304.4964,
"eval_samples_per_second": 43.922,
"eval_steps_per_second": 1.373,
"step": 90000
},
{
"epoch": 26.94,
"grad_norm": 0.030741436406970024,
"learning_rate": 3.653357260355915e-05,
"loss": 0.0067,
"step": 90100
},
{
"epoch": 26.97,
"grad_norm": 0.429049551486969,
"learning_rate": 3.651861821444594e-05,
"loss": 0.012,
"step": 90200
},
{
"epoch": 27.0,
"grad_norm": 0.002479678951203823,
"learning_rate": 3.6503663825332735e-05,
"loss": 0.005,
"step": 90300
},
{
"epoch": 27.03,
"grad_norm": 0.12390375137329102,
"learning_rate": 3.648870943621953e-05,
"loss": 0.0083,
"step": 90400
},
{
"epoch": 27.06,
"grad_norm": 0.044969938695430756,
"learning_rate": 3.647375504710633e-05,
"loss": 0.0073,
"step": 90500
},
{
"epoch": 27.09,
"grad_norm": 0.06378799676895142,
"learning_rate": 3.645880065799313e-05,
"loss": 0.0073,
"step": 90600
},
{
"epoch": 27.12,
"grad_norm": 0.323734849691391,
"learning_rate": 3.6443846268879914e-05,
"loss": 0.0078,
"step": 90700
},
{
"epoch": 27.15,
"grad_norm": 1.6457269191741943,
"learning_rate": 3.642889187976671e-05,
"loss": 0.0055,
"step": 90800
},
{
"epoch": 27.18,
"grad_norm": 0.007004741113632917,
"learning_rate": 3.641393749065351e-05,
"loss": 0.0065,
"step": 90900
},
{
"epoch": 27.21,
"grad_norm": 0.06395163387060165,
"learning_rate": 3.63989831015403e-05,
"loss": 0.0062,
"step": 91000
},
{
"epoch": 27.21,
"eval_loss": 0.32764899730682373,
"eval_precision": 0.9317584480600751,
"eval_recall": 0.916869361741433,
"eval_runtime": 309.1631,
"eval_samples_per_second": 43.259,
"eval_steps_per_second": 1.352,
"step": 91000
},
{
"epoch": 27.24,
"grad_norm": 0.005486265290528536,
"learning_rate": 3.63840287124271e-05,
"loss": 0.0082,
"step": 91100
},
{
"epoch": 27.27,
"grad_norm": 2.3132262229919434,
"learning_rate": 3.63690743233139e-05,
"loss": 0.0067,
"step": 91200
},
{
"epoch": 27.3,
"grad_norm": 0.07687461376190186,
"learning_rate": 3.635411993420069e-05,
"loss": 0.0051,
"step": 91300
},
{
"epoch": 27.33,
"grad_norm": 0.05096305161714554,
"learning_rate": 3.6339165545087484e-05,
"loss": 0.0061,
"step": 91400
},
{
"epoch": 27.36,
"grad_norm": 0.21200311183929443,
"learning_rate": 3.6324211155974284e-05,
"loss": 0.0072,
"step": 91500
},
{
"epoch": 27.39,
"grad_norm": 0.07336900383234024,
"learning_rate": 3.630925676686108e-05,
"loss": 0.008,
"step": 91600
},
{
"epoch": 27.42,
"grad_norm": 0.026788916438817978,
"learning_rate": 3.629430237774787e-05,
"loss": 0.0068,
"step": 91700
},
{
"epoch": 27.45,
"grad_norm": 0.03046250529587269,
"learning_rate": 3.627934798863466e-05,
"loss": 0.0081,
"step": 91800
},
{
"epoch": 27.48,
"grad_norm": 0.32240158319473267,
"learning_rate": 3.626439359952146e-05,
"loss": 0.0091,
"step": 91900
},
{
"epoch": 27.51,
"grad_norm": 0.1428656429052353,
"learning_rate": 3.624943921040826e-05,
"loss": 0.007,
"step": 92000
},
{
"epoch": 27.51,
"eval_loss": 0.3499869704246521,
"eval_precision": 0.9278612426685068,
"eval_recall": 0.9108346931863666,
"eval_runtime": 310.2456,
"eval_samples_per_second": 43.108,
"eval_steps_per_second": 1.347,
"step": 92000
}
],
"logging_steps": 100,
"max_steps": 334400,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 4.8090441780412416e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}