{ "best_metric": null, "best_model_checkpoint": null, "epoch": 27.51196172248804, "eval_steps": 1000, "global_step": 92000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "grad_norm": 3.539609432220459, "learning_rate": 4.99925228054434e-05, "loss": 2.134, "step": 100 }, { "epoch": 0.06, "grad_norm": 3.197829246520996, "learning_rate": 4.997756841633019e-05, "loss": 0.6178, "step": 200 }, { "epoch": 0.09, "grad_norm": 3.3991429805755615, "learning_rate": 4.996261402721699e-05, "loss": 0.5496, "step": 300 }, { "epoch": 0.12, "grad_norm": 3.072633743286133, "learning_rate": 4.9947659638103784e-05, "loss": 0.5228, "step": 400 }, { "epoch": 0.15, "grad_norm": 2.4815468788146973, "learning_rate": 4.993270524899058e-05, "loss": 0.5102, "step": 500 }, { "epoch": 0.18, "grad_norm": 2.794753313064575, "learning_rate": 4.991775085987738e-05, "loss": 0.4746, "step": 600 }, { "epoch": 0.21, "grad_norm": 2.1388251781463623, "learning_rate": 4.9902796470764176e-05, "loss": 0.4769, "step": 700 }, { "epoch": 0.24, "grad_norm": 2.518214225769043, "learning_rate": 4.988784208165096e-05, "loss": 0.4476, "step": 800 }, { "epoch": 0.27, "grad_norm": 4.257823467254639, "learning_rate": 4.987288769253776e-05, "loss": 0.439, "step": 900 }, { "epoch": 0.3, "grad_norm": 2.0235888957977295, "learning_rate": 4.985793330342456e-05, "loss": 0.4465, "step": 1000 }, { "epoch": 0.3, "eval_loss": 0.34466782212257385, "eval_precision": 0.7649398815576958, "eval_recall": 0.7874318790603159, "eval_runtime": 321.2695, "eval_samples_per_second": 41.629, "eval_steps_per_second": 1.301, "step": 1000 }, { "epoch": 0.33, "grad_norm": 2.372622489929199, "learning_rate": 4.984297891431135e-05, "loss": 0.438, "step": 1100 }, { "epoch": 0.36, "grad_norm": 2.184081792831421, "learning_rate": 4.982802452519815e-05, "loss": 0.4319, "step": 1200 }, { "epoch": 0.39, "grad_norm": 1.180004358291626, "learning_rate": 4.981307013608494e-05, "loss": 0.4153, "step": 1300 }, { "epoch": 0.42, "grad_norm": 1.8515098094940186, "learning_rate": 4.979811574697174e-05, "loss": 0.4107, "step": 1400 }, { "epoch": 0.45, "grad_norm": 2.0762712955474854, "learning_rate": 4.978316135785853e-05, "loss": 0.4087, "step": 1500 }, { "epoch": 0.48, "grad_norm": 1.6716846227645874, "learning_rate": 4.9768206968745326e-05, "loss": 0.4082, "step": 1600 }, { "epoch": 0.51, "grad_norm": 2.9515812397003174, "learning_rate": 4.9753252579632126e-05, "loss": 0.398, "step": 1700 }, { "epoch": 0.54, "grad_norm": 1.9658855199813843, "learning_rate": 4.973829819051892e-05, "loss": 0.393, "step": 1800 }, { "epoch": 0.57, "grad_norm": 1.9613778591156006, "learning_rate": 4.972334380140571e-05, "loss": 0.3904, "step": 1900 }, { "epoch": 0.6, "grad_norm": 2.7774882316589355, "learning_rate": 4.970838941229251e-05, "loss": 0.3794, "step": 2000 }, { "epoch": 0.6, "eval_loss": 0.310618132352829, "eval_precision": 0.7516943243620137, "eval_recall": 0.8298285045721852, "eval_runtime": 320.9754, "eval_samples_per_second": 41.667, "eval_steps_per_second": 1.302, "step": 2000 }, { "epoch": 0.63, "grad_norm": 1.4382622241973877, "learning_rate": 4.969343502317931e-05, "loss": 0.369, "step": 2100 }, { "epoch": 0.66, "grad_norm": 1.813565731048584, "learning_rate": 4.96784806340661e-05, "loss": 0.3751, "step": 2200 }, { "epoch": 0.69, "grad_norm": 2.279954195022583, "learning_rate": 4.9663526244952897e-05, "loss": 0.3804, "step": 2300 }, { "epoch": 0.72, "grad_norm": 1.9376351833343506, "learning_rate": 4.9648571855839696e-05, "loss": 0.3611, "step": 2400 }, { "epoch": 0.75, "grad_norm": 2.2867352962493896, "learning_rate": 4.963361746672648e-05, "loss": 0.3739, "step": 2500 }, { "epoch": 0.78, "grad_norm": 2.132394313812256, "learning_rate": 4.961866307761328e-05, "loss": 0.3669, "step": 2600 }, { "epoch": 0.81, "grad_norm": 2.0541863441467285, "learning_rate": 4.9603708688500075e-05, "loss": 0.366, "step": 2700 }, { "epoch": 0.84, "grad_norm": 2.1414847373962402, "learning_rate": 4.9588754299386874e-05, "loss": 0.3535, "step": 2800 }, { "epoch": 0.87, "grad_norm": 1.3949612379074097, "learning_rate": 4.957379991027367e-05, "loss": 0.3684, "step": 2900 }, { "epoch": 0.9, "grad_norm": 1.8921570777893066, "learning_rate": 4.955884552116046e-05, "loss": 0.3556, "step": 3000 }, { "epoch": 0.9, "eval_loss": 0.290554404258728, "eval_precision": 0.79493216033703, "eval_recall": 0.7901105329597586, "eval_runtime": 307.7262, "eval_samples_per_second": 43.461, "eval_steps_per_second": 1.358, "step": 3000 }, { "epoch": 0.93, "grad_norm": 1.6217349767684937, "learning_rate": 4.954389113204726e-05, "loss": 0.3566, "step": 3100 }, { "epoch": 0.96, "grad_norm": 1.524946928024292, "learning_rate": 4.952893674293405e-05, "loss": 0.3477, "step": 3200 }, { "epoch": 0.99, "grad_norm": 1.6807836294174194, "learning_rate": 4.9513982353820846e-05, "loss": 0.3409, "step": 3300 }, { "epoch": 1.02, "grad_norm": 1.5750257968902588, "learning_rate": 4.9499027964707645e-05, "loss": 0.3178, "step": 3400 }, { "epoch": 1.05, "grad_norm": 1.43153715133667, "learning_rate": 4.9484073575594445e-05, "loss": 0.2888, "step": 3500 }, { "epoch": 1.08, "grad_norm": 1.4886215925216675, "learning_rate": 4.946911918648123e-05, "loss": 0.3153, "step": 3600 }, { "epoch": 1.11, "grad_norm": 2.2148983478546143, "learning_rate": 4.945416479736803e-05, "loss": 0.3114, "step": 3700 }, { "epoch": 1.14, "grad_norm": 1.3632937669754028, "learning_rate": 4.9439210408254824e-05, "loss": 0.3031, "step": 3800 }, { "epoch": 1.17, "grad_norm": 1.8350048065185547, "learning_rate": 4.9424256019141617e-05, "loss": 0.292, "step": 3900 }, { "epoch": 1.2, "grad_norm": 1.1402252912521362, "learning_rate": 4.9409301630028416e-05, "loss": 0.2983, "step": 4000 }, { "epoch": 1.2, "eval_loss": 0.2781643867492676, "eval_precision": 0.7788883753177721, "eval_recall": 0.8301363958249947, "eval_runtime": 307.2732, "eval_samples_per_second": 43.525, "eval_steps_per_second": 1.36, "step": 4000 }, { "epoch": 1.23, "grad_norm": 1.2367932796478271, "learning_rate": 4.939434724091521e-05, "loss": 0.2894, "step": 4100 }, { "epoch": 1.26, "grad_norm": 1.4055671691894531, "learning_rate": 4.937939285180201e-05, "loss": 0.2847, "step": 4200 }, { "epoch": 1.29, "grad_norm": 1.910565972328186, "learning_rate": 4.93644384626888e-05, "loss": 0.2917, "step": 4300 }, { "epoch": 1.32, "grad_norm": 1.9085345268249512, "learning_rate": 4.9349484073575595e-05, "loss": 0.2934, "step": 4400 }, { "epoch": 1.35, "grad_norm": 1.5550158023834229, "learning_rate": 4.9334529684462394e-05, "loss": 0.2726, "step": 4500 }, { "epoch": 1.38, "grad_norm": 2.1685421466827393, "learning_rate": 4.931957529534919e-05, "loss": 0.3077, "step": 4600 }, { "epoch": 1.41, "grad_norm": 1.7528005838394165, "learning_rate": 4.930462090623598e-05, "loss": 0.2919, "step": 4700 }, { "epoch": 1.44, "grad_norm": 1.804412841796875, "learning_rate": 4.928966651712278e-05, "loss": 0.278, "step": 4800 }, { "epoch": 1.47, "grad_norm": 2.430739164352417, "learning_rate": 4.927471212800957e-05, "loss": 0.2901, "step": 4900 }, { "epoch": 1.5, "grad_norm": 1.5466407537460327, "learning_rate": 4.9259757738896365e-05, "loss": 0.2886, "step": 5000 }, { "epoch": 1.5, "eval_loss": 0.27095386385917664, "eval_precision": 0.7892478844902066, "eval_recall": 0.8212999168693618, "eval_runtime": 308.5531, "eval_samples_per_second": 43.344, "eval_steps_per_second": 1.355, "step": 5000 }, { "epoch": 1.53, "grad_norm": 1.1303741931915283, "learning_rate": 4.9244803349783165e-05, "loss": 0.291, "step": 5100 }, { "epoch": 1.56, "grad_norm": 1.3640042543411255, "learning_rate": 4.922984896066996e-05, "loss": 0.2897, "step": 5200 }, { "epoch": 1.58, "grad_norm": 1.9915575981140137, "learning_rate": 4.921489457155675e-05, "loss": 0.2798, "step": 5300 }, { "epoch": 1.61, "grad_norm": 1.574576735496521, "learning_rate": 4.919994018244355e-05, "loss": 0.2856, "step": 5400 }, { "epoch": 1.64, "grad_norm": 1.9231148958206177, "learning_rate": 4.918498579333034e-05, "loss": 0.2819, "step": 5500 }, { "epoch": 1.67, "grad_norm": 2.171637773513794, "learning_rate": 4.917003140421714e-05, "loss": 0.2892, "step": 5600 }, { "epoch": 1.7, "grad_norm": 1.7447925806045532, "learning_rate": 4.9155077015103936e-05, "loss": 0.2837, "step": 5700 }, { "epoch": 1.73, "grad_norm": 2.282715320587158, "learning_rate": 4.914012262599073e-05, "loss": 0.2888, "step": 5800 }, { "epoch": 1.76, "grad_norm": 2.041062831878662, "learning_rate": 4.912516823687753e-05, "loss": 0.2733, "step": 5900 }, { "epoch": 1.79, "grad_norm": 1.3900405168533325, "learning_rate": 4.911021384776432e-05, "loss": 0.2982, "step": 6000 }, { "epoch": 1.79, "eval_loss": 0.24861453473567963, "eval_precision": 0.7945360585297875, "eval_recall": 0.8426059915637797, "eval_runtime": 306.7263, "eval_samples_per_second": 43.602, "eval_steps_per_second": 1.363, "step": 6000 }, { "epoch": 1.82, "grad_norm": 2.156783103942871, "learning_rate": 4.9095259458651114e-05, "loss": 0.2883, "step": 6100 }, { "epoch": 1.85, "grad_norm": 1.6421504020690918, "learning_rate": 4.9080305069537914e-05, "loss": 0.2716, "step": 6200 }, { "epoch": 1.88, "grad_norm": 1.6905546188354492, "learning_rate": 4.906535068042471e-05, "loss": 0.2775, "step": 6300 }, { "epoch": 1.91, "grad_norm": 1.1936814785003662, "learning_rate": 4.90503962913115e-05, "loss": 0.2571, "step": 6400 }, { "epoch": 1.94, "grad_norm": 1.7146382331848145, "learning_rate": 4.90354419021983e-05, "loss": 0.2681, "step": 6500 }, { "epoch": 1.97, "grad_norm": 1.5280200242996216, "learning_rate": 4.902048751308509e-05, "loss": 0.2655, "step": 6600 }, { "epoch": 2.0, "grad_norm": 1.4756951332092285, "learning_rate": 4.9005533123971885e-05, "loss": 0.2554, "step": 6700 }, { "epoch": 2.03, "grad_norm": 1.5664458274841309, "learning_rate": 4.8990578734858685e-05, "loss": 0.2125, "step": 6800 }, { "epoch": 2.06, "grad_norm": 1.447304368019104, "learning_rate": 4.897562434574548e-05, "loss": 0.2161, "step": 6900 }, { "epoch": 2.09, "grad_norm": 1.8067011833190918, "learning_rate": 4.896066995663227e-05, "loss": 0.213, "step": 7000 }, { "epoch": 2.09, "eval_loss": 0.24976512789726257, "eval_precision": 0.8138389031705227, "eval_recall": 0.8187752085963238, "eval_runtime": 305.8458, "eval_samples_per_second": 43.728, "eval_steps_per_second": 1.367, "step": 7000 }, { "epoch": 2.12, "grad_norm": 2.7706127166748047, "learning_rate": 4.894571556751907e-05, "loss": 0.2186, "step": 7100 }, { "epoch": 2.15, "grad_norm": 2.394275426864624, "learning_rate": 4.893076117840586e-05, "loss": 0.2094, "step": 7200 }, { "epoch": 2.18, "grad_norm": 1.9464359283447266, "learning_rate": 4.891580678929266e-05, "loss": 0.2278, "step": 7300 }, { "epoch": 2.21, "grad_norm": 2.1283416748046875, "learning_rate": 4.8900852400179456e-05, "loss": 0.2174, "step": 7400 }, { "epoch": 2.24, "grad_norm": 1.7853657007217407, "learning_rate": 4.888589801106625e-05, "loss": 0.2184, "step": 7500 }, { "epoch": 2.27, "grad_norm": 1.1081209182739258, "learning_rate": 4.887094362195305e-05, "loss": 0.2201, "step": 7600 }, { "epoch": 2.3, "grad_norm": 1.3894284963607788, "learning_rate": 4.885598923283984e-05, "loss": 0.2213, "step": 7700 }, { "epoch": 2.33, "grad_norm": 2.0615389347076416, "learning_rate": 4.8841034843726634e-05, "loss": 0.2217, "step": 7800 }, { "epoch": 2.36, "grad_norm": 1.6415098905563354, "learning_rate": 4.8826080454613434e-05, "loss": 0.2266, "step": 7900 }, { "epoch": 2.39, "grad_norm": 3.293736219406128, "learning_rate": 4.8811126065500226e-05, "loss": 0.2117, "step": 8000 }, { "epoch": 2.39, "eval_loss": 0.24216407537460327, "eval_precision": 0.8107814105275881, "eval_recall": 0.826133809538471, "eval_runtime": 307.023, "eval_samples_per_second": 43.56, "eval_steps_per_second": 1.361, "step": 8000 }, { "epoch": 2.42, "grad_norm": 1.1580455303192139, "learning_rate": 4.879617167638702e-05, "loss": 0.2171, "step": 8100 }, { "epoch": 2.45, "grad_norm": 1.0756213665008545, "learning_rate": 4.878121728727382e-05, "loss": 0.2174, "step": 8200 }, { "epoch": 2.48, "grad_norm": 1.871605396270752, "learning_rate": 4.876626289816061e-05, "loss": 0.215, "step": 8300 }, { "epoch": 2.51, "grad_norm": 1.8400825262069702, "learning_rate": 4.8751308509047405e-05, "loss": 0.2215, "step": 8400 }, { "epoch": 2.54, "grad_norm": 2.0464110374450684, "learning_rate": 4.8736354119934204e-05, "loss": 0.2195, "step": 8500 }, { "epoch": 2.57, "grad_norm": 1.2704099416732788, "learning_rate": 4.8721399730821e-05, "loss": 0.2266, "step": 8600 }, { "epoch": 2.6, "grad_norm": 0.9448720216751099, "learning_rate": 4.87064453417078e-05, "loss": 0.2159, "step": 8700 }, { "epoch": 2.63, "grad_norm": 1.2881120443344116, "learning_rate": 4.869149095259459e-05, "loss": 0.2084, "step": 8800 }, { "epoch": 2.66, "grad_norm": 2.0659286975860596, "learning_rate": 4.867653656348138e-05, "loss": 0.2134, "step": 8900 }, { "epoch": 2.69, "grad_norm": 1.109397530555725, "learning_rate": 4.866158217436818e-05, "loss": 0.2129, "step": 9000 }, { "epoch": 2.69, "eval_loss": 0.22735044360160828, "eval_precision": 0.8203027060082556, "eval_recall": 0.8260106530373472, "eval_runtime": 305.794, "eval_samples_per_second": 43.735, "eval_steps_per_second": 1.367, "step": 9000 }, { "epoch": 2.72, "grad_norm": 1.164435625076294, "learning_rate": 4.8646627785254975e-05, "loss": 0.2155, "step": 9100 }, { "epoch": 2.75, "grad_norm": 1.5477757453918457, "learning_rate": 4.863167339614177e-05, "loss": 0.2137, "step": 9200 }, { "epoch": 2.78, "grad_norm": 1.4342052936553955, "learning_rate": 4.861671900702857e-05, "loss": 0.206, "step": 9300 }, { "epoch": 2.81, "grad_norm": 1.3847391605377197, "learning_rate": 4.860176461791536e-05, "loss": 0.2077, "step": 9400 }, { "epoch": 2.84, "grad_norm": 2.9082765579223633, "learning_rate": 4.8586810228802154e-05, "loss": 0.2126, "step": 9500 }, { "epoch": 2.87, "grad_norm": 1.4943510293960571, "learning_rate": 4.857185583968895e-05, "loss": 0.2092, "step": 9600 }, { "epoch": 2.9, "grad_norm": 1.2332855463027954, "learning_rate": 4.8556901450575746e-05, "loss": 0.2222, "step": 9700 }, { "epoch": 2.93, "grad_norm": 2.227031946182251, "learning_rate": 4.854194706146254e-05, "loss": 0.1969, "step": 9800 }, { "epoch": 2.96, "grad_norm": 1.2515846490859985, "learning_rate": 4.852699267234934e-05, "loss": 0.2017, "step": 9900 }, { "epoch": 2.99, "grad_norm": 1.2267186641693115, "learning_rate": 4.851203828323613e-05, "loss": 0.2126, "step": 10000 }, { "epoch": 2.99, "eval_loss": 0.20952437818050385, "eval_precision": 0.8416687769055458, "eval_recall": 0.818682841220481, "eval_runtime": 302.8923, "eval_samples_per_second": 44.154, "eval_steps_per_second": 1.38, "step": 10000 }, { "epoch": 3.02, "grad_norm": 1.151638150215149, "learning_rate": 4.849708389412293e-05, "loss": 0.171, "step": 10100 }, { "epoch": 3.05, "grad_norm": 3.8168528079986572, "learning_rate": 4.8482129505009724e-05, "loss": 0.165, "step": 10200 }, { "epoch": 3.08, "grad_norm": 2.3039355278015137, "learning_rate": 4.846717511589652e-05, "loss": 0.1675, "step": 10300 }, { "epoch": 3.11, "grad_norm": 1.252301812171936, "learning_rate": 4.845222072678332e-05, "loss": 0.1554, "step": 10400 }, { "epoch": 3.14, "grad_norm": 1.2682992219924927, "learning_rate": 4.843726633767011e-05, "loss": 0.1756, "step": 10500 }, { "epoch": 3.17, "grad_norm": 1.3934777975082397, "learning_rate": 4.84223119485569e-05, "loss": 0.1576, "step": 10600 }, { "epoch": 3.2, "grad_norm": 1.3386119604110718, "learning_rate": 4.84073575594437e-05, "loss": 0.1602, "step": 10700 }, { "epoch": 3.23, "grad_norm": 1.6670503616333008, "learning_rate": 4.8392403170330495e-05, "loss": 0.1638, "step": 10800 }, { "epoch": 3.26, "grad_norm": 2.5150694847106934, "learning_rate": 4.837744878121729e-05, "loss": 0.1653, "step": 10900 }, { "epoch": 3.29, "grad_norm": 2.840406656265259, "learning_rate": 4.836249439210409e-05, "loss": 0.1607, "step": 11000 }, { "epoch": 3.29, "eval_loss": 0.22238589823246002, "eval_precision": 0.8404415146405029, "eval_recall": 0.8439607130761415, "eval_runtime": 304.8188, "eval_samples_per_second": 43.875, "eval_steps_per_second": 1.371, "step": 11000 }, { "epoch": 3.32, "grad_norm": 1.5171958208084106, "learning_rate": 4.834754000299088e-05, "loss": 0.1606, "step": 11100 }, { "epoch": 3.35, "grad_norm": 1.6955703496932983, "learning_rate": 4.833258561387767e-05, "loss": 0.1554, "step": 11200 }, { "epoch": 3.38, "grad_norm": 1.893128514289856, "learning_rate": 4.831763122476447e-05, "loss": 0.1488, "step": 11300 }, { "epoch": 3.41, "grad_norm": 1.7299461364746094, "learning_rate": 4.8302676835651266e-05, "loss": 0.1596, "step": 11400 }, { "epoch": 3.44, "grad_norm": 2.150355339050293, "learning_rate": 4.8287722446538065e-05, "loss": 0.1623, "step": 11500 }, { "epoch": 3.47, "grad_norm": 3.2869186401367188, "learning_rate": 4.827276805742486e-05, "loss": 0.1622, "step": 11600 }, { "epoch": 3.5, "grad_norm": 1.7936344146728516, "learning_rate": 4.825781366831165e-05, "loss": 0.1651, "step": 11700 }, { "epoch": 3.53, "grad_norm": 1.579736590385437, "learning_rate": 4.824285927919845e-05, "loss": 0.169, "step": 11800 }, { "epoch": 3.56, "grad_norm": 2.1929283142089844, "learning_rate": 4.822790489008524e-05, "loss": 0.1629, "step": 11900 }, { "epoch": 3.59, "grad_norm": 1.7842892408370972, "learning_rate": 4.821295050097204e-05, "loss": 0.1621, "step": 12000 }, { "epoch": 3.59, "eval_loss": 0.21504360437393188, "eval_precision": 0.8350246187102197, "eval_recall": 0.8563379414390837, "eval_runtime": 306.2124, "eval_samples_per_second": 43.676, "eval_steps_per_second": 1.365, "step": 12000 }, { "epoch": 3.62, "grad_norm": 2.2203197479248047, "learning_rate": 4.8197996111858836e-05, "loss": 0.1595, "step": 12100 }, { "epoch": 3.65, "grad_norm": 1.8541319370269775, "learning_rate": 4.818304172274562e-05, "loss": 0.1702, "step": 12200 }, { "epoch": 3.68, "grad_norm": 1.3299143314361572, "learning_rate": 4.816808733363242e-05, "loss": 0.1651, "step": 12300 }, { "epoch": 3.71, "grad_norm": 1.7831319570541382, "learning_rate": 4.815313294451922e-05, "loss": 0.1601, "step": 12400 }, { "epoch": 3.74, "grad_norm": 1.0528268814086914, "learning_rate": 4.8138178555406015e-05, "loss": 0.1644, "step": 12500 }, { "epoch": 3.77, "grad_norm": 1.306907057762146, "learning_rate": 4.812322416629281e-05, "loss": 0.1556, "step": 12600 }, { "epoch": 3.8, "grad_norm": 1.8565049171447754, "learning_rate": 4.810826977717961e-05, "loss": 0.1654, "step": 12700 }, { "epoch": 3.83, "grad_norm": 1.4770090579986572, "learning_rate": 4.80933153880664e-05, "loss": 0.1628, "step": 12800 }, { "epoch": 3.86, "grad_norm": 1.9089502096176147, "learning_rate": 4.807836099895319e-05, "loss": 0.1632, "step": 12900 }, { "epoch": 3.89, "grad_norm": 1.3788821697235107, "learning_rate": 4.806340660983999e-05, "loss": 0.1597, "step": 13000 }, { "epoch": 3.89, "eval_loss": 0.2062728852033615, "eval_precision": 0.8378547953391097, "eval_recall": 0.8634194402537024, "eval_runtime": 304.7295, "eval_samples_per_second": 43.888, "eval_steps_per_second": 1.372, "step": 13000 }, { "epoch": 3.92, "grad_norm": 15.79686164855957, "learning_rate": 4.8048452220726785e-05, "loss": 0.1637, "step": 13100 }, { "epoch": 3.95, "grad_norm": 1.9472129344940186, "learning_rate": 4.8033497831613585e-05, "loss": 0.1666, "step": 13200 }, { "epoch": 3.98, "grad_norm": 2.1338746547698975, "learning_rate": 4.801854344250037e-05, "loss": 0.1614, "step": 13300 }, { "epoch": 4.01, "grad_norm": 1.1886940002441406, "learning_rate": 4.800358905338717e-05, "loss": 0.1474, "step": 13400 }, { "epoch": 4.04, "grad_norm": 2.4190924167633057, "learning_rate": 4.798863466427397e-05, "loss": 0.121, "step": 13500 }, { "epoch": 4.07, "grad_norm": 0.902584433555603, "learning_rate": 4.797368027516076e-05, "loss": 0.1192, "step": 13600 }, { "epoch": 4.1, "grad_norm": 2.3466804027557373, "learning_rate": 4.7958725886047556e-05, "loss": 0.129, "step": 13700 }, { "epoch": 4.13, "grad_norm": 4.135778427124023, "learning_rate": 4.7943771496934356e-05, "loss": 0.1206, "step": 13800 }, { "epoch": 4.16, "grad_norm": 1.6940075159072876, "learning_rate": 4.792881710782115e-05, "loss": 0.1313, "step": 13900 }, { "epoch": 4.19, "grad_norm": 1.7989047765731812, "learning_rate": 4.791386271870794e-05, "loss": 0.1139, "step": 14000 }, { "epoch": 4.19, "eval_loss": 0.20718763768672943, "eval_precision": 0.8631126181281592, "eval_recall": 0.8464238430986176, "eval_runtime": 304.0256, "eval_samples_per_second": 43.99, "eval_steps_per_second": 1.375, "step": 14000 }, { "epoch": 4.22, "grad_norm": 1.9864155054092407, "learning_rate": 4.789890832959474e-05, "loss": 0.1222, "step": 14100 }, { "epoch": 4.25, "grad_norm": 2.944260835647583, "learning_rate": 4.7883953940481534e-05, "loss": 0.1238, "step": 14200 }, { "epoch": 4.28, "grad_norm": 0.5448206663131714, "learning_rate": 4.786899955136833e-05, "loss": 0.1191, "step": 14300 }, { "epoch": 4.31, "grad_norm": 1.2996718883514404, "learning_rate": 4.785404516225512e-05, "loss": 0.1208, "step": 14400 }, { "epoch": 4.34, "grad_norm": 2.5177977085113525, "learning_rate": 4.783909077314192e-05, "loss": 0.1258, "step": 14500 }, { "epoch": 4.37, "grad_norm": 1.1356126070022583, "learning_rate": 4.782413638402872e-05, "loss": 0.1223, "step": 14600 }, { "epoch": 4.4, "grad_norm": 1.2576464414596558, "learning_rate": 4.7809181994915506e-05, "loss": 0.124, "step": 14700 }, { "epoch": 4.43, "grad_norm": 0.8868162631988525, "learning_rate": 4.7794227605802305e-05, "loss": 0.1246, "step": 14800 }, { "epoch": 4.46, "grad_norm": 2.3075501918792725, "learning_rate": 4.7779273216689105e-05, "loss": 0.1216, "step": 14900 }, { "epoch": 4.49, "grad_norm": 1.5548241138458252, "learning_rate": 4.776431882757589e-05, "loss": 0.1221, "step": 15000 }, { "epoch": 4.49, "eval_loss": 0.19333235919475555, "eval_precision": 0.8727586319112239, "eval_recall": 0.8257335509098187, "eval_runtime": 301.0242, "eval_samples_per_second": 44.428, "eval_steps_per_second": 1.389, "step": 15000 }, { "epoch": 4.52, "grad_norm": 1.0018868446350098, "learning_rate": 4.774936443846269e-05, "loss": 0.1237, "step": 15100 }, { "epoch": 4.55, "grad_norm": 1.264910101890564, "learning_rate": 4.773441004934949e-05, "loss": 0.1156, "step": 15200 }, { "epoch": 4.58, "grad_norm": 5.281520366668701, "learning_rate": 4.771945566023628e-05, "loss": 0.1286, "step": 15300 }, { "epoch": 4.61, "grad_norm": 1.9591494798660278, "learning_rate": 4.7704501271123076e-05, "loss": 0.1249, "step": 15400 }, { "epoch": 4.64, "grad_norm": 2.021794080734253, "learning_rate": 4.768954688200987e-05, "loss": 0.1233, "step": 15500 }, { "epoch": 4.67, "grad_norm": 2.007873773574829, "learning_rate": 4.767459249289667e-05, "loss": 0.1281, "step": 15600 }, { "epoch": 4.69, "grad_norm": 2.0108394622802734, "learning_rate": 4.765963810378346e-05, "loss": 0.1302, "step": 15700 }, { "epoch": 4.72, "grad_norm": 1.7474627494812012, "learning_rate": 4.7644683714670254e-05, "loss": 0.1164, "step": 15800 }, { "epoch": 4.75, "grad_norm": 0.758482813835144, "learning_rate": 4.7629729325557054e-05, "loss": 0.1211, "step": 15900 }, { "epoch": 4.78, "grad_norm": 0.9910192489624023, "learning_rate": 4.7614774936443854e-05, "loss": 0.1222, "step": 16000 }, { "epoch": 4.78, "eval_loss": 0.1955721527338028, "eval_precision": 0.8685029567382508, "eval_recall": 0.8591705409649312, "eval_runtime": 303.5505, "eval_samples_per_second": 44.059, "eval_steps_per_second": 1.377, "step": 16000 }, { "epoch": 4.81, "grad_norm": 2.4667110443115234, "learning_rate": 4.759982054733064e-05, "loss": 0.1214, "step": 16100 }, { "epoch": 4.84, "grad_norm": 2.103156566619873, "learning_rate": 4.758486615821744e-05, "loss": 0.1211, "step": 16200 }, { "epoch": 4.87, "grad_norm": 1.3806654214859009, "learning_rate": 4.756991176910424e-05, "loss": 0.1152, "step": 16300 }, { "epoch": 4.9, "grad_norm": 2.1174566745758057, "learning_rate": 4.7554957379991025e-05, "loss": 0.1246, "step": 16400 }, { "epoch": 4.93, "grad_norm": 2.0334010124206543, "learning_rate": 4.7540002990877825e-05, "loss": 0.1189, "step": 16500 }, { "epoch": 4.96, "grad_norm": 2.668717861175537, "learning_rate": 4.7525048601764625e-05, "loss": 0.1237, "step": 16600 }, { "epoch": 4.99, "grad_norm": 2.0749363899230957, "learning_rate": 4.751009421265142e-05, "loss": 0.1141, "step": 16700 }, { "epoch": 5.02, "grad_norm": 1.893052577972412, "learning_rate": 4.749513982353821e-05, "loss": 0.095, "step": 16800 }, { "epoch": 5.05, "grad_norm": 0.6495729684829712, "learning_rate": 4.7480185434425e-05, "loss": 0.085, "step": 16900 }, { "epoch": 5.08, "grad_norm": 1.8883150815963745, "learning_rate": 4.74652310453118e-05, "loss": 0.0886, "step": 17000 }, { "epoch": 5.08, "eval_loss": 0.2067934274673462, "eval_precision": 0.880300808187974, "eval_recall": 0.8685920133009021, "eval_runtime": 303.377, "eval_samples_per_second": 44.084, "eval_steps_per_second": 1.378, "step": 17000 }, { "epoch": 5.11, "grad_norm": 1.110809326171875, "learning_rate": 4.7450276656198596e-05, "loss": 0.0895, "step": 17100 }, { "epoch": 5.14, "grad_norm": 1.9441896677017212, "learning_rate": 4.743532226708539e-05, "loss": 0.0935, "step": 17200 }, { "epoch": 5.17, "grad_norm": 1.9851264953613281, "learning_rate": 4.742036787797219e-05, "loss": 0.0927, "step": 17300 }, { "epoch": 5.2, "grad_norm": 1.2447096109390259, "learning_rate": 4.740541348885899e-05, "loss": 0.0911, "step": 17400 }, { "epoch": 5.23, "grad_norm": 1.0151656866073608, "learning_rate": 4.7390459099745774e-05, "loss": 0.0932, "step": 17500 }, { "epoch": 5.26, "grad_norm": 0.8265299201011658, "learning_rate": 4.7375504710632574e-05, "loss": 0.1006, "step": 17600 }, { "epoch": 5.29, "grad_norm": 2.7819435596466064, "learning_rate": 4.736055032151937e-05, "loss": 0.0892, "step": 17700 }, { "epoch": 5.32, "grad_norm": 1.3706836700439453, "learning_rate": 4.734559593240616e-05, "loss": 0.0976, "step": 17800 }, { "epoch": 5.35, "grad_norm": 3.606653928756714, "learning_rate": 4.733064154329296e-05, "loss": 0.0932, "step": 17900 }, { "epoch": 5.38, "grad_norm": 1.3535112142562866, "learning_rate": 4.731568715417975e-05, "loss": 0.0917, "step": 18000 }, { "epoch": 5.38, "eval_loss": 0.1965586394071579, "eval_precision": 0.8806825297432687, "eval_recall": 0.8660673050278641, "eval_runtime": 303.4486, "eval_samples_per_second": 44.073, "eval_steps_per_second": 1.377, "step": 18000 }, { "epoch": 5.41, "grad_norm": 1.7558257579803467, "learning_rate": 4.7300732765066545e-05, "loss": 0.088, "step": 18100 }, { "epoch": 5.44, "grad_norm": 2.291628837585449, "learning_rate": 4.7285778375953345e-05, "loss": 0.0963, "step": 18200 }, { "epoch": 5.47, "grad_norm": 1.4217274188995361, "learning_rate": 4.727082398684014e-05, "loss": 0.0969, "step": 18300 }, { "epoch": 5.5, "grad_norm": 1.8852524757385254, "learning_rate": 4.725586959772694e-05, "loss": 0.0952, "step": 18400 }, { "epoch": 5.53, "grad_norm": 2.106452465057373, "learning_rate": 4.724091520861373e-05, "loss": 0.0966, "step": 18500 }, { "epoch": 5.56, "grad_norm": 1.9277011156082153, "learning_rate": 4.722596081950052e-05, "loss": 0.089, "step": 18600 }, { "epoch": 5.59, "grad_norm": 1.2175403833389282, "learning_rate": 4.721100643038732e-05, "loss": 0.0931, "step": 18700 }, { "epoch": 5.62, "grad_norm": 2.060368299484253, "learning_rate": 4.7196052041274115e-05, "loss": 0.0968, "step": 18800 }, { "epoch": 5.65, "grad_norm": 1.4981082677841187, "learning_rate": 4.718109765216091e-05, "loss": 0.0929, "step": 18900 }, { "epoch": 5.68, "grad_norm": 1.6335569620132446, "learning_rate": 4.716614326304771e-05, "loss": 0.0938, "step": 19000 }, { "epoch": 5.68, "eval_loss": 0.19031907618045807, "eval_precision": 0.8913960623881361, "eval_recall": 0.858708704085717, "eval_runtime": 301.9634, "eval_samples_per_second": 44.29, "eval_steps_per_second": 1.384, "step": 19000 }, { "epoch": 5.71, "grad_norm": 0.46949952840805054, "learning_rate": 4.71511888739345e-05, "loss": 0.09, "step": 19100 }, { "epoch": 5.74, "grad_norm": 2.6525633335113525, "learning_rate": 4.7136234484821294e-05, "loss": 0.0954, "step": 19200 }, { "epoch": 5.77, "grad_norm": 1.2892892360687256, "learning_rate": 4.7121280095708093e-05, "loss": 0.0949, "step": 19300 }, { "epoch": 5.8, "grad_norm": 1.5637331008911133, "learning_rate": 4.7106325706594886e-05, "loss": 0.0962, "step": 19400 }, { "epoch": 5.83, "grad_norm": 2.5609443187713623, "learning_rate": 4.709137131748168e-05, "loss": 0.0921, "step": 19500 }, { "epoch": 5.86, "grad_norm": 1.4690775871276855, "learning_rate": 4.707641692836848e-05, "loss": 0.0955, "step": 19600 }, { "epoch": 5.89, "grad_norm": 1.081965684890747, "learning_rate": 4.706146253925527e-05, "loss": 0.0928, "step": 19700 }, { "epoch": 5.92, "grad_norm": 1.6817141771316528, "learning_rate": 4.704650815014207e-05, "loss": 0.0963, "step": 19800 }, { "epoch": 5.95, "grad_norm": 2.984762191772461, "learning_rate": 4.7031553761028864e-05, "loss": 0.095, "step": 19900 }, { "epoch": 5.98, "grad_norm": 2.1594882011413574, "learning_rate": 4.701659937191566e-05, "loss": 0.0985, "step": 20000 }, { "epoch": 5.98, "eval_loss": 0.18151727318763733, "eval_precision": 0.9042639298086573, "eval_recall": 0.859940269096955, "eval_runtime": 302.8985, "eval_samples_per_second": 44.153, "eval_steps_per_second": 1.38, "step": 20000 }, { "epoch": 6.01, "grad_norm": 2.0218722820281982, "learning_rate": 4.700164498280246e-05, "loss": 0.0886, "step": 20100 }, { "epoch": 6.04, "grad_norm": 1.3569700717926025, "learning_rate": 4.698669059368925e-05, "loss": 0.0711, "step": 20200 }, { "epoch": 6.07, "grad_norm": 1.5697298049926758, "learning_rate": 4.697173620457604e-05, "loss": 0.0724, "step": 20300 }, { "epoch": 6.1, "grad_norm": 1.7853014469146729, "learning_rate": 4.695678181546284e-05, "loss": 0.0747, "step": 20400 }, { "epoch": 6.13, "grad_norm": 0.7531015872955322, "learning_rate": 4.6941827426349635e-05, "loss": 0.074, "step": 20500 }, { "epoch": 6.16, "grad_norm": 1.3895870447158813, "learning_rate": 4.692687303723643e-05, "loss": 0.0683, "step": 20600 }, { "epoch": 6.19, "grad_norm": 2.084857225418091, "learning_rate": 4.691191864812323e-05, "loss": 0.0741, "step": 20700 }, { "epoch": 6.22, "grad_norm": 0.9525838494300842, "learning_rate": 4.689696425901002e-05, "loss": 0.0647, "step": 20800 }, { "epoch": 6.25, "grad_norm": 2.0475118160247803, "learning_rate": 4.6882009869896813e-05, "loss": 0.0746, "step": 20900 }, { "epoch": 6.28, "grad_norm": 1.0650370121002197, "learning_rate": 4.686705548078361e-05, "loss": 0.0696, "step": 21000 }, { "epoch": 6.28, "eval_loss": 0.19116894900798798, "eval_precision": 0.9016753284483037, "eval_recall": 0.8600326364727978, "eval_runtime": 303.289, "eval_samples_per_second": 44.097, "eval_steps_per_second": 1.378, "step": 21000 }, { "epoch": 6.31, "grad_norm": 1.5736846923828125, "learning_rate": 4.6852101091670406e-05, "loss": 0.0685, "step": 21100 }, { "epoch": 6.34, "grad_norm": 0.7526031136512756, "learning_rate": 4.6837146702557206e-05, "loss": 0.0816, "step": 21200 }, { "epoch": 6.37, "grad_norm": 1.284680724143982, "learning_rate": 4.6822192313444e-05, "loss": 0.0676, "step": 21300 }, { "epoch": 6.4, "grad_norm": 4.207923889160156, "learning_rate": 4.680723792433079e-05, "loss": 0.0679, "step": 21400 }, { "epoch": 6.43, "grad_norm": 1.3670810461044312, "learning_rate": 4.679228353521759e-05, "loss": 0.0721, "step": 21500 }, { "epoch": 6.46, "grad_norm": 1.8094091415405273, "learning_rate": 4.6777329146104384e-05, "loss": 0.0673, "step": 21600 }, { "epoch": 6.49, "grad_norm": 2.057133436203003, "learning_rate": 4.676237475699118e-05, "loss": 0.0711, "step": 21700 }, { "epoch": 6.52, "grad_norm": 1.9356772899627686, "learning_rate": 4.6747420367877976e-05, "loss": 0.0713, "step": 21800 }, { "epoch": 6.55, "grad_norm": 0.4188990592956543, "learning_rate": 4.673246597876477e-05, "loss": 0.0772, "step": 21900 }, { "epoch": 6.58, "grad_norm": 0.9256879091262817, "learning_rate": 4.671751158965156e-05, "loss": 0.0715, "step": 22000 }, { "epoch": 6.58, "eval_loss": 0.19474047422409058, "eval_precision": 0.9012208304190246, "eval_recall": 0.8727793343391115, "eval_runtime": 305.0313, "eval_samples_per_second": 43.845, "eval_steps_per_second": 1.37, "step": 22000 }, { "epoch": 6.61, "grad_norm": 0.890701949596405, "learning_rate": 4.670255720053836e-05, "loss": 0.0712, "step": 22100 }, { "epoch": 6.64, "grad_norm": 1.6164826154708862, "learning_rate": 4.6687602811425155e-05, "loss": 0.0772, "step": 22200 }, { "epoch": 6.67, "grad_norm": 1.2075903415679932, "learning_rate": 4.667264842231195e-05, "loss": 0.0734, "step": 22300 }, { "epoch": 6.7, "grad_norm": 0.9141576886177063, "learning_rate": 4.665769403319875e-05, "loss": 0.0803, "step": 22400 }, { "epoch": 6.73, "grad_norm": 3.0547311305999756, "learning_rate": 4.664273964408554e-05, "loss": 0.0688, "step": 22500 }, { "epoch": 6.76, "grad_norm": 1.1152849197387695, "learning_rate": 4.662778525497234e-05, "loss": 0.0703, "step": 22600 }, { "epoch": 6.79, "grad_norm": 2.150590181350708, "learning_rate": 4.661283086585913e-05, "loss": 0.0745, "step": 22700 }, { "epoch": 6.82, "grad_norm": 1.4829721450805664, "learning_rate": 4.6597876476745926e-05, "loss": 0.0738, "step": 22800 }, { "epoch": 6.85, "grad_norm": 0.6545503735542297, "learning_rate": 4.6582922087632725e-05, "loss": 0.0764, "step": 22900 }, { "epoch": 6.88, "grad_norm": 1.2322636842727661, "learning_rate": 4.656796769851952e-05, "loss": 0.0765, "step": 23000 }, { "epoch": 6.88, "eval_loss": 0.18639414012432098, "eval_precision": 0.9072111489223789, "eval_recall": 0.861849194864374, "eval_runtime": 301.5834, "eval_samples_per_second": 44.346, "eval_steps_per_second": 1.386, "step": 23000 }, { "epoch": 6.91, "grad_norm": 1.8931362628936768, "learning_rate": 4.655301330940631e-05, "loss": 0.0783, "step": 23100 }, { "epoch": 6.94, "grad_norm": 0.7884649038314819, "learning_rate": 4.653805892029311e-05, "loss": 0.0718, "step": 23200 }, { "epoch": 6.97, "grad_norm": 0.6341440081596375, "learning_rate": 4.6523104531179904e-05, "loss": 0.0698, "step": 23300 }, { "epoch": 7.0, "grad_norm": 0.9098210334777832, "learning_rate": 4.6508150142066697e-05, "loss": 0.071, "step": 23400 }, { "epoch": 7.03, "grad_norm": 3.0700671672821045, "learning_rate": 4.6493195752953496e-05, "loss": 0.0552, "step": 23500 }, { "epoch": 7.06, "grad_norm": 1.5736912488937378, "learning_rate": 4.647824136384029e-05, "loss": 0.055, "step": 23600 }, { "epoch": 7.09, "grad_norm": 0.9347396492958069, "learning_rate": 4.646328697472708e-05, "loss": 0.0592, "step": 23700 }, { "epoch": 7.12, "grad_norm": 1.7453091144561768, "learning_rate": 4.644833258561388e-05, "loss": 0.0623, "step": 23800 }, { "epoch": 7.15, "grad_norm": 1.1539710760116577, "learning_rate": 4.6433378196500674e-05, "loss": 0.0558, "step": 23900 }, { "epoch": 7.18, "grad_norm": 0.7530619502067566, "learning_rate": 4.641842380738747e-05, "loss": 0.0546, "step": 24000 }, { "epoch": 7.18, "eval_loss": 0.2078467607498169, "eval_precision": 0.908101688386724, "eval_recall": 0.8710551433233782, "eval_runtime": 302.902, "eval_samples_per_second": 44.153, "eval_steps_per_second": 1.38, "step": 24000 }, { "epoch": 7.21, "grad_norm": 1.6339865922927856, "learning_rate": 4.640346941827427e-05, "loss": 0.0579, "step": 24100 }, { "epoch": 7.24, "grad_norm": 2.397862434387207, "learning_rate": 4.638851502916106e-05, "loss": 0.054, "step": 24200 }, { "epoch": 7.27, "grad_norm": 2.5979652404785156, "learning_rate": 4.637356064004786e-05, "loss": 0.0582, "step": 24300 }, { "epoch": 7.3, "grad_norm": 1.4249415397644043, "learning_rate": 4.635860625093465e-05, "loss": 0.0611, "step": 24400 }, { "epoch": 7.33, "grad_norm": 1.1104274988174438, "learning_rate": 4.6343651861821445e-05, "loss": 0.0603, "step": 24500 }, { "epoch": 7.36, "grad_norm": 1.039832353591919, "learning_rate": 4.6328697472708245e-05, "loss": 0.06, "step": 24600 }, { "epoch": 7.39, "grad_norm": 1.1284308433532715, "learning_rate": 4.631374308359504e-05, "loss": 0.0528, "step": 24700 }, { "epoch": 7.42, "grad_norm": 3.3189823627471924, "learning_rate": 4.629878869448183e-05, "loss": 0.0634, "step": 24800 }, { "epoch": 7.45, "grad_norm": 2.0465550422668457, "learning_rate": 4.628383430536863e-05, "loss": 0.0599, "step": 24900 }, { "epoch": 7.48, "grad_norm": 1.93597412109375, "learning_rate": 4.626887991625542e-05, "loss": 0.0588, "step": 25000 }, { "epoch": 7.48, "eval_loss": 0.20041726529598236, "eval_precision": 0.9101642057026477, "eval_recall": 0.8805997721604729, "eval_runtime": 302.521, "eval_samples_per_second": 44.209, "eval_steps_per_second": 1.382, "step": 25000 }, { "epoch": 7.51, "grad_norm": 2.2025020122528076, "learning_rate": 4.6253925527142216e-05, "loss": 0.0557, "step": 25100 }, { "epoch": 7.54, "grad_norm": 2.4900927543640137, "learning_rate": 4.6238971138029016e-05, "loss": 0.0613, "step": 25200 }, { "epoch": 7.57, "grad_norm": 1.2546288967132568, "learning_rate": 4.622401674891581e-05, "loss": 0.0609, "step": 25300 }, { "epoch": 7.6, "grad_norm": 1.3969674110412598, "learning_rate": 4.62090623598026e-05, "loss": 0.0617, "step": 25400 }, { "epoch": 7.63, "grad_norm": 0.2969658374786377, "learning_rate": 4.61941079706894e-05, "loss": 0.0602, "step": 25500 }, { "epoch": 7.66, "grad_norm": 0.7388882040977478, "learning_rate": 4.6179153581576194e-05, "loss": 0.0593, "step": 25600 }, { "epoch": 7.69, "grad_norm": 0.609923779964447, "learning_rate": 4.6164199192462994e-05, "loss": 0.0596, "step": 25700 }, { "epoch": 7.72, "grad_norm": 2.3986215591430664, "learning_rate": 4.614924480334979e-05, "loss": 0.0651, "step": 25800 }, { "epoch": 7.75, "grad_norm": 1.1203041076660156, "learning_rate": 4.613429041423658e-05, "loss": 0.0649, "step": 25900 }, { "epoch": 7.78, "grad_norm": 0.7929214835166931, "learning_rate": 4.611933602512338e-05, "loss": 0.0648, "step": 26000 }, { "epoch": 7.78, "eval_loss": 0.19321496784687042, "eval_precision": 0.9163062916598927, "eval_recall": 0.8676683395424736, "eval_runtime": 301.2643, "eval_samples_per_second": 44.393, "eval_steps_per_second": 1.387, "step": 26000 }, { "epoch": 7.81, "grad_norm": 0.5828276872634888, "learning_rate": 4.610438163601017e-05, "loss": 0.058, "step": 26100 }, { "epoch": 7.83, "grad_norm": 0.44025149941444397, "learning_rate": 4.6089427246896965e-05, "loss": 0.0598, "step": 26200 }, { "epoch": 7.86, "grad_norm": 0.7976229786872864, "learning_rate": 4.6074472857783765e-05, "loss": 0.0655, "step": 26300 }, { "epoch": 7.89, "grad_norm": 2.6843769550323486, "learning_rate": 4.605951846867056e-05, "loss": 0.0588, "step": 26400 }, { "epoch": 7.92, "grad_norm": 1.1365008354187012, "learning_rate": 4.604456407955735e-05, "loss": 0.0563, "step": 26500 }, { "epoch": 7.95, "grad_norm": 2.463488817214966, "learning_rate": 4.602960969044415e-05, "loss": 0.0581, "step": 26600 }, { "epoch": 7.98, "grad_norm": 0.47716620564460754, "learning_rate": 4.601465530133094e-05, "loss": 0.0595, "step": 26700 }, { "epoch": 8.01, "grad_norm": 1.3218754529953003, "learning_rate": 4.5999700912217736e-05, "loss": 0.0554, "step": 26800 }, { "epoch": 8.04, "grad_norm": 1.0640392303466797, "learning_rate": 4.5984746523104536e-05, "loss": 0.0409, "step": 26900 }, { "epoch": 8.07, "grad_norm": 0.7323993444442749, "learning_rate": 4.596979213399133e-05, "loss": 0.0463, "step": 27000 }, { "epoch": 8.07, "eval_loss": 0.21357020735740662, "eval_precision": 0.9223724947042529, "eval_recall": 0.8714246128267495, "eval_runtime": 301.9271, "eval_samples_per_second": 44.295, "eval_steps_per_second": 1.384, "step": 27000 }, { "epoch": 8.1, "grad_norm": 2.1960983276367188, "learning_rate": 4.595483774487813e-05, "loss": 0.0424, "step": 27100 }, { "epoch": 8.13, "grad_norm": 2.5061357021331787, "learning_rate": 4.593988335576492e-05, "loss": 0.0436, "step": 27200 }, { "epoch": 8.16, "grad_norm": 0.5249370336532593, "learning_rate": 4.5924928966651714e-05, "loss": 0.0537, "step": 27300 }, { "epoch": 8.19, "grad_norm": 1.0211517810821533, "learning_rate": 4.5909974577538514e-05, "loss": 0.0448, "step": 27400 }, { "epoch": 8.22, "grad_norm": 2.860835552215576, "learning_rate": 4.58950201884253e-05, "loss": 0.0474, "step": 27500 }, { "epoch": 8.25, "grad_norm": 2.019699811935425, "learning_rate": 4.58800657993121e-05, "loss": 0.0482, "step": 27600 }, { "epoch": 8.28, "grad_norm": 0.9144898653030396, "learning_rate": 4.58651114101989e-05, "loss": 0.045, "step": 27700 }, { "epoch": 8.31, "grad_norm": 1.656792402267456, "learning_rate": 4.585015702108569e-05, "loss": 0.0475, "step": 27800 }, { "epoch": 8.34, "grad_norm": 1.1702663898468018, "learning_rate": 4.5835202631972485e-05, "loss": 0.0445, "step": 27900 }, { "epoch": 8.37, "grad_norm": 2.0331854820251465, "learning_rate": 4.5820248242859284e-05, "loss": 0.0429, "step": 28000 }, { "epoch": 8.37, "eval_loss": 0.22609786689281464, "eval_precision": 0.9198246970868781, "eval_recall": 0.8788447920194588, "eval_runtime": 302.1631, "eval_samples_per_second": 44.261, "eval_steps_per_second": 1.383, "step": 28000 }, { "epoch": 8.4, "grad_norm": 5.98319673538208, "learning_rate": 4.580529385374608e-05, "loss": 0.0429, "step": 28100 }, { "epoch": 8.43, "grad_norm": 1.0793452262878418, "learning_rate": 4.579033946463287e-05, "loss": 0.0525, "step": 28200 }, { "epoch": 8.46, "grad_norm": 1.4804214239120483, "learning_rate": 4.577538507551967e-05, "loss": 0.0459, "step": 28300 }, { "epoch": 8.49, "grad_norm": 0.9862244129180908, "learning_rate": 4.576043068640646e-05, "loss": 0.0534, "step": 28400 }, { "epoch": 8.52, "grad_norm": 1.26304030418396, "learning_rate": 4.574547629729326e-05, "loss": 0.048, "step": 28500 }, { "epoch": 8.55, "grad_norm": 0.4214903712272644, "learning_rate": 4.573052190818005e-05, "loss": 0.0547, "step": 28600 }, { "epoch": 8.58, "grad_norm": 0.9271091222763062, "learning_rate": 4.571556751906685e-05, "loss": 0.0537, "step": 28700 }, { "epoch": 8.61, "grad_norm": 0.8437818884849548, "learning_rate": 4.570061312995365e-05, "loss": 0.0537, "step": 28800 }, { "epoch": 8.64, "grad_norm": 0.8551807999610901, "learning_rate": 4.5685658740840434e-05, "loss": 0.0461, "step": 28900 }, { "epoch": 8.67, "grad_norm": 1.8268975019454956, "learning_rate": 4.5670704351727234e-05, "loss": 0.046, "step": 29000 }, { "epoch": 8.67, "eval_loss": 0.20938238501548767, "eval_precision": 0.9151901573163308, "eval_recall": 0.8794605745250778, "eval_runtime": 302.034, "eval_samples_per_second": 44.28, "eval_steps_per_second": 1.384, "step": 29000 }, { "epoch": 8.7, "grad_norm": 0.08975500613451004, "learning_rate": 4.565574996261403e-05, "loss": 0.0493, "step": 29100 }, { "epoch": 8.73, "grad_norm": 2.3698606491088867, "learning_rate": 4.564079557350082e-05, "loss": 0.0506, "step": 29200 }, { "epoch": 8.76, "grad_norm": 1.1118419170379639, "learning_rate": 4.562584118438762e-05, "loss": 0.0445, "step": 29300 }, { "epoch": 8.79, "grad_norm": 1.8186097145080566, "learning_rate": 4.561088679527442e-05, "loss": 0.0471, "step": 29400 }, { "epoch": 8.82, "grad_norm": 1.4056422710418701, "learning_rate": 4.559593240616121e-05, "loss": 0.0513, "step": 29500 }, { "epoch": 8.85, "grad_norm": 1.5597076416015625, "learning_rate": 4.5580978017048004e-05, "loss": 0.0452, "step": 29600 }, { "epoch": 8.88, "grad_norm": 0.8287553191184998, "learning_rate": 4.5566023627934804e-05, "loss": 0.0523, "step": 29700 }, { "epoch": 8.91, "grad_norm": 0.6897550821304321, "learning_rate": 4.55510692388216e-05, "loss": 0.0466, "step": 29800 }, { "epoch": 8.94, "grad_norm": 0.7071977853775024, "learning_rate": 4.553611484970839e-05, "loss": 0.0434, "step": 29900 }, { "epoch": 8.97, "grad_norm": 0.6574975252151489, "learning_rate": 4.552116046059518e-05, "loss": 0.0495, "step": 30000 }, { "epoch": 8.97, "eval_loss": 0.20542754232883453, "eval_precision": 0.9183409556852231, "eval_recall": 0.8964561716801626, "eval_runtime": 302.3305, "eval_samples_per_second": 44.236, "eval_steps_per_second": 1.383, "step": 30000 }, { "epoch": 9.0, "grad_norm": 1.3489534854888916, "learning_rate": 4.550620607148198e-05, "loss": 0.0499, "step": 30100 }, { "epoch": 9.03, "grad_norm": 1.0300263166427612, "learning_rate": 4.549125168236878e-05, "loss": 0.0353, "step": 30200 }, { "epoch": 9.06, "grad_norm": 0.4393318295478821, "learning_rate": 4.547629729325557e-05, "loss": 0.0352, "step": 30300 }, { "epoch": 9.09, "grad_norm": 0.4519498944282532, "learning_rate": 4.546134290414237e-05, "loss": 0.0342, "step": 30400 }, { "epoch": 9.12, "grad_norm": 0.9631327986717224, "learning_rate": 4.544638851502917e-05, "loss": 0.0364, "step": 30500 }, { "epoch": 9.15, "grad_norm": 2.7282943725585938, "learning_rate": 4.5431434125915954e-05, "loss": 0.0354, "step": 30600 }, { "epoch": 9.18, "grad_norm": 0.5908452272415161, "learning_rate": 4.541647973680275e-05, "loss": 0.0356, "step": 30700 }, { "epoch": 9.21, "grad_norm": 2.3660802841186523, "learning_rate": 4.540152534768955e-05, "loss": 0.0413, "step": 30800 }, { "epoch": 9.24, "grad_norm": 1.7346217632293701, "learning_rate": 4.5386570958576346e-05, "loss": 0.036, "step": 30900 }, { "epoch": 9.27, "grad_norm": 1.0829362869262695, "learning_rate": 4.537161656946314e-05, "loss": 0.0376, "step": 31000 }, { "epoch": 9.27, "eval_loss": 0.226752370595932, "eval_precision": 0.925325841962565, "eval_recall": 0.8721635518334924, "eval_runtime": 302.3165, "eval_samples_per_second": 44.238, "eval_steps_per_second": 1.383, "step": 31000 }, { "epoch": 9.3, "grad_norm": 1.2249701023101807, "learning_rate": 4.535666218034993e-05, "loss": 0.039, "step": 31100 }, { "epoch": 9.33, "grad_norm": 2.201986789703369, "learning_rate": 4.534170779123673e-05, "loss": 0.0384, "step": 31200 }, { "epoch": 9.36, "grad_norm": 0.31157541275024414, "learning_rate": 4.5326753402123524e-05, "loss": 0.0318, "step": 31300 }, { "epoch": 9.39, "grad_norm": 0.7502834796905518, "learning_rate": 4.531179901301032e-05, "loss": 0.0397, "step": 31400 }, { "epoch": 9.42, "grad_norm": 0.3627040684223175, "learning_rate": 4.529684462389712e-05, "loss": 0.0389, "step": 31500 }, { "epoch": 9.45, "grad_norm": 2.008009672164917, "learning_rate": 4.5281890234783916e-05, "loss": 0.042, "step": 31600 }, { "epoch": 9.48, "grad_norm": 2.5352540016174316, "learning_rate": 4.52669358456707e-05, "loss": 0.0407, "step": 31700 }, { "epoch": 9.51, "grad_norm": 0.543992280960083, "learning_rate": 4.52519814565575e-05, "loss": 0.0309, "step": 31800 }, { "epoch": 9.54, "grad_norm": 1.3150848150253296, "learning_rate": 4.52370270674443e-05, "loss": 0.0369, "step": 31900 }, { "epoch": 9.57, "grad_norm": 1.6026105880737305, "learning_rate": 4.522207267833109e-05, "loss": 0.0418, "step": 32000 }, { "epoch": 9.57, "eval_loss": 0.21585828065872192, "eval_precision": 0.9208557844690967, "eval_recall": 0.8945164567874627, "eval_runtime": 303.0508, "eval_samples_per_second": 44.131, "eval_steps_per_second": 1.379, "step": 32000 }, { "epoch": 9.6, "grad_norm": 1.8489359617233276, "learning_rate": 4.520711828921789e-05, "loss": 0.0427, "step": 32100 }, { "epoch": 9.63, "grad_norm": 2.4979922771453857, "learning_rate": 4.519216390010468e-05, "loss": 0.0337, "step": 32200 }, { "epoch": 9.66, "grad_norm": 0.3452712595462799, "learning_rate": 4.517720951099148e-05, "loss": 0.0347, "step": 32300 }, { "epoch": 9.69, "grad_norm": 1.081455945968628, "learning_rate": 4.516225512187827e-05, "loss": 0.047, "step": 32400 }, { "epoch": 9.72, "grad_norm": 2.3087069988250732, "learning_rate": 4.5147300732765066e-05, "loss": 0.0404, "step": 32500 }, { "epoch": 9.75, "grad_norm": 1.901135802268982, "learning_rate": 4.5132346343651865e-05, "loss": 0.0394, "step": 32600 }, { "epoch": 9.78, "grad_norm": 1.2389637231826782, "learning_rate": 4.511739195453866e-05, "loss": 0.0376, "step": 32700 }, { "epoch": 9.81, "grad_norm": 0.619143545627594, "learning_rate": 4.510243756542545e-05, "loss": 0.0414, "step": 32800 }, { "epoch": 9.84, "grad_norm": 1.3270721435546875, "learning_rate": 4.508748317631225e-05, "loss": 0.0405, "step": 32900 }, { "epoch": 9.87, "grad_norm": 2.503606081008911, "learning_rate": 4.507252878719905e-05, "loss": 0.0493, "step": 33000 }, { "epoch": 9.87, "eval_loss": 0.20709815621376038, "eval_precision": 0.9246134231259603, "eval_recall": 0.8708088303211305, "eval_runtime": 301.957, "eval_samples_per_second": 44.291, "eval_steps_per_second": 1.384, "step": 33000 }, { "epoch": 9.9, "grad_norm": 0.6343371868133545, "learning_rate": 4.505757439808584e-05, "loss": 0.0365, "step": 33100 }, { "epoch": 9.93, "grad_norm": 0.3116106688976288, "learning_rate": 4.5042620008972636e-05, "loss": 0.0358, "step": 33200 }, { "epoch": 9.96, "grad_norm": 0.7307326197624207, "learning_rate": 4.5027665619859436e-05, "loss": 0.0411, "step": 33300 }, { "epoch": 9.99, "grad_norm": 2.104717493057251, "learning_rate": 4.501271123074622e-05, "loss": 0.0401, "step": 33400 }, { "epoch": 10.02, "grad_norm": 3.8659448623657227, "learning_rate": 4.499775684163302e-05, "loss": 0.0348, "step": 33500 }, { "epoch": 10.05, "grad_norm": 1.0324366092681885, "learning_rate": 4.4982802452519815e-05, "loss": 0.0344, "step": 33600 }, { "epoch": 10.08, "grad_norm": 1.0838052034378052, "learning_rate": 4.4967848063406614e-05, "loss": 0.0327, "step": 33700 }, { "epoch": 10.11, "grad_norm": 1.8709659576416016, "learning_rate": 4.495289367429341e-05, "loss": 0.0267, "step": 33800 }, { "epoch": 10.14, "grad_norm": 0.4261041283607483, "learning_rate": 4.49379392851802e-05, "loss": 0.0305, "step": 33900 }, { "epoch": 10.17, "grad_norm": 0.16497644782066345, "learning_rate": 4.4922984896067e-05, "loss": 0.0276, "step": 34000 }, { "epoch": 10.17, "eval_loss": 0.2343963235616684, "eval_precision": 0.9252133285746731, "eval_recall": 0.8779826965115921, "eval_runtime": 301.9423, "eval_samples_per_second": 44.293, "eval_steps_per_second": 1.384, "step": 34000 }, { "epoch": 10.2, "grad_norm": 2.9655115604400635, "learning_rate": 4.490803050695379e-05, "loss": 0.0268, "step": 34100 }, { "epoch": 10.23, "grad_norm": 1.536979079246521, "learning_rate": 4.4893076117840586e-05, "loss": 0.0299, "step": 34200 }, { "epoch": 10.26, "grad_norm": 2.8167715072631836, "learning_rate": 4.4878121728727385e-05, "loss": 0.0325, "step": 34300 }, { "epoch": 10.29, "grad_norm": 2.1207668781280518, "learning_rate": 4.4863167339614185e-05, "loss": 0.029, "step": 34400 }, { "epoch": 10.32, "grad_norm": 2.277759552001953, "learning_rate": 4.484821295050097e-05, "loss": 0.0308, "step": 34500 }, { "epoch": 10.35, "grad_norm": 1.226417899131775, "learning_rate": 4.483325856138777e-05, "loss": 0.0299, "step": 34600 }, { "epoch": 10.38, "grad_norm": 0.63482266664505, "learning_rate": 4.4818304172274563e-05, "loss": 0.0337, "step": 34700 }, { "epoch": 10.41, "grad_norm": 1.8453493118286133, "learning_rate": 4.4803349783161356e-05, "loss": 0.0346, "step": 34800 }, { "epoch": 10.44, "grad_norm": 0.40149375796318054, "learning_rate": 4.4788395394048156e-05, "loss": 0.03, "step": 34900 }, { "epoch": 10.47, "grad_norm": 0.3980793058872223, "learning_rate": 4.477344100493495e-05, "loss": 0.035, "step": 35000 }, { "epoch": 10.47, "eval_loss": 0.22229593992233276, "eval_precision": 0.9262946269334285, "eval_recall": 0.8795221527756396, "eval_runtime": 302.9773, "eval_samples_per_second": 44.142, "eval_steps_per_second": 1.38, "step": 35000 }, { "epoch": 10.5, "grad_norm": 0.629266083240509, "learning_rate": 4.475848661582174e-05, "loss": 0.0363, "step": 35100 }, { "epoch": 10.53, "grad_norm": 1.134805679321289, "learning_rate": 4.474353222670854e-05, "loss": 0.0343, "step": 35200 }, { "epoch": 10.56, "grad_norm": 1.9168953895568848, "learning_rate": 4.4728577837595334e-05, "loss": 0.0333, "step": 35300 }, { "epoch": 10.59, "grad_norm": 0.7437408566474915, "learning_rate": 4.4713623448482134e-05, "loss": 0.0377, "step": 35400 }, { "epoch": 10.62, "grad_norm": 0.8649216890335083, "learning_rate": 4.469866905936893e-05, "loss": 0.0387, "step": 35500 }, { "epoch": 10.65, "grad_norm": 1.9679126739501953, "learning_rate": 4.468371467025572e-05, "loss": 0.0324, "step": 35600 }, { "epoch": 10.68, "grad_norm": 1.0343681573867798, "learning_rate": 4.466876028114252e-05, "loss": 0.0371, "step": 35700 }, { "epoch": 10.71, "grad_norm": 0.3291555941104889, "learning_rate": 4.465380589202931e-05, "loss": 0.0339, "step": 35800 }, { "epoch": 10.74, "grad_norm": 1.2407808303833008, "learning_rate": 4.4638851502916105e-05, "loss": 0.0376, "step": 35900 }, { "epoch": 10.77, "grad_norm": 1.2906955480575562, "learning_rate": 4.4623897113802905e-05, "loss": 0.0348, "step": 36000 }, { "epoch": 10.77, "eval_loss": 0.22172214090824127, "eval_precision": 0.9251365945617791, "eval_recall": 0.8914683333846486, "eval_runtime": 302.63, "eval_samples_per_second": 44.193, "eval_steps_per_second": 1.381, "step": 36000 }, { "epoch": 10.8, "grad_norm": 0.9678496718406677, "learning_rate": 4.46089427246897e-05, "loss": 0.0354, "step": 36100 }, { "epoch": 10.83, "grad_norm": 1.92240571975708, "learning_rate": 4.459398833557649e-05, "loss": 0.0324, "step": 36200 }, { "epoch": 10.86, "grad_norm": 2.5916824340820312, "learning_rate": 4.457903394646329e-05, "loss": 0.034, "step": 36300 }, { "epoch": 10.89, "grad_norm": 1.4677050113677979, "learning_rate": 4.456407955735008e-05, "loss": 0.0304, "step": 36400 }, { "epoch": 10.92, "grad_norm": 1.1423336267471313, "learning_rate": 4.4549125168236876e-05, "loss": 0.0315, "step": 36500 }, { "epoch": 10.94, "grad_norm": 1.0664762258529663, "learning_rate": 4.4534170779123676e-05, "loss": 0.0371, "step": 36600 }, { "epoch": 10.97, "grad_norm": 1.344557762145996, "learning_rate": 4.451921639001047e-05, "loss": 0.0334, "step": 36700 }, { "epoch": 11.0, "grad_norm": 2.944450616836548, "learning_rate": 4.450426200089727e-05, "loss": 0.0312, "step": 36800 }, { "epoch": 11.03, "grad_norm": 1.02321195602417, "learning_rate": 4.448930761178406e-05, "loss": 0.0243, "step": 36900 }, { "epoch": 11.06, "grad_norm": 1.4520535469055176, "learning_rate": 4.4474353222670854e-05, "loss": 0.0263, "step": 37000 }, { "epoch": 11.06, "eval_loss": 0.23973342776298523, "eval_precision": 0.928783958602846, "eval_recall": 0.8842020998183442, "eval_runtime": 302.1259, "eval_samples_per_second": 44.266, "eval_steps_per_second": 1.384, "step": 37000 }, { "epoch": 11.09, "grad_norm": 0.9927899837493896, "learning_rate": 4.4459398833557654e-05, "loss": 0.0251, "step": 37100 }, { "epoch": 11.12, "grad_norm": 0.7255445122718811, "learning_rate": 4.4444444444444447e-05, "loss": 0.023, "step": 37200 }, { "epoch": 11.15, "grad_norm": 1.2551404237747192, "learning_rate": 4.442949005533124e-05, "loss": 0.0282, "step": 37300 }, { "epoch": 11.18, "grad_norm": 1.8652236461639404, "learning_rate": 4.441453566621804e-05, "loss": 0.0265, "step": 37400 }, { "epoch": 11.21, "grad_norm": 0.29598140716552734, "learning_rate": 4.439958127710483e-05, "loss": 0.0231, "step": 37500 }, { "epoch": 11.24, "grad_norm": 0.517977774143219, "learning_rate": 4.4384626887991625e-05, "loss": 0.0266, "step": 37600 }, { "epoch": 11.27, "grad_norm": 1.3159215450286865, "learning_rate": 4.4369672498878425e-05, "loss": 0.0246, "step": 37700 }, { "epoch": 11.3, "grad_norm": 1.8311362266540527, "learning_rate": 4.435471810976522e-05, "loss": 0.0325, "step": 37800 }, { "epoch": 11.33, "grad_norm": 2.8861258029937744, "learning_rate": 4.433976372065201e-05, "loss": 0.0303, "step": 37900 }, { "epoch": 11.36, "grad_norm": 0.6612695455551147, "learning_rate": 4.432480933153881e-05, "loss": 0.0284, "step": 38000 }, { "epoch": 11.36, "eval_loss": 0.23250487446784973, "eval_precision": 0.9248716302952503, "eval_recall": 0.8873118014717202, "eval_runtime": 302.5481, "eval_samples_per_second": 44.205, "eval_steps_per_second": 1.382, "step": 38000 }, { "epoch": 11.39, "grad_norm": 0.8181266784667969, "learning_rate": 4.43098549424256e-05, "loss": 0.0251, "step": 38100 }, { "epoch": 11.42, "grad_norm": 0.48834991455078125, "learning_rate": 4.42949005533124e-05, "loss": 0.0313, "step": 38200 }, { "epoch": 11.45, "grad_norm": 0.4897523820400238, "learning_rate": 4.4279946164199195e-05, "loss": 0.0328, "step": 38300 }, { "epoch": 11.48, "grad_norm": 0.7222294807434082, "learning_rate": 4.426499177508599e-05, "loss": 0.0298, "step": 38400 }, { "epoch": 11.51, "grad_norm": 0.07086914777755737, "learning_rate": 4.425003738597279e-05, "loss": 0.032, "step": 38500 }, { "epoch": 11.54, "grad_norm": 1.4812002182006836, "learning_rate": 4.423508299685958e-05, "loss": 0.0282, "step": 38600 }, { "epoch": 11.57, "grad_norm": 1.302590012550354, "learning_rate": 4.4220128607746374e-05, "loss": 0.027, "step": 38700 }, { "epoch": 11.6, "grad_norm": 1.9532426595687866, "learning_rate": 4.420517421863317e-05, "loss": 0.0304, "step": 38800 }, { "epoch": 11.63, "grad_norm": 2.029754638671875, "learning_rate": 4.4190219829519966e-05, "loss": 0.0301, "step": 38900 }, { "epoch": 11.66, "grad_norm": 1.320448398590088, "learning_rate": 4.417526544040676e-05, "loss": 0.0277, "step": 39000 }, { "epoch": 11.66, "eval_loss": 0.241913303732872, "eval_precision": 0.9234215627085253, "eval_recall": 0.8947627697897103, "eval_runtime": 303.3685, "eval_samples_per_second": 44.085, "eval_steps_per_second": 1.378, "step": 39000 }, { "epoch": 11.69, "grad_norm": 1.8267722129821777, "learning_rate": 4.416031105129356e-05, "loss": 0.0249, "step": 39100 }, { "epoch": 11.72, "grad_norm": 0.7122277021408081, "learning_rate": 4.414535666218035e-05, "loss": 0.0323, "step": 39200 }, { "epoch": 11.75, "grad_norm": 0.5691227316856384, "learning_rate": 4.4130402273067145e-05, "loss": 0.0325, "step": 39300 }, { "epoch": 11.78, "grad_norm": 0.40894216299057007, "learning_rate": 4.4115447883953944e-05, "loss": 0.0301, "step": 39400 }, { "epoch": 11.81, "grad_norm": 2.4805972576141357, "learning_rate": 4.410049349484074e-05, "loss": 0.0277, "step": 39500 }, { "epoch": 11.84, "grad_norm": 1.2774219512939453, "learning_rate": 4.408553910572754e-05, "loss": 0.0278, "step": 39600 }, { "epoch": 11.87, "grad_norm": 1.267562985420227, "learning_rate": 4.407058471661433e-05, "loss": 0.0286, "step": 39700 }, { "epoch": 11.9, "grad_norm": 0.6910821795463562, "learning_rate": 4.405563032750112e-05, "loss": 0.0344, "step": 39800 }, { "epoch": 11.93, "grad_norm": 0.3539283275604248, "learning_rate": 4.404067593838792e-05, "loss": 0.0298, "step": 39900 }, { "epoch": 11.96, "grad_norm": 1.7098407745361328, "learning_rate": 4.4025721549274715e-05, "loss": 0.0318, "step": 40000 }, { "epoch": 11.96, "eval_loss": 0.23493793606758118, "eval_precision": 0.9253437490076529, "eval_recall": 0.8971951106869054, "eval_runtime": 302.3541, "eval_samples_per_second": 44.233, "eval_steps_per_second": 1.382, "step": 40000 }, { "epoch": 11.99, "grad_norm": 2.1748311519622803, "learning_rate": 4.401076716016151e-05, "loss": 0.0312, "step": 40100 }, { "epoch": 12.02, "grad_norm": 0.8426460027694702, "learning_rate": 4.399581277104831e-05, "loss": 0.0262, "step": 40200 }, { "epoch": 12.05, "grad_norm": 0.3200826048851013, "learning_rate": 4.39808583819351e-05, "loss": 0.0237, "step": 40300 }, { "epoch": 12.08, "grad_norm": 0.2708234488964081, "learning_rate": 4.3965903992821893e-05, "loss": 0.0229, "step": 40400 }, { "epoch": 12.11, "grad_norm": 1.4237157106399536, "learning_rate": 4.395094960370869e-05, "loss": 0.0198, "step": 40500 }, { "epoch": 12.14, "grad_norm": 0.06805676221847534, "learning_rate": 4.3935995214595486e-05, "loss": 0.026, "step": 40600 }, { "epoch": 12.17, "grad_norm": 1.2842926979064941, "learning_rate": 4.392104082548228e-05, "loss": 0.0241, "step": 40700 }, { "epoch": 12.2, "grad_norm": 1.5190855264663696, "learning_rate": 4.390608643636908e-05, "loss": 0.0232, "step": 40800 }, { "epoch": 12.23, "grad_norm": 1.8280004262924194, "learning_rate": 4.389113204725587e-05, "loss": 0.0241, "step": 40900 }, { "epoch": 12.26, "grad_norm": 0.19059352576732635, "learning_rate": 4.3876177658142664e-05, "loss": 0.0238, "step": 41000 }, { "epoch": 12.26, "eval_loss": 0.24695585668087006, "eval_precision": 0.9256610729722858, "eval_recall": 0.8967332738076911, "eval_runtime": 302.2734, "eval_samples_per_second": 44.245, "eval_steps_per_second": 1.383, "step": 41000 }, { "epoch": 12.29, "grad_norm": 0.40746474266052246, "learning_rate": 4.3861223269029464e-05, "loss": 0.0232, "step": 41100 }, { "epoch": 12.32, "grad_norm": 1.2412996292114258, "learning_rate": 4.384626887991626e-05, "loss": 0.0215, "step": 41200 }, { "epoch": 12.35, "grad_norm": 0.2166558802127838, "learning_rate": 4.3831314490803056e-05, "loss": 0.0237, "step": 41300 }, { "epoch": 12.38, "grad_norm": 0.719872236251831, "learning_rate": 4.381636010168985e-05, "loss": 0.0253, "step": 41400 }, { "epoch": 12.41, "grad_norm": 1.5946626663208008, "learning_rate": 4.380140571257664e-05, "loss": 0.0235, "step": 41500 }, { "epoch": 12.44, "grad_norm": 1.0119950771331787, "learning_rate": 4.378645132346344e-05, "loss": 0.0257, "step": 41600 }, { "epoch": 12.47, "grad_norm": 0.9327923059463501, "learning_rate": 4.377149693435023e-05, "loss": 0.0243, "step": 41700 }, { "epoch": 12.5, "grad_norm": 0.41256028413772583, "learning_rate": 4.375654254523703e-05, "loss": 0.0272, "step": 41800 }, { "epoch": 12.53, "grad_norm": 0.1845785677433014, "learning_rate": 4.374158815612383e-05, "loss": 0.029, "step": 41900 }, { "epoch": 12.56, "grad_norm": 1.754239559173584, "learning_rate": 4.372663376701062e-05, "loss": 0.0252, "step": 42000 }, { "epoch": 12.56, "eval_loss": 0.2473253309726715, "eval_precision": 0.9269791733010636, "eval_recall": 0.8962406478031959, "eval_runtime": 304.5592, "eval_samples_per_second": 43.913, "eval_steps_per_second": 1.372, "step": 42000 }, { "epoch": 12.59, "grad_norm": 0.5748271346092224, "learning_rate": 4.371167937789741e-05, "loss": 0.0281, "step": 42100 }, { "epoch": 12.62, "grad_norm": 0.36274582147598267, "learning_rate": 4.369672498878421e-05, "loss": 0.0248, "step": 42200 }, { "epoch": 12.65, "grad_norm": 0.6130300164222717, "learning_rate": 4.3681770599671006e-05, "loss": 0.0269, "step": 42300 }, { "epoch": 12.68, "grad_norm": 1.2477418184280396, "learning_rate": 4.36668162105578e-05, "loss": 0.0259, "step": 42400 }, { "epoch": 12.71, "grad_norm": 0.8152483701705933, "learning_rate": 4.36518618214446e-05, "loss": 0.0263, "step": 42500 }, { "epoch": 12.74, "grad_norm": 0.04731460288167, "learning_rate": 4.363690743233139e-05, "loss": 0.024, "step": 42600 }, { "epoch": 12.77, "grad_norm": 0.7886996865272522, "learning_rate": 4.362195304321819e-05, "loss": 0.0245, "step": 42700 }, { "epoch": 12.8, "grad_norm": 2.1900315284729004, "learning_rate": 4.360699865410498e-05, "loss": 0.0292, "step": 42800 }, { "epoch": 12.83, "grad_norm": 0.45924192667007446, "learning_rate": 4.3592044264991777e-05, "loss": 0.0261, "step": 42900 }, { "epoch": 12.86, "grad_norm": 0.07307754456996918, "learning_rate": 4.3577089875878576e-05, "loss": 0.0248, "step": 43000 }, { "epoch": 12.86, "eval_loss": 0.24504822492599487, "eval_precision": 0.9273960876319711, "eval_recall": 0.9006127035930909, "eval_runtime": 303.9567, "eval_samples_per_second": 44.0, "eval_steps_per_second": 1.375, "step": 43000 }, { "epoch": 12.89, "grad_norm": 0.4676400423049927, "learning_rate": 4.356213548676536e-05, "loss": 0.0232, "step": 43100 }, { "epoch": 12.92, "grad_norm": 0.2993585765361786, "learning_rate": 4.354718109765216e-05, "loss": 0.0237, "step": 43200 }, { "epoch": 12.95, "grad_norm": 1.226276159286499, "learning_rate": 4.353222670853896e-05, "loss": 0.0256, "step": 43300 }, { "epoch": 12.98, "grad_norm": 1.5110477209091187, "learning_rate": 4.3517272319425754e-05, "loss": 0.0285, "step": 43400 }, { "epoch": 13.01, "grad_norm": 1.6162513494491577, "learning_rate": 4.350231793031255e-05, "loss": 0.0219, "step": 43500 }, { "epoch": 13.04, "grad_norm": 0.1792839914560318, "learning_rate": 4.348736354119935e-05, "loss": 0.0191, "step": 43600 }, { "epoch": 13.07, "grad_norm": 1.9044649600982666, "learning_rate": 4.347240915208614e-05, "loss": 0.017, "step": 43700 }, { "epoch": 13.1, "grad_norm": 0.5899202823638916, "learning_rate": 4.345745476297293e-05, "loss": 0.0241, "step": 43800 }, { "epoch": 13.13, "grad_norm": 0.6521077752113342, "learning_rate": 4.344250037385973e-05, "loss": 0.0216, "step": 43900 }, { "epoch": 13.16, "grad_norm": 0.7596339583396912, "learning_rate": 4.3427545984746525e-05, "loss": 0.0181, "step": 44000 }, { "epoch": 13.16, "eval_loss": 0.2613174319267273, "eval_precision": 0.9276514907592247, "eval_recall": 0.8870654884694725, "eval_runtime": 304.3764, "eval_samples_per_second": 43.939, "eval_steps_per_second": 1.373, "step": 44000 }, { "epoch": 13.19, "grad_norm": 1.0404387712478638, "learning_rate": 4.3412591595633325e-05, "loss": 0.0247, "step": 44100 }, { "epoch": 13.22, "grad_norm": 1.7849115133285522, "learning_rate": 4.339763720652011e-05, "loss": 0.0188, "step": 44200 }, { "epoch": 13.25, "grad_norm": 1.0972092151641846, "learning_rate": 4.338268281740691e-05, "loss": 0.0255, "step": 44300 }, { "epoch": 13.28, "grad_norm": 0.7391771078109741, "learning_rate": 4.336772842829371e-05, "loss": 0.0225, "step": 44400 }, { "epoch": 13.31, "grad_norm": 1.5010148286819458, "learning_rate": 4.3352774039180497e-05, "loss": 0.0217, "step": 44500 }, { "epoch": 13.34, "grad_norm": 0.7189137935638428, "learning_rate": 4.3337819650067296e-05, "loss": 0.0211, "step": 44600 }, { "epoch": 13.37, "grad_norm": 1.003636121749878, "learning_rate": 4.3322865260954096e-05, "loss": 0.0236, "step": 44700 }, { "epoch": 13.4, "grad_norm": 0.914703369140625, "learning_rate": 4.330791087184089e-05, "loss": 0.0224, "step": 44800 }, { "epoch": 13.43, "grad_norm": 0.1861487776041031, "learning_rate": 4.329295648272768e-05, "loss": 0.0251, "step": 44900 }, { "epoch": 13.46, "grad_norm": 0.7734150886535645, "learning_rate": 4.327800209361448e-05, "loss": 0.0254, "step": 45000 }, { "epoch": 13.46, "eval_loss": 0.2583397924900055, "eval_precision": 0.9213451745124829, "eval_recall": 0.9135441362110902, "eval_runtime": 305.1941, "eval_samples_per_second": 43.821, "eval_steps_per_second": 1.37, "step": 45000 }, { "epoch": 13.49, "grad_norm": 0.7596560716629028, "learning_rate": 4.3263047704501274e-05, "loss": 0.0246, "step": 45100 }, { "epoch": 13.52, "grad_norm": 1.4200429916381836, "learning_rate": 4.324809331538807e-05, "loss": 0.0174, "step": 45200 }, { "epoch": 13.55, "grad_norm": 2.7082788944244385, "learning_rate": 4.323313892627486e-05, "loss": 0.026, "step": 45300 }, { "epoch": 13.58, "grad_norm": 1.2132717370986938, "learning_rate": 4.321818453716166e-05, "loss": 0.0228, "step": 45400 }, { "epoch": 13.61, "grad_norm": 3.768927812576294, "learning_rate": 4.320323014804846e-05, "loss": 0.0236, "step": 45500 }, { "epoch": 13.64, "grad_norm": 1.5163260698318481, "learning_rate": 4.3188275758935245e-05, "loss": 0.0189, "step": 45600 }, { "epoch": 13.67, "grad_norm": 0.7969369888305664, "learning_rate": 4.3173321369822045e-05, "loss": 0.0245, "step": 45700 }, { "epoch": 13.7, "grad_norm": 1.445375680923462, "learning_rate": 4.3158366980708845e-05, "loss": 0.0232, "step": 45800 }, { "epoch": 13.73, "grad_norm": 0.04813400283455849, "learning_rate": 4.314341259159563e-05, "loss": 0.0215, "step": 45900 }, { "epoch": 13.76, "grad_norm": 2.0303447246551514, "learning_rate": 4.312845820248243e-05, "loss": 0.0206, "step": 46000 }, { "epoch": 13.76, "eval_loss": 0.2769757807254791, "eval_precision": 0.9277020832674738, "eval_recall": 0.9035376704947813, "eval_runtime": 304.0355, "eval_samples_per_second": 43.988, "eval_steps_per_second": 1.375, "step": 46000 }, { "epoch": 13.79, "grad_norm": 0.9254265427589417, "learning_rate": 4.311350381336923e-05, "loss": 0.0203, "step": 46100 }, { "epoch": 13.82, "grad_norm": 2.1310763359069824, "learning_rate": 4.309854942425602e-05, "loss": 0.0206, "step": 46200 }, { "epoch": 13.85, "grad_norm": 0.5353107452392578, "learning_rate": 4.3083595035142816e-05, "loss": 0.0206, "step": 46300 }, { "epoch": 13.88, "grad_norm": 0.9395775198936462, "learning_rate": 4.306864064602961e-05, "loss": 0.0304, "step": 46400 }, { "epoch": 13.91, "grad_norm": 0.056145694106817245, "learning_rate": 4.305368625691641e-05, "loss": 0.0237, "step": 46500 }, { "epoch": 13.94, "grad_norm": 0.03264997899532318, "learning_rate": 4.30387318678032e-05, "loss": 0.0244, "step": 46600 }, { "epoch": 13.97, "grad_norm": 1.6055926084518433, "learning_rate": 4.3023777478689994e-05, "loss": 0.0224, "step": 46700 }, { "epoch": 14.0, "grad_norm": 1.4891152381896973, "learning_rate": 4.3008823089576794e-05, "loss": 0.021, "step": 46800 }, { "epoch": 14.03, "grad_norm": 0.3057061731815338, "learning_rate": 4.299386870046359e-05, "loss": 0.0173, "step": 46900 }, { "epoch": 14.06, "grad_norm": 1.0254565477371216, "learning_rate": 4.297891431135038e-05, "loss": 0.017, "step": 47000 }, { "epoch": 14.06, "eval_loss": 0.2714207172393799, "eval_precision": 0.9283886660138359, "eval_recall": 0.9048923920071431, "eval_runtime": 302.2817, "eval_samples_per_second": 44.244, "eval_steps_per_second": 1.383, "step": 47000 }, { "epoch": 14.08, "grad_norm": 0.6178631782531738, "learning_rate": 4.296395992223718e-05, "loss": 0.021, "step": 47100 }, { "epoch": 14.11, "grad_norm": 3.516096353530884, "learning_rate": 4.294900553312398e-05, "loss": 0.0181, "step": 47200 }, { "epoch": 14.14, "grad_norm": 0.20362690091133118, "learning_rate": 4.2934051144010765e-05, "loss": 0.0193, "step": 47300 }, { "epoch": 14.17, "grad_norm": 2.5930867195129395, "learning_rate": 4.2919096754897565e-05, "loss": 0.0176, "step": 47400 }, { "epoch": 14.2, "grad_norm": 1.4823873043060303, "learning_rate": 4.2904142365784364e-05, "loss": 0.0173, "step": 47500 }, { "epoch": 14.23, "grad_norm": 0.5278753042221069, "learning_rate": 4.288918797667115e-05, "loss": 0.0212, "step": 47600 }, { "epoch": 14.26, "grad_norm": 1.855218529701233, "learning_rate": 4.287423358755795e-05, "loss": 0.0199, "step": 47700 }, { "epoch": 14.29, "grad_norm": 0.31464433670043945, "learning_rate": 4.285927919844474e-05, "loss": 0.0241, "step": 47800 }, { "epoch": 14.32, "grad_norm": 0.2182936817407608, "learning_rate": 4.284432480933154e-05, "loss": 0.0172, "step": 47900 }, { "epoch": 14.35, "grad_norm": 1.2800421714782715, "learning_rate": 4.2829370420218336e-05, "loss": 0.0188, "step": 48000 }, { "epoch": 14.35, "eval_loss": 0.26452192664146423, "eval_precision": 0.9272217673363986, "eval_recall": 0.9065242156470334, "eval_runtime": 302.9199, "eval_samples_per_second": 44.15, "eval_steps_per_second": 1.38, "step": 48000 }, { "epoch": 14.38, "grad_norm": 3.320737361907959, "learning_rate": 4.281441603110513e-05, "loss": 0.0198, "step": 48100 }, { "epoch": 14.41, "grad_norm": 0.8519121408462524, "learning_rate": 4.279946164199193e-05, "loss": 0.0182, "step": 48200 }, { "epoch": 14.44, "grad_norm": 0.4318147599697113, "learning_rate": 4.278450725287872e-05, "loss": 0.0178, "step": 48300 }, { "epoch": 14.47, "grad_norm": 0.047759074717760086, "learning_rate": 4.2769552863765514e-05, "loss": 0.021, "step": 48400 }, { "epoch": 14.5, "grad_norm": 1.6022422313690186, "learning_rate": 4.2754598474652314e-05, "loss": 0.0144, "step": 48500 }, { "epoch": 14.53, "grad_norm": 0.7104184031486511, "learning_rate": 4.273964408553911e-05, "loss": 0.0207, "step": 48600 }, { "epoch": 14.56, "grad_norm": 1.5093780755996704, "learning_rate": 4.27246896964259e-05, "loss": 0.0205, "step": 48700 }, { "epoch": 14.59, "grad_norm": 0.7566470503807068, "learning_rate": 4.27097353073127e-05, "loss": 0.0187, "step": 48800 }, { "epoch": 14.62, "grad_norm": 1.222693920135498, "learning_rate": 4.269478091819949e-05, "loss": 0.0199, "step": 48900 }, { "epoch": 14.65, "grad_norm": 1.5546650886535645, "learning_rate": 4.2679826529086285e-05, "loss": 0.0188, "step": 49000 }, { "epoch": 14.65, "eval_loss": 0.2760772109031677, "eval_precision": 0.9305101058710299, "eval_recall": 0.8930077896486961, "eval_runtime": 301.8588, "eval_samples_per_second": 44.305, "eval_steps_per_second": 1.385, "step": 49000 }, { "epoch": 14.68, "grad_norm": 0.6152912378311157, "learning_rate": 4.2664872139973084e-05, "loss": 0.0199, "step": 49100 }, { "epoch": 14.71, "grad_norm": 0.8479551672935486, "learning_rate": 4.264991775085988e-05, "loss": 0.0236, "step": 49200 }, { "epoch": 14.74, "grad_norm": 2.0793190002441406, "learning_rate": 4.263496336174668e-05, "loss": 0.0257, "step": 49300 }, { "epoch": 14.77, "grad_norm": 0.9795339107513428, "learning_rate": 4.262000897263347e-05, "loss": 0.019, "step": 49400 }, { "epoch": 14.8, "grad_norm": 0.49018004536628723, "learning_rate": 4.260505458352026e-05, "loss": 0.0207, "step": 49500 }, { "epoch": 14.83, "grad_norm": 0.22400274872779846, "learning_rate": 4.259010019440706e-05, "loss": 0.0212, "step": 49600 }, { "epoch": 14.86, "grad_norm": 0.8345464468002319, "learning_rate": 4.2575145805293855e-05, "loss": 0.0182, "step": 49700 }, { "epoch": 14.89, "grad_norm": 0.2443341612815857, "learning_rate": 4.256019141618065e-05, "loss": 0.0177, "step": 49800 }, { "epoch": 14.92, "grad_norm": 0.697216272354126, "learning_rate": 4.254523702706745e-05, "loss": 0.0216, "step": 49900 }, { "epoch": 14.95, "grad_norm": 0.5050187706947327, "learning_rate": 4.253028263795424e-05, "loss": 0.0166, "step": 50000 }, { "epoch": 14.95, "eval_loss": 0.282767653465271, "eval_precision": 0.9254008757836374, "eval_recall": 0.9044305551279288, "eval_runtime": 303.3682, "eval_samples_per_second": 44.085, "eval_steps_per_second": 1.378, "step": 50000 }, { "epoch": 14.98, "grad_norm": 0.4018344283103943, "learning_rate": 4.2515328248841034e-05, "loss": 0.02, "step": 50100 }, { "epoch": 15.01, "grad_norm": 2.2681732177734375, "learning_rate": 4.250037385972783e-05, "loss": 0.0169, "step": 50200 }, { "epoch": 15.04, "grad_norm": 0.18065716326236725, "learning_rate": 4.2485419470614626e-05, "loss": 0.0163, "step": 50300 }, { "epoch": 15.07, "grad_norm": 1.0265353918075562, "learning_rate": 4.247046508150142e-05, "loss": 0.0201, "step": 50400 }, { "epoch": 15.1, "grad_norm": 1.7455101013183594, "learning_rate": 4.245551069238822e-05, "loss": 0.0174, "step": 50500 }, { "epoch": 15.13, "grad_norm": 0.03697839379310608, "learning_rate": 4.244055630327501e-05, "loss": 0.021, "step": 50600 }, { "epoch": 15.16, "grad_norm": 0.10842275619506836, "learning_rate": 4.242560191416181e-05, "loss": 0.0196, "step": 50700 }, { "epoch": 15.19, "grad_norm": 0.6541497111320496, "learning_rate": 4.2410647525048604e-05, "loss": 0.019, "step": 50800 }, { "epoch": 15.22, "grad_norm": 1.3006408214569092, "learning_rate": 4.23956931359354e-05, "loss": 0.0178, "step": 50900 }, { "epoch": 15.25, "grad_norm": 0.6021150350570679, "learning_rate": 4.23807387468222e-05, "loss": 0.0199, "step": 51000 }, { "epoch": 15.25, "eval_loss": 0.2640076279640198, "eval_precision": 0.9301819557882123, "eval_recall": 0.9081868284122048, "eval_runtime": 302.9987, "eval_samples_per_second": 44.139, "eval_steps_per_second": 1.38, "step": 51000 }, { "epoch": 15.28, "grad_norm": 0.8783787488937378, "learning_rate": 4.236578435770899e-05, "loss": 0.0175, "step": 51100 }, { "epoch": 15.31, "grad_norm": 0.18405625224113464, "learning_rate": 4.235082996859578e-05, "loss": 0.0152, "step": 51200 }, { "epoch": 15.34, "grad_norm": 0.03877532109618187, "learning_rate": 4.233587557948258e-05, "loss": 0.0174, "step": 51300 }, { "epoch": 15.37, "grad_norm": 0.3079793155193329, "learning_rate": 4.2320921190369375e-05, "loss": 0.015, "step": 51400 }, { "epoch": 15.4, "grad_norm": 0.9296764731407166, "learning_rate": 4.230596680125617e-05, "loss": 0.0177, "step": 51500 }, { "epoch": 15.43, "grad_norm": 0.7762422561645508, "learning_rate": 4.229101241214297e-05, "loss": 0.0195, "step": 51600 }, { "epoch": 15.46, "grad_norm": 2.472615957260132, "learning_rate": 4.227605802302976e-05, "loss": 0.0195, "step": 51700 }, { "epoch": 15.49, "grad_norm": 2.8045852184295654, "learning_rate": 4.226110363391655e-05, "loss": 0.0201, "step": 51800 }, { "epoch": 15.52, "grad_norm": 0.053874421864748, "learning_rate": 4.224614924480335e-05, "loss": 0.018, "step": 51900 }, { "epoch": 15.55, "grad_norm": 0.3398553729057312, "learning_rate": 4.2231194855690146e-05, "loss": 0.0167, "step": 52000 }, { "epoch": 15.55, "eval_loss": 0.2754287123680115, "eval_precision": 0.927292017724521, "eval_recall": 0.914929646848733, "eval_runtime": 302.6973, "eval_samples_per_second": 44.183, "eval_steps_per_second": 1.381, "step": 52000 }, { "epoch": 15.58, "grad_norm": 1.1841187477111816, "learning_rate": 4.2216240466576945e-05, "loss": 0.0157, "step": 52100 }, { "epoch": 15.61, "grad_norm": 1.0184565782546997, "learning_rate": 4.220128607746374e-05, "loss": 0.0145, "step": 52200 }, { "epoch": 15.64, "grad_norm": 0.6707783937454224, "learning_rate": 4.218633168835053e-05, "loss": 0.0215, "step": 52300 }, { "epoch": 15.67, "grad_norm": 0.8084210157394409, "learning_rate": 4.217137729923733e-05, "loss": 0.0185, "step": 52400 }, { "epoch": 15.7, "grad_norm": 0.24998579919338226, "learning_rate": 4.2156422910124124e-05, "loss": 0.0192, "step": 52500 }, { "epoch": 15.73, "grad_norm": 0.11048603802919388, "learning_rate": 4.214146852101092e-05, "loss": 0.0177, "step": 52600 }, { "epoch": 15.76, "grad_norm": 0.8540931940078735, "learning_rate": 4.2126514131897716e-05, "loss": 0.018, "step": 52700 }, { "epoch": 15.79, "grad_norm": 0.3726775646209717, "learning_rate": 4.211155974278451e-05, "loss": 0.0181, "step": 52800 }, { "epoch": 15.82, "grad_norm": 0.13543102145195007, "learning_rate": 4.20966053536713e-05, "loss": 0.0201, "step": 52900 }, { "epoch": 15.85, "grad_norm": 0.3862367570400238, "learning_rate": 4.20816509645581e-05, "loss": 0.0184, "step": 53000 }, { "epoch": 15.85, "eval_loss": 0.2746909558773041, "eval_precision": 0.9290507850298093, "eval_recall": 0.9164075248622187, "eval_runtime": 304.9585, "eval_samples_per_second": 43.855, "eval_steps_per_second": 1.371, "step": 53000 }, { "epoch": 15.88, "grad_norm": 0.5059983730316162, "learning_rate": 4.2066696575444895e-05, "loss": 0.0182, "step": 53100 }, { "epoch": 15.91, "grad_norm": 0.45346036553382874, "learning_rate": 4.205174218633169e-05, "loss": 0.0208, "step": 53200 }, { "epoch": 15.94, "grad_norm": 1.0658683776855469, "learning_rate": 4.203678779721849e-05, "loss": 0.0149, "step": 53300 }, { "epoch": 15.97, "grad_norm": 0.2168959081172943, "learning_rate": 4.202183340810528e-05, "loss": 0.0191, "step": 53400 }, { "epoch": 16.0, "grad_norm": 1.7620713710784912, "learning_rate": 4.200687901899207e-05, "loss": 0.0219, "step": 53500 }, { "epoch": 16.03, "grad_norm": 0.33198004961013794, "learning_rate": 4.199192462987887e-05, "loss": 0.014, "step": 53600 }, { "epoch": 16.06, "grad_norm": 3.614070415496826, "learning_rate": 4.1976970240765665e-05, "loss": 0.0132, "step": 53700 }, { "epoch": 16.09, "grad_norm": 0.7846044898033142, "learning_rate": 4.1962015851652465e-05, "loss": 0.014, "step": 53800 }, { "epoch": 16.12, "grad_norm": 1.2382973432540894, "learning_rate": 4.194706146253926e-05, "loss": 0.0198, "step": 53900 }, { "epoch": 16.15, "grad_norm": 1.7487576007843018, "learning_rate": 4.193210707342605e-05, "loss": 0.0156, "step": 54000 }, { "epoch": 16.15, "eval_loss": 0.27493321895599365, "eval_precision": 0.926791958041958, "eval_recall": 0.9140367622155855, "eval_runtime": 304.8434, "eval_samples_per_second": 43.872, "eval_steps_per_second": 1.371, "step": 54000 }, { "epoch": 16.18, "grad_norm": 2.473257541656494, "learning_rate": 4.191715268431285e-05, "loss": 0.0144, "step": 54100 }, { "epoch": 16.21, "grad_norm": 1.7735458612442017, "learning_rate": 4.1902198295199643e-05, "loss": 0.0128, "step": 54200 }, { "epoch": 16.24, "grad_norm": 0.09201900660991669, "learning_rate": 4.1887243906086436e-05, "loss": 0.0121, "step": 54300 }, { "epoch": 16.27, "grad_norm": 4.265335559844971, "learning_rate": 4.1872289516973236e-05, "loss": 0.0193, "step": 54400 }, { "epoch": 16.3, "grad_norm": 0.05550719425082207, "learning_rate": 4.185733512786003e-05, "loss": 0.0191, "step": 54500 }, { "epoch": 16.33, "grad_norm": 1.2244312763214111, "learning_rate": 4.184238073874682e-05, "loss": 0.0144, "step": 54600 }, { "epoch": 16.36, "grad_norm": 0.11609119921922684, "learning_rate": 4.182742634963362e-05, "loss": 0.0195, "step": 54700 }, { "epoch": 16.39, "grad_norm": 0.7442992329597473, "learning_rate": 4.1812471960520414e-05, "loss": 0.0161, "step": 54800 }, { "epoch": 16.42, "grad_norm": 1.913397192955017, "learning_rate": 4.179751757140721e-05, "loss": 0.017, "step": 54900 }, { "epoch": 16.45, "grad_norm": 1.5975757837295532, "learning_rate": 4.178256318229401e-05, "loss": 0.0131, "step": 55000 }, { "epoch": 16.45, "eval_loss": 0.28440138697624207, "eval_precision": 0.9323552610821896, "eval_recall": 0.9098494411773762, "eval_runtime": 302.3846, "eval_samples_per_second": 44.228, "eval_steps_per_second": 1.382, "step": 55000 }, { "epoch": 16.48, "grad_norm": 0.02616269886493683, "learning_rate": 4.17676087931808e-05, "loss": 0.0166, "step": 55100 }, { "epoch": 16.51, "grad_norm": 0.270749032497406, "learning_rate": 4.17526544040676e-05, "loss": 0.0167, "step": 55200 }, { "epoch": 16.54, "grad_norm": 0.8699542880058289, "learning_rate": 4.173770001495439e-05, "loss": 0.0178, "step": 55300 }, { "epoch": 16.57, "grad_norm": 0.15558452904224396, "learning_rate": 4.1722745625841185e-05, "loss": 0.0155, "step": 55400 }, { "epoch": 16.6, "grad_norm": 1.3881036043167114, "learning_rate": 4.1707791236727985e-05, "loss": 0.0162, "step": 55500 }, { "epoch": 16.63, "grad_norm": 1.0590258836746216, "learning_rate": 4.169283684761478e-05, "loss": 0.019, "step": 55600 }, { "epoch": 16.66, "grad_norm": 0.6527047157287598, "learning_rate": 4.167788245850157e-05, "loss": 0.0162, "step": 55700 }, { "epoch": 16.69, "grad_norm": 0.7468928694725037, "learning_rate": 4.166292806938837e-05, "loss": 0.0187, "step": 55800 }, { "epoch": 16.72, "grad_norm": 1.1580772399902344, "learning_rate": 4.164797368027516e-05, "loss": 0.0152, "step": 55900 }, { "epoch": 16.75, "grad_norm": 0.27484288811683655, "learning_rate": 4.1633019291161956e-05, "loss": 0.018, "step": 56000 }, { "epoch": 16.75, "eval_loss": 0.2911526560783386, "eval_precision": 0.9246059786783004, "eval_recall": 0.9265987253302134, "eval_runtime": 304.1503, "eval_samples_per_second": 43.972, "eval_steps_per_second": 1.374, "step": 56000 }, { "epoch": 16.78, "grad_norm": 0.12976956367492676, "learning_rate": 4.1618064902048756e-05, "loss": 0.0185, "step": 56100 }, { "epoch": 16.81, "grad_norm": 0.37897953391075134, "learning_rate": 4.160311051293555e-05, "loss": 0.0152, "step": 56200 }, { "epoch": 16.84, "grad_norm": 0.07681228220462799, "learning_rate": 4.158815612382234e-05, "loss": 0.0163, "step": 56300 }, { "epoch": 16.87, "grad_norm": 0.5966798663139343, "learning_rate": 4.157320173470914e-05, "loss": 0.014, "step": 56400 }, { "epoch": 16.9, "grad_norm": 0.29120373725891113, "learning_rate": 4.1558247345595934e-05, "loss": 0.018, "step": 56500 }, { "epoch": 16.93, "grad_norm": 0.4325448274612427, "learning_rate": 4.1543292956482734e-05, "loss": 0.0145, "step": 56600 }, { "epoch": 16.96, "grad_norm": 1.473797082901001, "learning_rate": 4.1528338567369527e-05, "loss": 0.0164, "step": 56700 }, { "epoch": 16.99, "grad_norm": 0.963238537311554, "learning_rate": 4.151338417825632e-05, "loss": 0.0168, "step": 56800 }, { "epoch": 17.02, "grad_norm": 1.2749171257019043, "learning_rate": 4.149842978914312e-05, "loss": 0.0172, "step": 56900 }, { "epoch": 17.05, "grad_norm": 0.1201496422290802, "learning_rate": 4.148347540002991e-05, "loss": 0.0132, "step": 57000 }, { "epoch": 17.05, "eval_loss": 0.2895963788032532, "eval_precision": 0.9242246747641655, "eval_recall": 0.9230579759229041, "eval_runtime": 304.3955, "eval_samples_per_second": 43.936, "eval_steps_per_second": 1.373, "step": 57000 }, { "epoch": 17.08, "grad_norm": 0.0923817902803421, "learning_rate": 4.1468521010916705e-05, "loss": 0.0155, "step": 57100 }, { "epoch": 17.11, "grad_norm": 0.17687027156352997, "learning_rate": 4.1453566621803505e-05, "loss": 0.0142, "step": 57200 }, { "epoch": 17.14, "grad_norm": 0.5095121264457703, "learning_rate": 4.14386122326903e-05, "loss": 0.0122, "step": 57300 }, { "epoch": 17.17, "grad_norm": 0.14807282388210297, "learning_rate": 4.142365784357709e-05, "loss": 0.0122, "step": 57400 }, { "epoch": 17.19, "grad_norm": 0.22806455194950104, "learning_rate": 4.140870345446389e-05, "loss": 0.0126, "step": 57500 }, { "epoch": 17.22, "grad_norm": 0.1654992550611496, "learning_rate": 4.139374906535068e-05, "loss": 0.012, "step": 57600 }, { "epoch": 17.25, "grad_norm": 1.1821808815002441, "learning_rate": 4.1378794676237476e-05, "loss": 0.0154, "step": 57700 }, { "epoch": 17.28, "grad_norm": 0.33708083629608154, "learning_rate": 4.1363840287124275e-05, "loss": 0.0118, "step": 57800 }, { "epoch": 17.31, "grad_norm": 0.2778627276420593, "learning_rate": 4.134888589801107e-05, "loss": 0.0153, "step": 57900 }, { "epoch": 17.34, "grad_norm": 0.4350825250148773, "learning_rate": 4.133393150889787e-05, "loss": 0.0131, "step": 58000 }, { "epoch": 17.34, "eval_loss": 0.2985839247703552, "eval_precision": 0.9294326572576876, "eval_recall": 0.9185011853813233, "eval_runtime": 303.6403, "eval_samples_per_second": 44.046, "eval_steps_per_second": 1.377, "step": 58000 }, { "epoch": 17.37, "grad_norm": 1.0241811275482178, "learning_rate": 4.131897711978466e-05, "loss": 0.0152, "step": 58100 }, { "epoch": 17.4, "grad_norm": 0.705042839050293, "learning_rate": 4.1304022730671454e-05, "loss": 0.0165, "step": 58200 }, { "epoch": 17.43, "grad_norm": 0.9130484461784363, "learning_rate": 4.128906834155825e-05, "loss": 0.0143, "step": 58300 }, { "epoch": 17.46, "grad_norm": 0.0633108988404274, "learning_rate": 4.127411395244504e-05, "loss": 0.0147, "step": 58400 }, { "epoch": 17.49, "grad_norm": 1.2173391580581665, "learning_rate": 4.125915956333184e-05, "loss": 0.0134, "step": 58500 }, { "epoch": 17.52, "grad_norm": 2.9922380447387695, "learning_rate": 4.124420517421864e-05, "loss": 0.0145, "step": 58600 }, { "epoch": 17.55, "grad_norm": 0.015288499183952808, "learning_rate": 4.1229250785105425e-05, "loss": 0.0169, "step": 58700 }, { "epoch": 17.58, "grad_norm": 1.87058424949646, "learning_rate": 4.1214296395992225e-05, "loss": 0.0158, "step": 58800 }, { "epoch": 17.61, "grad_norm": 0.31113335490226746, "learning_rate": 4.1199342006879024e-05, "loss": 0.0151, "step": 58900 }, { "epoch": 17.64, "grad_norm": 0.8044542670249939, "learning_rate": 4.118438761776582e-05, "loss": 0.0143, "step": 59000 }, { "epoch": 17.64, "eval_loss": 0.2973649501800537, "eval_precision": 0.9298240060774879, "eval_recall": 0.9044305551279288, "eval_runtime": 302.1441, "eval_samples_per_second": 44.264, "eval_steps_per_second": 1.383, "step": 59000 }, { "epoch": 17.67, "grad_norm": 0.08827254921197891, "learning_rate": 4.116943322865261e-05, "loss": 0.0157, "step": 59100 }, { "epoch": 17.7, "grad_norm": 1.8845312595367432, "learning_rate": 4.115447883953941e-05, "loss": 0.0155, "step": 59200 }, { "epoch": 17.73, "grad_norm": 0.49602124094963074, "learning_rate": 4.11395244504262e-05, "loss": 0.0162, "step": 59300 }, { "epoch": 17.76, "grad_norm": 0.3592805564403534, "learning_rate": 4.1124570061312995e-05, "loss": 0.0149, "step": 59400 }, { "epoch": 17.79, "grad_norm": 1.320101261138916, "learning_rate": 4.110961567219979e-05, "loss": 0.0156, "step": 59500 }, { "epoch": 17.82, "grad_norm": 0.4389740526676178, "learning_rate": 4.109466128308659e-05, "loss": 0.0151, "step": 59600 }, { "epoch": 17.85, "grad_norm": 1.6578569412231445, "learning_rate": 4.107970689397339e-05, "loss": 0.0166, "step": 59700 }, { "epoch": 17.88, "grad_norm": 1.7992475032806396, "learning_rate": 4.1064752504860174e-05, "loss": 0.0148, "step": 59800 }, { "epoch": 17.91, "grad_norm": 0.026478100568056107, "learning_rate": 4.1049798115746973e-05, "loss": 0.0158, "step": 59900 }, { "epoch": 17.94, "grad_norm": 2.8473379611968994, "learning_rate": 4.103484372663377e-05, "loss": 0.0159, "step": 60000 }, { "epoch": 17.94, "eval_loss": 0.2935677468776703, "eval_precision": 0.9302795129030222, "eval_recall": 0.9079097262846763, "eval_runtime": 302.5843, "eval_samples_per_second": 44.199, "eval_steps_per_second": 1.381, "step": 60000 }, { "epoch": 17.97, "grad_norm": 2.1734695434570312, "learning_rate": 4.101988933752056e-05, "loss": 0.0183, "step": 60100 }, { "epoch": 18.0, "grad_norm": 0.14518772065639496, "learning_rate": 4.100493494840736e-05, "loss": 0.0172, "step": 60200 }, { "epoch": 18.03, "grad_norm": 0.3986850380897522, "learning_rate": 4.098998055929416e-05, "loss": 0.0101, "step": 60300 }, { "epoch": 18.06, "grad_norm": 1.78749680519104, "learning_rate": 4.097502617018095e-05, "loss": 0.0123, "step": 60400 }, { "epoch": 18.09, "grad_norm": 0.43207836151123047, "learning_rate": 4.0960071781067744e-05, "loss": 0.0132, "step": 60500 }, { "epoch": 18.12, "grad_norm": 0.11268942803144455, "learning_rate": 4.0945117391954544e-05, "loss": 0.0131, "step": 60600 }, { "epoch": 18.15, "grad_norm": 0.5929433107376099, "learning_rate": 4.093016300284134e-05, "loss": 0.0118, "step": 60700 }, { "epoch": 18.18, "grad_norm": 0.012462102808058262, "learning_rate": 4.091520861372813e-05, "loss": 0.0114, "step": 60800 }, { "epoch": 18.21, "grad_norm": 0.03992025554180145, "learning_rate": 4.090025422461492e-05, "loss": 0.0123, "step": 60900 }, { "epoch": 18.24, "grad_norm": 0.2556318938732147, "learning_rate": 4.088529983550172e-05, "loss": 0.0163, "step": 61000 }, { "epoch": 18.24, "eval_loss": 0.3005661070346832, "eval_precision": 0.930046845034112, "eval_recall": 0.9108039040610856, "eval_runtime": 303.0262, "eval_samples_per_second": 44.135, "eval_steps_per_second": 1.379, "step": 61000 }, { "epoch": 18.27, "grad_norm": 0.0933234691619873, "learning_rate": 4.087034544638852e-05, "loss": 0.0139, "step": 61100 }, { "epoch": 18.3, "grad_norm": 4.561667442321777, "learning_rate": 4.085539105727531e-05, "loss": 0.015, "step": 61200 }, { "epoch": 18.33, "grad_norm": 1.8393715620040894, "learning_rate": 4.084043666816211e-05, "loss": 0.0113, "step": 61300 }, { "epoch": 18.36, "grad_norm": 0.5815320611000061, "learning_rate": 4.082548227904891e-05, "loss": 0.0158, "step": 61400 }, { "epoch": 18.39, "grad_norm": 0.9265565872192383, "learning_rate": 4.0810527889935693e-05, "loss": 0.0165, "step": 61500 }, { "epoch": 18.42, "grad_norm": 0.029577825218439102, "learning_rate": 4.079557350082249e-05, "loss": 0.0151, "step": 61600 }, { "epoch": 18.45, "grad_norm": 0.13609355688095093, "learning_rate": 4.078061911170929e-05, "loss": 0.0147, "step": 61700 }, { "epoch": 18.48, "grad_norm": 0.2505282461643219, "learning_rate": 4.0765664722596086e-05, "loss": 0.0117, "step": 61800 }, { "epoch": 18.51, "grad_norm": 0.49616509675979614, "learning_rate": 4.075071033348288e-05, "loss": 0.0136, "step": 61900 }, { "epoch": 18.54, "grad_norm": 1.4143670797348022, "learning_rate": 4.073575594436967e-05, "loss": 0.0199, "step": 62000 }, { "epoch": 18.54, "eval_loss": 0.28239989280700684, "eval_precision": 0.9322552865754473, "eval_recall": 0.89993534283691, "eval_runtime": 303.1737, "eval_samples_per_second": 44.113, "eval_steps_per_second": 1.379, "step": 62000 }, { "epoch": 18.57, "grad_norm": 2.5461013317108154, "learning_rate": 4.072080155525647e-05, "loss": 0.0122, "step": 62100 }, { "epoch": 18.6, "grad_norm": 0.3786807358264923, "learning_rate": 4.0705847166143264e-05, "loss": 0.0122, "step": 62200 }, { "epoch": 18.63, "grad_norm": 1.546884536743164, "learning_rate": 4.069089277703006e-05, "loss": 0.0133, "step": 62300 }, { "epoch": 18.66, "grad_norm": 0.04791215434670448, "learning_rate": 4.0675938387916856e-05, "loss": 0.0118, "step": 62400 }, { "epoch": 18.69, "grad_norm": 0.12534143030643463, "learning_rate": 4.0660983998803656e-05, "loss": 0.0145, "step": 62500 }, { "epoch": 18.72, "grad_norm": 1.358917474746704, "learning_rate": 4.064602960969044e-05, "loss": 0.0152, "step": 62600 }, { "epoch": 18.75, "grad_norm": 0.10757000744342804, "learning_rate": 4.063107522057724e-05, "loss": 0.0155, "step": 62700 }, { "epoch": 18.78, "grad_norm": 2.365614652633667, "learning_rate": 4.061612083146404e-05, "loss": 0.0156, "step": 62800 }, { "epoch": 18.81, "grad_norm": 0.4936872124671936, "learning_rate": 4.060116644235083e-05, "loss": 0.0132, "step": 62900 }, { "epoch": 18.84, "grad_norm": 0.022019200026988983, "learning_rate": 4.058621205323763e-05, "loss": 0.0124, "step": 63000 }, { "epoch": 18.84, "eval_loss": 0.30277740955352783, "eval_precision": 0.930499515185637, "eval_recall": 0.9159456879830044, "eval_runtime": 304.0566, "eval_samples_per_second": 43.985, "eval_steps_per_second": 1.375, "step": 63000 }, { "epoch": 18.87, "grad_norm": 0.3624964654445648, "learning_rate": 4.057125766412442e-05, "loss": 0.0155, "step": 63100 }, { "epoch": 18.9, "grad_norm": 1.7629303932189941, "learning_rate": 4.055630327501122e-05, "loss": 0.0139, "step": 63200 }, { "epoch": 18.93, "grad_norm": 0.18042436242103577, "learning_rate": 4.054134888589801e-05, "loss": 0.0179, "step": 63300 }, { "epoch": 18.96, "grad_norm": 0.20951129496097565, "learning_rate": 4.0526394496784806e-05, "loss": 0.0172, "step": 63400 }, { "epoch": 18.99, "grad_norm": 0.8891457915306091, "learning_rate": 4.0511440107671605e-05, "loss": 0.0126, "step": 63500 }, { "epoch": 19.02, "grad_norm": 0.22427305579185486, "learning_rate": 4.04964857185584e-05, "loss": 0.0112, "step": 63600 }, { "epoch": 19.05, "grad_norm": 0.25893327593803406, "learning_rate": 4.048153132944519e-05, "loss": 0.0123, "step": 63700 }, { "epoch": 19.08, "grad_norm": 1.579196810722351, "learning_rate": 4.046657694033199e-05, "loss": 0.0117, "step": 63800 }, { "epoch": 19.11, "grad_norm": 1.801465630531311, "learning_rate": 4.045162255121879e-05, "loss": 0.0113, "step": 63900 }, { "epoch": 19.14, "grad_norm": 3.969907522201538, "learning_rate": 4.0436668162105577e-05, "loss": 0.0132, "step": 64000 }, { "epoch": 19.14, "eval_loss": 0.3150152266025543, "eval_precision": 0.9289555972482801, "eval_recall": 0.9146833338464854, "eval_runtime": 304.0309, "eval_samples_per_second": 43.989, "eval_steps_per_second": 1.375, "step": 64000 }, { "epoch": 19.17, "grad_norm": 1.5782831907272339, "learning_rate": 4.0421713772992376e-05, "loss": 0.0106, "step": 64100 }, { "epoch": 19.2, "grad_norm": 1.0305448770523071, "learning_rate": 4.0406759383879176e-05, "loss": 0.0115, "step": 64200 }, { "epoch": 19.23, "grad_norm": 0.8879725337028503, "learning_rate": 4.039180499476596e-05, "loss": 0.0108, "step": 64300 }, { "epoch": 19.26, "grad_norm": 1.0525989532470703, "learning_rate": 4.037685060565276e-05, "loss": 0.0113, "step": 64400 }, { "epoch": 19.29, "grad_norm": 0.19859521090984344, "learning_rate": 4.0361896216539554e-05, "loss": 0.011, "step": 64500 }, { "epoch": 19.32, "grad_norm": 1.628808856010437, "learning_rate": 4.034694182742635e-05, "loss": 0.0126, "step": 64600 }, { "epoch": 19.35, "grad_norm": 0.45845118165016174, "learning_rate": 4.033198743831315e-05, "loss": 0.0117, "step": 64700 }, { "epoch": 19.38, "grad_norm": 0.02105000615119934, "learning_rate": 4.031703304919994e-05, "loss": 0.0103, "step": 64800 }, { "epoch": 19.41, "grad_norm": 1.2173235416412354, "learning_rate": 4.030207866008674e-05, "loss": 0.013, "step": 64900 }, { "epoch": 19.44, "grad_norm": 1.0716986656188965, "learning_rate": 4.028712427097353e-05, "loss": 0.0136, "step": 65000 }, { "epoch": 19.44, "eval_loss": 0.30169057846069336, "eval_precision": 0.9307780320366132, "eval_recall": 0.9016903229779242, "eval_runtime": 303.9363, "eval_samples_per_second": 44.003, "eval_steps_per_second": 1.375, "step": 65000 }, { "epoch": 19.47, "grad_norm": 0.060731422156095505, "learning_rate": 4.0272169881860325e-05, "loss": 0.0103, "step": 65100 }, { "epoch": 19.5, "grad_norm": 1.8369615077972412, "learning_rate": 4.0257215492747125e-05, "loss": 0.0149, "step": 65200 }, { "epoch": 19.53, "grad_norm": 0.5922613143920898, "learning_rate": 4.024226110363392e-05, "loss": 0.0137, "step": 65300 }, { "epoch": 19.56, "grad_norm": 1.1230493783950806, "learning_rate": 4.022730671452071e-05, "loss": 0.016, "step": 65400 }, { "epoch": 19.59, "grad_norm": 0.9484757781028748, "learning_rate": 4.021235232540751e-05, "loss": 0.0126, "step": 65500 }, { "epoch": 19.62, "grad_norm": 0.40328437089920044, "learning_rate": 4.01973979362943e-05, "loss": 0.014, "step": 65600 }, { "epoch": 19.65, "grad_norm": 1.251897931098938, "learning_rate": 4.0182443547181096e-05, "loss": 0.0152, "step": 65700 }, { "epoch": 19.68, "grad_norm": 0.06640147417783737, "learning_rate": 4.0167489158067896e-05, "loss": 0.0119, "step": 65800 }, { "epoch": 19.71, "grad_norm": 0.08419325947761536, "learning_rate": 4.015253476895469e-05, "loss": 0.0104, "step": 65900 }, { "epoch": 19.74, "grad_norm": 0.8898499011993408, "learning_rate": 4.013758037984148e-05, "loss": 0.013, "step": 66000 }, { "epoch": 19.74, "eval_loss": 0.30586904287338257, "eval_precision": 0.9286385564814235, "eval_recall": 0.9127128298285045, "eval_runtime": 303.8354, "eval_samples_per_second": 44.017, "eval_steps_per_second": 1.376, "step": 66000 }, { "epoch": 19.77, "grad_norm": 0.8399672508239746, "learning_rate": 4.012262599072828e-05, "loss": 0.0156, "step": 66100 }, { "epoch": 19.8, "grad_norm": 1.188772201538086, "learning_rate": 4.0107671601615074e-05, "loss": 0.0133, "step": 66200 }, { "epoch": 19.83, "grad_norm": 0.3390734791755676, "learning_rate": 4.0092717212501874e-05, "loss": 0.011, "step": 66300 }, { "epoch": 19.86, "grad_norm": 2.0773940086364746, "learning_rate": 4.007776282338867e-05, "loss": 0.0109, "step": 66400 }, { "epoch": 19.89, "grad_norm": 1.667506456375122, "learning_rate": 4.006280843427546e-05, "loss": 0.0121, "step": 66500 }, { "epoch": 19.92, "grad_norm": 0.036488935351371765, "learning_rate": 4.004785404516226e-05, "loss": 0.0121, "step": 66600 }, { "epoch": 19.95, "grad_norm": 0.9762794375419617, "learning_rate": 4.003289965604905e-05, "loss": 0.0138, "step": 66700 }, { "epoch": 19.98, "grad_norm": 1.04608952999115, "learning_rate": 4.0017945266935845e-05, "loss": 0.0117, "step": 66800 }, { "epoch": 20.01, "grad_norm": 5.332238674163818, "learning_rate": 4.0002990877822645e-05, "loss": 0.0137, "step": 66900 }, { "epoch": 20.04, "grad_norm": 0.01725686341524124, "learning_rate": 3.998803648870944e-05, "loss": 0.0131, "step": 67000 }, { "epoch": 20.04, "eval_loss": 0.2912316620349884, "eval_precision": 0.9311961240797836, "eval_recall": 0.9113273191908617, "eval_runtime": 303.1004, "eval_samples_per_second": 44.124, "eval_steps_per_second": 1.379, "step": 67000 }, { "epoch": 20.07, "grad_norm": 0.0427197702229023, "learning_rate": 3.997308209959623e-05, "loss": 0.0077, "step": 67100 }, { "epoch": 20.1, "grad_norm": 0.017879147082567215, "learning_rate": 3.995812771048303e-05, "loss": 0.0104, "step": 67200 }, { "epoch": 20.13, "grad_norm": 0.07891906797885895, "learning_rate": 3.994317332136982e-05, "loss": 0.0141, "step": 67300 }, { "epoch": 20.16, "grad_norm": 0.16812817752361298, "learning_rate": 3.9928218932256616e-05, "loss": 0.0097, "step": 67400 }, { "epoch": 20.19, "grad_norm": 3.0790505409240723, "learning_rate": 3.9913264543143416e-05, "loss": 0.0106, "step": 67500 }, { "epoch": 20.22, "grad_norm": 0.41399437189102173, "learning_rate": 3.989831015403021e-05, "loss": 0.0089, "step": 67600 }, { "epoch": 20.25, "grad_norm": 0.4379628300666809, "learning_rate": 3.988335576491701e-05, "loss": 0.0086, "step": 67700 }, { "epoch": 20.28, "grad_norm": 0.011956513859331608, "learning_rate": 3.98684013758038e-05, "loss": 0.0133, "step": 67800 }, { "epoch": 20.31, "grad_norm": 2.477144718170166, "learning_rate": 3.9853446986690594e-05, "loss": 0.0091, "step": 67900 }, { "epoch": 20.33, "grad_norm": 2.790292739868164, "learning_rate": 3.9838492597577394e-05, "loss": 0.0128, "step": 68000 }, { "epoch": 20.33, "eval_loss": 0.3076106309890747, "eval_precision": 0.9304780813715294, "eval_recall": 0.9090489239200714, "eval_runtime": 303.9942, "eval_samples_per_second": 43.994, "eval_steps_per_second": 1.375, "step": 68000 }, { "epoch": 20.36, "grad_norm": 1.441587209701538, "learning_rate": 3.9823538208464186e-05, "loss": 0.0159, "step": 68100 }, { "epoch": 20.39, "grad_norm": 1.7005335092544556, "learning_rate": 3.980858381935098e-05, "loss": 0.01, "step": 68200 }, { "epoch": 20.42, "grad_norm": 0.30774638056755066, "learning_rate": 3.979362943023778e-05, "loss": 0.0124, "step": 68300 }, { "epoch": 20.45, "grad_norm": 0.04803008586168289, "learning_rate": 3.977867504112457e-05, "loss": 0.0112, "step": 68400 }, { "epoch": 20.48, "grad_norm": 3.551407814025879, "learning_rate": 3.9763720652011365e-05, "loss": 0.012, "step": 68500 }, { "epoch": 20.51, "grad_norm": 0.037427909672260284, "learning_rate": 3.9748766262898164e-05, "loss": 0.0138, "step": 68600 }, { "epoch": 20.54, "grad_norm": 0.0066105336882174015, "learning_rate": 3.973381187378496e-05, "loss": 0.0114, "step": 68700 }, { "epoch": 20.57, "grad_norm": 0.05352969095110893, "learning_rate": 3.971885748467175e-05, "loss": 0.0106, "step": 68800 }, { "epoch": 20.6, "grad_norm": 1.097419023513794, "learning_rate": 3.970390309555855e-05, "loss": 0.0113, "step": 68900 }, { "epoch": 20.63, "grad_norm": 2.4684622287750244, "learning_rate": 3.968894870644534e-05, "loss": 0.0104, "step": 69000 }, { "epoch": 20.63, "eval_loss": 0.3140137493610382, "eval_precision": 0.9268018018018018, "eval_recall": 0.9122202038240094, "eval_runtime": 304.685, "eval_samples_per_second": 43.895, "eval_steps_per_second": 1.372, "step": 69000 }, { "epoch": 20.66, "grad_norm": 0.03651382029056549, "learning_rate": 3.967399431733214e-05, "loss": 0.0086, "step": 69100 }, { "epoch": 20.69, "grad_norm": 0.35381224751472473, "learning_rate": 3.9659039928218935e-05, "loss": 0.013, "step": 69200 }, { "epoch": 20.72, "grad_norm": 0.06933160871267319, "learning_rate": 3.964408553910573e-05, "loss": 0.0106, "step": 69300 }, { "epoch": 20.75, "grad_norm": 0.4022979140281677, "learning_rate": 3.962913114999253e-05, "loss": 0.013, "step": 69400 }, { "epoch": 20.78, "grad_norm": 0.03529789671301842, "learning_rate": 3.961417676087932e-05, "loss": 0.0156, "step": 69500 }, { "epoch": 20.81, "grad_norm": 0.7010594606399536, "learning_rate": 3.9599222371766114e-05, "loss": 0.0144, "step": 69600 }, { "epoch": 20.84, "grad_norm": 0.37523359060287476, "learning_rate": 3.958426798265291e-05, "loss": 0.0127, "step": 69700 }, { "epoch": 20.87, "grad_norm": 0.1500304788351059, "learning_rate": 3.9569313593539706e-05, "loss": 0.0151, "step": 69800 }, { "epoch": 20.9, "grad_norm": 1.1849136352539062, "learning_rate": 3.95543592044265e-05, "loss": 0.0092, "step": 69900 }, { "epoch": 20.93, "grad_norm": 0.37061455845832825, "learning_rate": 3.95394048153133e-05, "loss": 0.0125, "step": 70000 }, { "epoch": 20.93, "eval_loss": 0.2996491491794586, "eval_precision": 0.9277798530693563, "eval_recall": 0.9176390898734567, "eval_runtime": 305.225, "eval_samples_per_second": 43.817, "eval_steps_per_second": 1.369, "step": 70000 }, { "epoch": 20.96, "grad_norm": 1.1082910299301147, "learning_rate": 3.952445042620009e-05, "loss": 0.0135, "step": 70100 }, { "epoch": 20.99, "grad_norm": 0.21670883893966675, "learning_rate": 3.9509496037086884e-05, "loss": 0.0147, "step": 70200 }, { "epoch": 21.02, "grad_norm": 1.7163949012756348, "learning_rate": 3.9494541647973684e-05, "loss": 0.0074, "step": 70300 }, { "epoch": 21.05, "grad_norm": 0.49197930097579956, "learning_rate": 3.947958725886048e-05, "loss": 0.009, "step": 70400 }, { "epoch": 21.08, "grad_norm": 0.20454080402851105, "learning_rate": 3.946463286974727e-05, "loss": 0.0106, "step": 70500 }, { "epoch": 21.11, "grad_norm": 1.1480427980422974, "learning_rate": 3.944967848063407e-05, "loss": 0.0082, "step": 70600 }, { "epoch": 21.14, "grad_norm": 0.012445613741874695, "learning_rate": 3.943472409152086e-05, "loss": 0.0124, "step": 70700 }, { "epoch": 21.17, "grad_norm": 1.2859218120574951, "learning_rate": 3.941976970240766e-05, "loss": 0.0114, "step": 70800 }, { "epoch": 21.2, "grad_norm": 1.9639800786972046, "learning_rate": 3.9404815313294455e-05, "loss": 0.0094, "step": 70900 }, { "epoch": 21.23, "grad_norm": 0.5322540402412415, "learning_rate": 3.938986092418125e-05, "loss": 0.0127, "step": 71000 }, { "epoch": 21.23, "eval_loss": 0.31439679861068726, "eval_precision": 0.9300875853255618, "eval_recall": 0.918747498383571, "eval_runtime": 305.1026, "eval_samples_per_second": 43.834, "eval_steps_per_second": 1.37, "step": 71000 }, { "epoch": 21.26, "grad_norm": 0.7698822021484375, "learning_rate": 3.937490653506805e-05, "loss": 0.0091, "step": 71100 }, { "epoch": 21.29, "grad_norm": 0.058869846165180206, "learning_rate": 3.935995214595484e-05, "loss": 0.0116, "step": 71200 }, { "epoch": 21.32, "grad_norm": 0.040317438542842865, "learning_rate": 3.934499775684163e-05, "loss": 0.0082, "step": 71300 }, { "epoch": 21.35, "grad_norm": 0.3180629014968872, "learning_rate": 3.933004336772843e-05, "loss": 0.0086, "step": 71400 }, { "epoch": 21.38, "grad_norm": 0.14002850651741028, "learning_rate": 3.9315088978615226e-05, "loss": 0.0083, "step": 71500 }, { "epoch": 21.41, "grad_norm": 0.535882830619812, "learning_rate": 3.930013458950202e-05, "loss": 0.0083, "step": 71600 }, { "epoch": 21.44, "grad_norm": 0.8898109793663025, "learning_rate": 3.928518020038882e-05, "loss": 0.0111, "step": 71700 }, { "epoch": 21.47, "grad_norm": 7.178394317626953, "learning_rate": 3.927022581127561e-05, "loss": 0.0111, "step": 71800 }, { "epoch": 21.5, "grad_norm": 0.03290112316608429, "learning_rate": 3.9255271422162404e-05, "loss": 0.0102, "step": 71900 }, { "epoch": 21.53, "grad_norm": 0.013704554177820683, "learning_rate": 3.9240317033049204e-05, "loss": 0.0131, "step": 72000 }, { "epoch": 21.53, "eval_loss": 0.30643701553344727, "eval_precision": 0.9271496444430644, "eval_recall": 0.9192709135133471, "eval_runtime": 304.1697, "eval_samples_per_second": 43.969, "eval_steps_per_second": 1.374, "step": 72000 }, { "epoch": 21.56, "grad_norm": 0.8118484020233154, "learning_rate": 3.9225362643936e-05, "loss": 0.0109, "step": 72100 }, { "epoch": 21.59, "grad_norm": 0.8789449334144592, "learning_rate": 3.9210408254822796e-05, "loss": 0.0111, "step": 72200 }, { "epoch": 21.62, "grad_norm": 1.8666021823883057, "learning_rate": 3.919545386570959e-05, "loss": 0.0112, "step": 72300 }, { "epoch": 21.65, "grad_norm": 0.33622369170188904, "learning_rate": 3.918049947659638e-05, "loss": 0.0121, "step": 72400 }, { "epoch": 21.68, "grad_norm": 1.5097126960754395, "learning_rate": 3.916554508748318e-05, "loss": 0.0104, "step": 72500 }, { "epoch": 21.71, "grad_norm": 1.3149192333221436, "learning_rate": 3.915059069836997e-05, "loss": 0.01, "step": 72600 }, { "epoch": 21.74, "grad_norm": 1.1172950267791748, "learning_rate": 3.913563630925677e-05, "loss": 0.0159, "step": 72700 }, { "epoch": 21.77, "grad_norm": 0.7861026525497437, "learning_rate": 3.912068192014357e-05, "loss": 0.0102, "step": 72800 }, { "epoch": 21.8, "grad_norm": 0.9385488033294678, "learning_rate": 3.910572753103036e-05, "loss": 0.0103, "step": 72900 }, { "epoch": 21.83, "grad_norm": 0.2858407199382782, "learning_rate": 3.909077314191715e-05, "loss": 0.0095, "step": 73000 }, { "epoch": 21.83, "eval_loss": 0.3220088481903076, "eval_precision": 0.9313063063063063, "eval_recall": 0.89119123125712, "eval_runtime": 301.1978, "eval_samples_per_second": 44.403, "eval_steps_per_second": 1.388, "step": 73000 }, { "epoch": 21.86, "grad_norm": 2.1585566997528076, "learning_rate": 3.907581875280395e-05, "loss": 0.0107, "step": 73100 }, { "epoch": 21.89, "grad_norm": 0.21467708051204681, "learning_rate": 3.9060864363690745e-05, "loss": 0.0092, "step": 73200 }, { "epoch": 21.92, "grad_norm": 0.0250945333391428, "learning_rate": 3.904590997457754e-05, "loss": 0.0095, "step": 73300 }, { "epoch": 21.95, "grad_norm": 0.08200676739215851, "learning_rate": 3.903095558546434e-05, "loss": 0.0127, "step": 73400 }, { "epoch": 21.98, "grad_norm": 7.951723098754883, "learning_rate": 3.901600119635113e-05, "loss": 0.0118, "step": 73500 }, { "epoch": 22.01, "grad_norm": 0.042703770101070404, "learning_rate": 3.900104680723793e-05, "loss": 0.0086, "step": 73600 }, { "epoch": 22.04, "grad_norm": 0.13317295908927917, "learning_rate": 3.898609241812472e-05, "loss": 0.0117, "step": 73700 }, { "epoch": 22.07, "grad_norm": 0.09529834240674973, "learning_rate": 3.8971138029011516e-05, "loss": 0.0077, "step": 73800 }, { "epoch": 22.1, "grad_norm": 1.2312837839126587, "learning_rate": 3.8956183639898316e-05, "loss": 0.01, "step": 73900 }, { "epoch": 22.13, "grad_norm": 0.20264630019664764, "learning_rate": 3.89412292507851e-05, "loss": 0.0079, "step": 74000 }, { "epoch": 22.13, "eval_loss": 0.3207722306251526, "eval_precision": 0.9257851445663011, "eval_recall": 0.9148680685981712, "eval_runtime": 304.4363, "eval_samples_per_second": 43.93, "eval_steps_per_second": 1.373, "step": 74000 }, { "epoch": 22.16, "grad_norm": 0.007298531476408243, "learning_rate": 3.89262748616719e-05, "loss": 0.0083, "step": 74100 }, { "epoch": 22.19, "grad_norm": 0.030803361907601357, "learning_rate": 3.89113204725587e-05, "loss": 0.0128, "step": 74200 }, { "epoch": 22.22, "grad_norm": 0.04404568299651146, "learning_rate": 3.8896366083445494e-05, "loss": 0.0094, "step": 74300 }, { "epoch": 22.25, "grad_norm": 0.14884673058986664, "learning_rate": 3.888141169433229e-05, "loss": 0.0081, "step": 74400 }, { "epoch": 22.28, "grad_norm": 0.07467024773359299, "learning_rate": 3.886645730521909e-05, "loss": 0.0144, "step": 74500 }, { "epoch": 22.31, "grad_norm": 0.6713554859161377, "learning_rate": 3.885150291610588e-05, "loss": 0.0136, "step": 74600 }, { "epoch": 22.34, "grad_norm": 0.16354040801525116, "learning_rate": 3.883654852699267e-05, "loss": 0.0109, "step": 74700 }, { "epoch": 22.37, "grad_norm": 1.4964691400527954, "learning_rate": 3.882159413787947e-05, "loss": 0.0116, "step": 74800 }, { "epoch": 22.4, "grad_norm": 1.4973292350769043, "learning_rate": 3.8806639748766265e-05, "loss": 0.008, "step": 74900 }, { "epoch": 22.43, "grad_norm": 0.17059992253780365, "learning_rate": 3.8791685359653065e-05, "loss": 0.0111, "step": 75000 }, { "epoch": 22.43, "eval_loss": 0.30246666073799133, "eval_precision": 0.9313384217417686, "eval_recall": 0.8979032605683672, "eval_runtime": 301.8023, "eval_samples_per_second": 44.314, "eval_steps_per_second": 1.385, "step": 75000 }, { "epoch": 22.46, "grad_norm": 0.05614122748374939, "learning_rate": 3.877673097053985e-05, "loss": 0.0101, "step": 75100 }, { "epoch": 22.49, "grad_norm": 0.23737676441669464, "learning_rate": 3.876177658142665e-05, "loss": 0.0111, "step": 75200 }, { "epoch": 22.52, "grad_norm": 0.11609382182359695, "learning_rate": 3.874682219231345e-05, "loss": 0.0129, "step": 75300 }, { "epoch": 22.55, "grad_norm": 0.006964783184230328, "learning_rate": 3.8731867803200236e-05, "loss": 0.014, "step": 75400 }, { "epoch": 22.58, "grad_norm": 0.6018117070198059, "learning_rate": 3.8716913414087036e-05, "loss": 0.0092, "step": 75500 }, { "epoch": 22.61, "grad_norm": 1.5463790893554688, "learning_rate": 3.8701959024973836e-05, "loss": 0.0129, "step": 75600 }, { "epoch": 22.64, "grad_norm": 0.3491170108318329, "learning_rate": 3.868700463586062e-05, "loss": 0.0124, "step": 75700 }, { "epoch": 22.67, "grad_norm": 0.3379780650138855, "learning_rate": 3.867205024674742e-05, "loss": 0.0105, "step": 75800 }, { "epoch": 22.7, "grad_norm": 0.6625536680221558, "learning_rate": 3.865709585763422e-05, "loss": 0.0101, "step": 75900 }, { "epoch": 22.73, "grad_norm": 0.5047014951705933, "learning_rate": 3.8642141468521014e-05, "loss": 0.0116, "step": 76000 }, { "epoch": 22.73, "eval_loss": 0.309579074382782, "eval_precision": 0.9289195145420119, "eval_recall": 0.9214261522830136, "eval_runtime": 306.5207, "eval_samples_per_second": 43.632, "eval_steps_per_second": 1.364, "step": 76000 }, { "epoch": 22.76, "grad_norm": 2.8879668712615967, "learning_rate": 3.862718707940781e-05, "loss": 0.0084, "step": 76100 }, { "epoch": 22.79, "grad_norm": 1.4628148078918457, "learning_rate": 3.86122326902946e-05, "loss": 0.0091, "step": 76200 }, { "epoch": 22.82, "grad_norm": 0.01455759722739458, "learning_rate": 3.85972783011814e-05, "loss": 0.0087, "step": 76300 }, { "epoch": 22.85, "grad_norm": 0.005665886681526899, "learning_rate": 3.858232391206819e-05, "loss": 0.0117, "step": 76400 }, { "epoch": 22.88, "grad_norm": 0.5273276567459106, "learning_rate": 3.8567369522954985e-05, "loss": 0.009, "step": 76500 }, { "epoch": 22.91, "grad_norm": 0.06718481332063675, "learning_rate": 3.8552415133841785e-05, "loss": 0.0118, "step": 76600 }, { "epoch": 22.94, "grad_norm": 0.30258700251579285, "learning_rate": 3.8537460744728585e-05, "loss": 0.0109, "step": 76700 }, { "epoch": 22.97, "grad_norm": 2.678166627883911, "learning_rate": 3.852250635561537e-05, "loss": 0.015, "step": 76800 }, { "epoch": 23.0, "grad_norm": 0.15017007291316986, "learning_rate": 3.850755196650217e-05, "loss": 0.0104, "step": 76900 }, { "epoch": 23.03, "grad_norm": 0.3501853048801422, "learning_rate": 3.849259757738897e-05, "loss": 0.0096, "step": 77000 }, { "epoch": 23.03, "eval_loss": 0.2935163080692291, "eval_precision": 0.9276991482965932, "eval_recall": 0.9121894146987284, "eval_runtime": 303.8246, "eval_samples_per_second": 44.019, "eval_steps_per_second": 1.376, "step": 77000 }, { "epoch": 23.06, "grad_norm": 0.729576587677002, "learning_rate": 3.8477643188275756e-05, "loss": 0.0076, "step": 77100 }, { "epoch": 23.09, "grad_norm": 0.03431198373436928, "learning_rate": 3.8462688799162556e-05, "loss": 0.0068, "step": 77200 }, { "epoch": 23.12, "grad_norm": 0.022281186655163765, "learning_rate": 3.844773441004935e-05, "loss": 0.0099, "step": 77300 }, { "epoch": 23.15, "grad_norm": 0.06289653480052948, "learning_rate": 3.843278002093615e-05, "loss": 0.0088, "step": 77400 }, { "epoch": 23.18, "grad_norm": 1.1686757802963257, "learning_rate": 3.841782563182294e-05, "loss": 0.0113, "step": 77500 }, { "epoch": 23.21, "grad_norm": 0.6460024118423462, "learning_rate": 3.8402871242709734e-05, "loss": 0.0098, "step": 77600 }, { "epoch": 23.24, "grad_norm": 0.04333605244755745, "learning_rate": 3.8387916853596534e-05, "loss": 0.0078, "step": 77700 }, { "epoch": 23.27, "grad_norm": 1.6560355424880981, "learning_rate": 3.8372962464483327e-05, "loss": 0.0069, "step": 77800 }, { "epoch": 23.3, "grad_norm": 1.7110439538955688, "learning_rate": 3.835800807537012e-05, "loss": 0.0079, "step": 77900 }, { "epoch": 23.33, "grad_norm": 0.34755662083625793, "learning_rate": 3.834305368625692e-05, "loss": 0.0117, "step": 78000 }, { "epoch": 23.33, "eval_loss": 0.31362003087997437, "eval_precision": 0.9317794739166089, "eval_recall": 0.9096031281751286, "eval_runtime": 302.9137, "eval_samples_per_second": 44.151, "eval_steps_per_second": 1.38, "step": 78000 }, { "epoch": 23.36, "grad_norm": 0.07322967052459717, "learning_rate": 3.832809929714372e-05, "loss": 0.0086, "step": 78100 }, { "epoch": 23.39, "grad_norm": 0.1620834916830063, "learning_rate": 3.8313144908030505e-05, "loss": 0.0105, "step": 78200 }, { "epoch": 23.42, "grad_norm": 1.0541850328445435, "learning_rate": 3.8298190518917305e-05, "loss": 0.011, "step": 78300 }, { "epoch": 23.44, "grad_norm": 0.008509721606969833, "learning_rate": 3.8283236129804104e-05, "loss": 0.009, "step": 78400 }, { "epoch": 23.47, "grad_norm": 0.2723921537399292, "learning_rate": 3.826828174069089e-05, "loss": 0.0089, "step": 78500 }, { "epoch": 23.5, "grad_norm": 0.7700883150100708, "learning_rate": 3.825332735157769e-05, "loss": 0.0084, "step": 78600 }, { "epoch": 23.53, "grad_norm": 0.7245194911956787, "learning_rate": 3.823837296246448e-05, "loss": 0.0068, "step": 78700 }, { "epoch": 23.56, "grad_norm": 1.283056378364563, "learning_rate": 3.822341857335128e-05, "loss": 0.0108, "step": 78800 }, { "epoch": 23.59, "grad_norm": 0.016398323699831963, "learning_rate": 3.8208464184238075e-05, "loss": 0.0104, "step": 78900 }, { "epoch": 23.62, "grad_norm": 0.32268649339675903, "learning_rate": 3.819350979512487e-05, "loss": 0.0085, "step": 79000 }, { "epoch": 23.62, "eval_loss": 0.30707934498786926, "eval_precision": 0.9256538985992314, "eval_recall": 0.9196403830167185, "eval_runtime": 304.8987, "eval_samples_per_second": 43.864, "eval_steps_per_second": 1.371, "step": 79000 }, { "epoch": 23.65, "grad_norm": 0.1340191662311554, "learning_rate": 3.817855540601167e-05, "loss": 0.0132, "step": 79100 }, { "epoch": 23.68, "grad_norm": 1.2741714715957642, "learning_rate": 3.816360101689846e-05, "loss": 0.0086, "step": 79200 }, { "epoch": 23.71, "grad_norm": 3.2270684242248535, "learning_rate": 3.8148646627785254e-05, "loss": 0.012, "step": 79300 }, { "epoch": 23.74, "grad_norm": 0.0873398706316948, "learning_rate": 3.813369223867205e-05, "loss": 0.0071, "step": 79400 }, { "epoch": 23.77, "grad_norm": 0.36740046739578247, "learning_rate": 3.811873784955885e-05, "loss": 0.0082, "step": 79500 }, { "epoch": 23.8, "grad_norm": 0.7461920976638794, "learning_rate": 3.810378346044564e-05, "loss": 0.0133, "step": 79600 }, { "epoch": 23.83, "grad_norm": 1.0577598810195923, "learning_rate": 3.808882907133244e-05, "loss": 0.0118, "step": 79700 }, { "epoch": 23.86, "grad_norm": 1.9472182989120483, "learning_rate": 3.807387468221923e-05, "loss": 0.0116, "step": 79800 }, { "epoch": 23.89, "grad_norm": 1.6104402542114258, "learning_rate": 3.8058920293106025e-05, "loss": 0.0114, "step": 79900 }, { "epoch": 23.92, "grad_norm": 0.03251710161566734, "learning_rate": 3.8043965903992824e-05, "loss": 0.0091, "step": 80000 }, { "epoch": 23.92, "eval_loss": 0.3046566843986511, "eval_precision": 0.9268397735663303, "eval_recall": 0.9275531882139229, "eval_runtime": 305.7377, "eval_samples_per_second": 43.743, "eval_steps_per_second": 1.367, "step": 80000 }, { "epoch": 23.95, "grad_norm": 0.8245527744293213, "learning_rate": 3.802901151487962e-05, "loss": 0.0067, "step": 80100 }, { "epoch": 23.98, "grad_norm": 2.3082966804504395, "learning_rate": 3.801405712576642e-05, "loss": 0.0103, "step": 80200 }, { "epoch": 24.01, "grad_norm": 0.05168503150343895, "learning_rate": 3.799910273665321e-05, "loss": 0.0086, "step": 80300 }, { "epoch": 24.04, "grad_norm": 0.3247091770172119, "learning_rate": 3.798414834754e-05, "loss": 0.0082, "step": 80400 }, { "epoch": 24.07, "grad_norm": 0.30284127593040466, "learning_rate": 3.79691939584268e-05, "loss": 0.0065, "step": 80500 }, { "epoch": 24.1, "grad_norm": 0.041343070566654205, "learning_rate": 3.7954239569313595e-05, "loss": 0.0072, "step": 80600 }, { "epoch": 24.13, "grad_norm": 0.5980477929115295, "learning_rate": 3.793928518020039e-05, "loss": 0.0088, "step": 80700 }, { "epoch": 24.16, "grad_norm": 0.0064304666593670845, "learning_rate": 3.792433079108719e-05, "loss": 0.0094, "step": 80800 }, { "epoch": 24.19, "grad_norm": 0.6040250062942505, "learning_rate": 3.790937640197398e-05, "loss": 0.0079, "step": 80900 }, { "epoch": 24.22, "grad_norm": 0.3337300419807434, "learning_rate": 3.7894422012860773e-05, "loss": 0.0086, "step": 81000 }, { "epoch": 24.22, "eval_loss": 0.3350207209587097, "eval_precision": 0.9268361054008597, "eval_recall": 0.916192000985252, "eval_runtime": 304.7162, "eval_samples_per_second": 43.89, "eval_steps_per_second": 1.372, "step": 81000 }, { "epoch": 24.25, "grad_norm": 0.710114061832428, "learning_rate": 3.787946762374757e-05, "loss": 0.008, "step": 81100 }, { "epoch": 24.28, "grad_norm": 0.03623099625110626, "learning_rate": 3.7864513234634366e-05, "loss": 0.0131, "step": 81200 }, { "epoch": 24.31, "grad_norm": 0.09887418150901794, "learning_rate": 3.784955884552116e-05, "loss": 0.0086, "step": 81300 }, { "epoch": 24.34, "grad_norm": 0.6916789412498474, "learning_rate": 3.783460445640796e-05, "loss": 0.0101, "step": 81400 }, { "epoch": 24.37, "grad_norm": 1.4278247356414795, "learning_rate": 3.781965006729475e-05, "loss": 0.0107, "step": 81500 }, { "epoch": 24.4, "grad_norm": 0.16397880017757416, "learning_rate": 3.7804695678181544e-05, "loss": 0.008, "step": 81600 }, { "epoch": 24.43, "grad_norm": 0.08632964640855789, "learning_rate": 3.7789741289068344e-05, "loss": 0.0078, "step": 81700 }, { "epoch": 24.46, "grad_norm": 2.2472782135009766, "learning_rate": 3.777478689995514e-05, "loss": 0.011, "step": 81800 }, { "epoch": 24.49, "grad_norm": 0.14701958000659943, "learning_rate": 3.7759832510841936e-05, "loss": 0.0096, "step": 81900 }, { "epoch": 24.52, "grad_norm": 0.051196735352277756, "learning_rate": 3.774487812172873e-05, "loss": 0.0111, "step": 82000 }, { "epoch": 24.52, "eval_loss": 0.30252349376678467, "eval_precision": 0.928390712570056, "eval_recall": 0.8925459527694818, "eval_runtime": 302.8814, "eval_samples_per_second": 44.156, "eval_steps_per_second": 1.38, "step": 82000 }, { "epoch": 24.55, "grad_norm": 0.013324776664376259, "learning_rate": 3.772992373261552e-05, "loss": 0.0075, "step": 82100 }, { "epoch": 24.58, "grad_norm": 0.10291430354118347, "learning_rate": 3.771496934350232e-05, "loss": 0.0099, "step": 82200 }, { "epoch": 24.61, "grad_norm": 0.07137342542409897, "learning_rate": 3.7700014954389115e-05, "loss": 0.012, "step": 82300 }, { "epoch": 24.64, "grad_norm": 0.3020240068435669, "learning_rate": 3.768506056527591e-05, "loss": 0.0087, "step": 82400 }, { "epoch": 24.67, "grad_norm": 1.067194938659668, "learning_rate": 3.767010617616271e-05, "loss": 0.0096, "step": 82500 }, { "epoch": 24.7, "grad_norm": 0.014255263842642307, "learning_rate": 3.76551517870495e-05, "loss": 0.007, "step": 82600 }, { "epoch": 24.73, "grad_norm": 0.02688017673790455, "learning_rate": 3.764019739793629e-05, "loss": 0.0089, "step": 82700 }, { "epoch": 24.76, "grad_norm": 0.3376453220844269, "learning_rate": 3.762524300882309e-05, "loss": 0.0066, "step": 82800 }, { "epoch": 24.79, "grad_norm": 0.10389913618564606, "learning_rate": 3.7610288619709886e-05, "loss": 0.0066, "step": 82900 }, { "epoch": 24.82, "grad_norm": 0.7046878337860107, "learning_rate": 3.759533423059668e-05, "loss": 0.01, "step": 83000 }, { "epoch": 24.82, "eval_loss": 0.3185621201992035, "eval_precision": 0.9291735873891379, "eval_recall": 0.9128667754549094, "eval_runtime": 303.4192, "eval_samples_per_second": 44.078, "eval_steps_per_second": 1.378, "step": 83000 }, { "epoch": 24.85, "grad_norm": 0.4447859227657318, "learning_rate": 3.758037984148348e-05, "loss": 0.0085, "step": 83100 }, { "epoch": 24.88, "grad_norm": 2.2701525688171387, "learning_rate": 3.756542545237027e-05, "loss": 0.0114, "step": 83200 }, { "epoch": 24.91, "grad_norm": 0.05526027828454971, "learning_rate": 3.755047106325707e-05, "loss": 0.012, "step": 83300 }, { "epoch": 24.94, "grad_norm": 0.8909191489219666, "learning_rate": 3.7535516674143864e-05, "loss": 0.0097, "step": 83400 }, { "epoch": 24.97, "grad_norm": 0.004659523721784353, "learning_rate": 3.7520562285030656e-05, "loss": 0.0085, "step": 83500 }, { "epoch": 25.0, "grad_norm": 0.05222604423761368, "learning_rate": 3.7505607895917456e-05, "loss": 0.0088, "step": 83600 }, { "epoch": 25.03, "grad_norm": 0.014093970879912376, "learning_rate": 3.749065350680425e-05, "loss": 0.0085, "step": 83700 }, { "epoch": 25.06, "grad_norm": 0.0026446671690791845, "learning_rate": 3.747569911769104e-05, "loss": 0.005, "step": 83800 }, { "epoch": 25.09, "grad_norm": 0.1448344588279724, "learning_rate": 3.746074472857784e-05, "loss": 0.0064, "step": 83900 }, { "epoch": 25.12, "grad_norm": 0.295718789100647, "learning_rate": 3.7445790339464634e-05, "loss": 0.0067, "step": 84000 }, { "epoch": 25.12, "eval_loss": 0.32626327872276306, "eval_precision": 0.9313109964567663, "eval_recall": 0.9225653499184088, "eval_runtime": 304.7239, "eval_samples_per_second": 43.889, "eval_steps_per_second": 1.372, "step": 84000 }, { "epoch": 25.15, "grad_norm": 0.028157589957118034, "learning_rate": 3.743083595035143e-05, "loss": 0.0094, "step": 84100 }, { "epoch": 25.18, "grad_norm": 0.002226242097094655, "learning_rate": 3.741588156123823e-05, "loss": 0.0072, "step": 84200 }, { "epoch": 25.21, "grad_norm": 0.7868858575820923, "learning_rate": 3.740092717212502e-05, "loss": 0.0103, "step": 84300 }, { "epoch": 25.24, "grad_norm": 0.031047280877828598, "learning_rate": 3.738597278301181e-05, "loss": 0.01, "step": 84400 }, { "epoch": 25.27, "grad_norm": 0.30554434657096863, "learning_rate": 3.737101839389861e-05, "loss": 0.0076, "step": 84500 }, { "epoch": 25.3, "grad_norm": 1.2695821523666382, "learning_rate": 3.7356064004785405e-05, "loss": 0.0092, "step": 84600 }, { "epoch": 25.33, "grad_norm": 0.039061836898326874, "learning_rate": 3.7341109615672205e-05, "loss": 0.0129, "step": 84700 }, { "epoch": 25.36, "grad_norm": 1.0094258785247803, "learning_rate": 3.7326155226559e-05, "loss": 0.012, "step": 84800 }, { "epoch": 25.39, "grad_norm": 0.16602523624897003, "learning_rate": 3.731120083744579e-05, "loss": 0.0072, "step": 84900 }, { "epoch": 25.42, "grad_norm": 0.6232153177261353, "learning_rate": 3.729624644833259e-05, "loss": 0.0094, "step": 85000 }, { "epoch": 25.42, "eval_loss": 0.32043251395225525, "eval_precision": 0.9310592123725484, "eval_recall": 0.91936328088919, "eval_runtime": 304.0822, "eval_samples_per_second": 43.982, "eval_steps_per_second": 1.375, "step": 85000 }, { "epoch": 25.45, "grad_norm": 1.6009403467178345, "learning_rate": 3.728129205921938e-05, "loss": 0.0103, "step": 85100 }, { "epoch": 25.48, "grad_norm": 0.6107264757156372, "learning_rate": 3.7266337670106176e-05, "loss": 0.0079, "step": 85200 }, { "epoch": 25.51, "grad_norm": 0.44173404574394226, "learning_rate": 3.7251383280992976e-05, "loss": 0.0065, "step": 85300 }, { "epoch": 25.54, "grad_norm": 0.9073717594146729, "learning_rate": 3.723642889187977e-05, "loss": 0.0071, "step": 85400 }, { "epoch": 25.57, "grad_norm": 0.3392820656299591, "learning_rate": 3.722147450276656e-05, "loss": 0.0101, "step": 85500 }, { "epoch": 25.6, "grad_norm": 0.07929588109254837, "learning_rate": 3.720652011365336e-05, "loss": 0.0083, "step": 85600 }, { "epoch": 25.63, "grad_norm": 0.35071372985839844, "learning_rate": 3.7191565724540154e-05, "loss": 0.0121, "step": 85700 }, { "epoch": 25.66, "grad_norm": 0.20559339225292206, "learning_rate": 3.717661133542695e-05, "loss": 0.0073, "step": 85800 }, { "epoch": 25.69, "grad_norm": 0.045159224420785904, "learning_rate": 3.716165694631375e-05, "loss": 0.0087, "step": 85900 }, { "epoch": 25.72, "grad_norm": 0.10148915648460388, "learning_rate": 3.714670255720054e-05, "loss": 0.0119, "step": 86000 }, { "epoch": 25.72, "eval_loss": 0.31306663155555725, "eval_precision": 0.9333648989898989, "eval_recall": 0.9104036454324332, "eval_runtime": 304.164, "eval_samples_per_second": 43.97, "eval_steps_per_second": 1.374, "step": 86000 }, { "epoch": 25.75, "grad_norm": 0.18669423460960388, "learning_rate": 3.713174816808734e-05, "loss": 0.0063, "step": 86100 }, { "epoch": 25.78, "grad_norm": 0.10197019577026367, "learning_rate": 3.711679377897413e-05, "loss": 0.0083, "step": 86200 }, { "epoch": 25.81, "grad_norm": 0.0219405684620142, "learning_rate": 3.7101839389860925e-05, "loss": 0.0088, "step": 86300 }, { "epoch": 25.84, "grad_norm": 0.941899836063385, "learning_rate": 3.7086885000747725e-05, "loss": 0.006, "step": 86400 }, { "epoch": 25.87, "grad_norm": 0.042357202619314194, "learning_rate": 3.707193061163452e-05, "loss": 0.0107, "step": 86500 }, { "epoch": 25.9, "grad_norm": 0.04090040549635887, "learning_rate": 3.705697622252131e-05, "loss": 0.0076, "step": 86600 }, { "epoch": 25.93, "grad_norm": 1.0006482601165771, "learning_rate": 3.704202183340811e-05, "loss": 0.0081, "step": 86700 }, { "epoch": 25.96, "grad_norm": 0.01344706118106842, "learning_rate": 3.70270674442949e-05, "loss": 0.0061, "step": 86800 }, { "epoch": 25.99, "grad_norm": 0.039950937032699585, "learning_rate": 3.7012113055181696e-05, "loss": 0.0095, "step": 86900 }, { "epoch": 26.02, "grad_norm": 0.007412883453071117, "learning_rate": 3.6997158666068496e-05, "loss": 0.0061, "step": 87000 }, { "epoch": 26.02, "eval_loss": 0.3440411686897278, "eval_precision": 0.9280669958127618, "eval_recall": 0.9144370208442378, "eval_runtime": 304.1449, "eval_samples_per_second": 43.972, "eval_steps_per_second": 1.374, "step": 87000 }, { "epoch": 26.05, "grad_norm": 0.045031215995550156, "learning_rate": 3.698220427695529e-05, "loss": 0.0083, "step": 87100 }, { "epoch": 26.08, "grad_norm": 0.5366631150245667, "learning_rate": 3.696724988784208e-05, "loss": 0.0069, "step": 87200 }, { "epoch": 26.11, "grad_norm": 0.24467185139656067, "learning_rate": 3.695229549872888e-05, "loss": 0.0065, "step": 87300 }, { "epoch": 26.14, "grad_norm": 0.7528616786003113, "learning_rate": 3.6937341109615674e-05, "loss": 0.0087, "step": 87400 }, { "epoch": 26.17, "grad_norm": 0.15506117045879364, "learning_rate": 3.692238672050247e-05, "loss": 0.0072, "step": 87500 }, { "epoch": 26.2, "grad_norm": 0.2464226335287094, "learning_rate": 3.6907432331389266e-05, "loss": 0.0053, "step": 87600 }, { "epoch": 26.23, "grad_norm": 0.15138311684131622, "learning_rate": 3.689247794227606e-05, "loss": 0.0063, "step": 87700 }, { "epoch": 26.26, "grad_norm": 0.07477385550737381, "learning_rate": 3.687752355316286e-05, "loss": 0.0076, "step": 87800 }, { "epoch": 26.29, "grad_norm": 0.661697268486023, "learning_rate": 3.686256916404965e-05, "loss": 0.0078, "step": 87900 }, { "epoch": 26.32, "grad_norm": 0.16399236023426056, "learning_rate": 3.6847614774936445e-05, "loss": 0.0085, "step": 88000 }, { "epoch": 26.32, "eval_loss": 0.326471209526062, "eval_precision": 0.9298322483725588, "eval_recall": 0.9147449120970473, "eval_runtime": 305.1957, "eval_samples_per_second": 43.821, "eval_steps_per_second": 1.37, "step": 88000 }, { "epoch": 26.35, "grad_norm": 0.5788341164588928, "learning_rate": 3.6832660385823244e-05, "loss": 0.0097, "step": 88100 }, { "epoch": 26.38, "grad_norm": 0.38478532433509827, "learning_rate": 3.681770599671003e-05, "loss": 0.0083, "step": 88200 }, { "epoch": 26.41, "grad_norm": 1.8616811037063599, "learning_rate": 3.680275160759683e-05, "loss": 0.0082, "step": 88300 }, { "epoch": 26.44, "grad_norm": 0.005648652091622353, "learning_rate": 3.678779721848363e-05, "loss": 0.0074, "step": 88400 }, { "epoch": 26.47, "grad_norm": 0.013662021607160568, "learning_rate": 3.677284282937042e-05, "loss": 0.0054, "step": 88500 }, { "epoch": 26.5, "grad_norm": 0.21754692494869232, "learning_rate": 3.6757888440257216e-05, "loss": 0.0115, "step": 88600 }, { "epoch": 26.53, "grad_norm": 0.0358903631567955, "learning_rate": 3.6742934051144015e-05, "loss": 0.0097, "step": 88700 }, { "epoch": 26.56, "grad_norm": 0.9966431856155396, "learning_rate": 3.672797966203081e-05, "loss": 0.0074, "step": 88800 }, { "epoch": 26.58, "grad_norm": 0.7227293848991394, "learning_rate": 3.67130252729176e-05, "loss": 0.0088, "step": 88900 }, { "epoch": 26.61, "grad_norm": 1.3261148929595947, "learning_rate": 3.66980708838044e-05, "loss": 0.0072, "step": 89000 }, { "epoch": 26.61, "eval_loss": 0.3263101279735565, "eval_precision": 0.9263782601905357, "eval_recall": 0.9131438775824379, "eval_runtime": 306.4472, "eval_samples_per_second": 43.642, "eval_steps_per_second": 1.364, "step": 89000 }, { "epoch": 26.64, "grad_norm": 0.11170350760221481, "learning_rate": 3.6683116494691194e-05, "loss": 0.0092, "step": 89100 }, { "epoch": 26.67, "grad_norm": 1.529340147972107, "learning_rate": 3.666816210557799e-05, "loss": 0.0089, "step": 89200 }, { "epoch": 26.7, "grad_norm": 0.01682981289923191, "learning_rate": 3.665320771646478e-05, "loss": 0.0093, "step": 89300 }, { "epoch": 26.73, "grad_norm": 0.3299085199832916, "learning_rate": 3.663825332735158e-05, "loss": 0.0063, "step": 89400 }, { "epoch": 26.76, "grad_norm": 1.9823254346847534, "learning_rate": 3.662329893823838e-05, "loss": 0.0091, "step": 89500 }, { "epoch": 26.79, "grad_norm": 0.07487453520298004, "learning_rate": 3.6608344549125165e-05, "loss": 0.009, "step": 89600 }, { "epoch": 26.82, "grad_norm": 0.015319288708269596, "learning_rate": 3.6593390160011964e-05, "loss": 0.0078, "step": 89700 }, { "epoch": 26.85, "grad_norm": 0.004087815526872873, "learning_rate": 3.6578435770898764e-05, "loss": 0.0069, "step": 89800 }, { "epoch": 26.88, "grad_norm": 0.00753753213211894, "learning_rate": 3.656348138178556e-05, "loss": 0.0057, "step": 89900 }, { "epoch": 26.91, "grad_norm": 0.012257667258381844, "learning_rate": 3.654852699267235e-05, "loss": 0.0095, "step": 90000 }, { "epoch": 26.91, "eval_loss": 0.3233014643192291, "eval_precision": 0.9329517062525696, "eval_recall": 0.9082484066627667, "eval_runtime": 304.4964, "eval_samples_per_second": 43.922, "eval_steps_per_second": 1.373, "step": 90000 }, { "epoch": 26.94, "grad_norm": 0.030741436406970024, "learning_rate": 3.653357260355915e-05, "loss": 0.0067, "step": 90100 }, { "epoch": 26.97, "grad_norm": 0.429049551486969, "learning_rate": 3.651861821444594e-05, "loss": 0.012, "step": 90200 }, { "epoch": 27.0, "grad_norm": 0.002479678951203823, "learning_rate": 3.6503663825332735e-05, "loss": 0.005, "step": 90300 }, { "epoch": 27.03, "grad_norm": 0.12390375137329102, "learning_rate": 3.648870943621953e-05, "loss": 0.0083, "step": 90400 }, { "epoch": 27.06, "grad_norm": 0.044969938695430756, "learning_rate": 3.647375504710633e-05, "loss": 0.0073, "step": 90500 }, { "epoch": 27.09, "grad_norm": 0.06378799676895142, "learning_rate": 3.645880065799313e-05, "loss": 0.0073, "step": 90600 }, { "epoch": 27.12, "grad_norm": 0.323734849691391, "learning_rate": 3.6443846268879914e-05, "loss": 0.0078, "step": 90700 }, { "epoch": 27.15, "grad_norm": 1.6457269191741943, "learning_rate": 3.642889187976671e-05, "loss": 0.0055, "step": 90800 }, { "epoch": 27.18, "grad_norm": 0.007004741113632917, "learning_rate": 3.641393749065351e-05, "loss": 0.0065, "step": 90900 }, { "epoch": 27.21, "grad_norm": 0.06395163387060165, "learning_rate": 3.63989831015403e-05, "loss": 0.0062, "step": 91000 }, { "epoch": 27.21, "eval_loss": 0.32764899730682373, "eval_precision": 0.9317584480600751, "eval_recall": 0.916869361741433, "eval_runtime": 309.1631, "eval_samples_per_second": 43.259, "eval_steps_per_second": 1.352, "step": 91000 }, { "epoch": 27.24, "grad_norm": 0.005486265290528536, "learning_rate": 3.63840287124271e-05, "loss": 0.0082, "step": 91100 }, { "epoch": 27.27, "grad_norm": 2.3132262229919434, "learning_rate": 3.63690743233139e-05, "loss": 0.0067, "step": 91200 }, { "epoch": 27.3, "grad_norm": 0.07687461376190186, "learning_rate": 3.635411993420069e-05, "loss": 0.0051, "step": 91300 }, { "epoch": 27.33, "grad_norm": 0.05096305161714554, "learning_rate": 3.6339165545087484e-05, "loss": 0.0061, "step": 91400 }, { "epoch": 27.36, "grad_norm": 0.21200311183929443, "learning_rate": 3.6324211155974284e-05, "loss": 0.0072, "step": 91500 }, { "epoch": 27.39, "grad_norm": 0.07336900383234024, "learning_rate": 3.630925676686108e-05, "loss": 0.008, "step": 91600 }, { "epoch": 27.42, "grad_norm": 0.026788916438817978, "learning_rate": 3.629430237774787e-05, "loss": 0.0068, "step": 91700 }, { "epoch": 27.45, "grad_norm": 0.03046250529587269, "learning_rate": 3.627934798863466e-05, "loss": 0.0081, "step": 91800 }, { "epoch": 27.48, "grad_norm": 0.32240158319473267, "learning_rate": 3.626439359952146e-05, "loss": 0.0091, "step": 91900 }, { "epoch": 27.51, "grad_norm": 0.1428656429052353, "learning_rate": 3.624943921040826e-05, "loss": 0.007, "step": 92000 }, { "epoch": 27.51, "eval_loss": 0.3499869704246521, "eval_precision": 0.9278612426685068, "eval_recall": 0.9108346931863666, "eval_runtime": 310.2456, "eval_samples_per_second": 43.108, "eval_steps_per_second": 1.347, "step": 92000 } ], "logging_steps": 100, "max_steps": 334400, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 4.8090441780412416e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }