{ "best_metric": 0.9900356268091739, "best_model_checkpoint": "Garbage-Classification-SWIN-Transformer/checkpoint-2800", "epoch": 9.973285841495994, "eval_steps": 500, "global_step": 2800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03561887800534283, "grad_norm": 3.4222159385681152, "learning_rate": 1.7857142857142857e-06, "loss": 1.4083, "step": 10 }, { "epoch": 0.07123775601068566, "grad_norm": 3.2973814010620117, "learning_rate": 3.5714285714285714e-06, "loss": 1.3949, "step": 20 }, { "epoch": 0.10685663401602849, "grad_norm": 3.3026835918426514, "learning_rate": 5.357142857142857e-06, "loss": 1.3661, "step": 30 }, { "epoch": 0.14247551202137132, "grad_norm": 4.8538289070129395, "learning_rate": 7.142857142857143e-06, "loss": 1.3301, "step": 40 }, { "epoch": 0.17809439002671415, "grad_norm": 6.877591609954834, "learning_rate": 8.92857142857143e-06, "loss": 1.2576, "step": 50 }, { "epoch": 0.21371326803205698, "grad_norm": 5.944957256317139, "learning_rate": 1.0714285714285714e-05, "loss": 1.1654, "step": 60 }, { "epoch": 0.2493321460373998, "grad_norm": 6.024969100952148, "learning_rate": 1.25e-05, "loss": 1.0324, "step": 70 }, { "epoch": 0.28495102404274264, "grad_norm": 6.543083667755127, "learning_rate": 1.4285714285714285e-05, "loss": 0.8479, "step": 80 }, { "epoch": 0.3205699020480855, "grad_norm": 5.732212543487549, "learning_rate": 1.6071428571428572e-05, "loss": 0.7139, "step": 90 }, { "epoch": 0.3561887800534283, "grad_norm": 6.9801459312438965, "learning_rate": 1.785714285714286e-05, "loss": 0.5947, "step": 100 }, { "epoch": 0.39180765805877116, "grad_norm": 7.786228656768799, "learning_rate": 1.9642857142857145e-05, "loss": 0.5308, "step": 110 }, { "epoch": 0.42742653606411396, "grad_norm": 7.892849445343018, "learning_rate": 2.1428571428571428e-05, "loss": 0.4402, "step": 120 }, { "epoch": 0.4630454140694568, "grad_norm": 6.560166835784912, "learning_rate": 2.3214285714285715e-05, "loss": 0.4023, "step": 130 }, { "epoch": 0.4986642920747996, "grad_norm": 7.141245365142822, "learning_rate": 2.5e-05, "loss": 0.3745, "step": 140 }, { "epoch": 0.5342831700801425, "grad_norm": 4.8248419761657715, "learning_rate": 2.6785714285714288e-05, "loss": 0.3398, "step": 150 }, { "epoch": 0.5699020480854853, "grad_norm": 5.556511878967285, "learning_rate": 2.857142857142857e-05, "loss": 0.2968, "step": 160 }, { "epoch": 0.6055209260908282, "grad_norm": 7.37336540222168, "learning_rate": 3.0357142857142857e-05, "loss": 0.2907, "step": 170 }, { "epoch": 0.641139804096171, "grad_norm": 6.2109503746032715, "learning_rate": 3.2142857142857144e-05, "loss": 0.2644, "step": 180 }, { "epoch": 0.6767586821015138, "grad_norm": 5.759757995605469, "learning_rate": 3.392857142857143e-05, "loss": 0.2918, "step": 190 }, { "epoch": 0.7123775601068566, "grad_norm": 3.9079067707061768, "learning_rate": 3.571428571428572e-05, "loss": 0.251, "step": 200 }, { "epoch": 0.7479964381121995, "grad_norm": 4.728653430938721, "learning_rate": 3.7500000000000003e-05, "loss": 0.255, "step": 210 }, { "epoch": 0.7836153161175423, "grad_norm": 5.230493545532227, "learning_rate": 3.928571428571429e-05, "loss": 0.2118, "step": 220 }, { "epoch": 0.8192341941228851, "grad_norm": 5.2034101486206055, "learning_rate": 4.107142857142857e-05, "loss": 0.2045, "step": 230 }, { "epoch": 0.8548530721282279, "grad_norm": 5.750576972961426, "learning_rate": 4.2857142857142856e-05, "loss": 0.2076, "step": 240 }, { "epoch": 0.8904719501335708, "grad_norm": 4.098123550415039, "learning_rate": 4.464285714285715e-05, "loss": 0.2077, "step": 250 }, { "epoch": 0.9260908281389136, "grad_norm": 6.287376880645752, "learning_rate": 4.642857142857143e-05, "loss": 0.1989, "step": 260 }, { "epoch": 0.9617097061442564, "grad_norm": 5.270883083343506, "learning_rate": 4.8214285714285716e-05, "loss": 0.1782, "step": 270 }, { "epoch": 0.9973285841495992, "grad_norm": 9.215749740600586, "learning_rate": 5e-05, "loss": 0.1969, "step": 280 }, { "epoch": 0.9973285841495992, "eval_accuracy": 0.9409374304163883, "eval_loss": 0.17398740351200104, "eval_runtime": 169.1651, "eval_samples_per_second": 106.192, "eval_steps_per_second": 1.661, "step": 280 }, { "epoch": 1.0329474621549422, "grad_norm": 3.8531081676483154, "learning_rate": 4.9801587301587306e-05, "loss": 0.1794, "step": 290 }, { "epoch": 1.068566340160285, "grad_norm": 5.727213382720947, "learning_rate": 4.960317460317461e-05, "loss": 0.1792, "step": 300 }, { "epoch": 1.1041852181656278, "grad_norm": 5.829253673553467, "learning_rate": 4.940476190476191e-05, "loss": 0.1445, "step": 310 }, { "epoch": 1.1398040961709706, "grad_norm": 5.027329921722412, "learning_rate": 4.9206349206349204e-05, "loss": 0.1282, "step": 320 }, { "epoch": 1.1754229741763134, "grad_norm": 3.377734422683716, "learning_rate": 4.900793650793651e-05, "loss": 0.1346, "step": 330 }, { "epoch": 1.2110418521816562, "grad_norm": 4.863683700561523, "learning_rate": 4.880952380952381e-05, "loss": 0.1141, "step": 340 }, { "epoch": 1.2466607301869992, "grad_norm": 6.36651086807251, "learning_rate": 4.8611111111111115e-05, "loss": 0.129, "step": 350 }, { "epoch": 1.282279608192342, "grad_norm": 6.058999061584473, "learning_rate": 4.841269841269841e-05, "loss": 0.1304, "step": 360 }, { "epoch": 1.3178984861976848, "grad_norm": 3.276484251022339, "learning_rate": 4.8214285714285716e-05, "loss": 0.1107, "step": 370 }, { "epoch": 1.3535173642030276, "grad_norm": 4.594018936157227, "learning_rate": 4.801587301587302e-05, "loss": 0.1185, "step": 380 }, { "epoch": 1.3891362422083704, "grad_norm": 2.975635290145874, "learning_rate": 4.781746031746032e-05, "loss": 0.1287, "step": 390 }, { "epoch": 1.4247551202137132, "grad_norm": 3.8723185062408447, "learning_rate": 4.761904761904762e-05, "loss": 0.1207, "step": 400 }, { "epoch": 1.460373998219056, "grad_norm": 2.9883522987365723, "learning_rate": 4.7420634920634924e-05, "loss": 0.1012, "step": 410 }, { "epoch": 1.495992876224399, "grad_norm": 4.591331481933594, "learning_rate": 4.722222222222222e-05, "loss": 0.1092, "step": 420 }, { "epoch": 1.5316117542297416, "grad_norm": 3.980555295944214, "learning_rate": 4.7023809523809525e-05, "loss": 0.1025, "step": 430 }, { "epoch": 1.5672306322350846, "grad_norm": 4.750612258911133, "learning_rate": 4.682539682539683e-05, "loss": 0.1047, "step": 440 }, { "epoch": 1.6028495102404274, "grad_norm": 3.2454400062561035, "learning_rate": 4.662698412698413e-05, "loss": 0.0914, "step": 450 }, { "epoch": 1.6384683882457702, "grad_norm": 11.585031509399414, "learning_rate": 4.642857142857143e-05, "loss": 0.0827, "step": 460 }, { "epoch": 1.674087266251113, "grad_norm": 2.573432207107544, "learning_rate": 4.623015873015873e-05, "loss": 0.0866, "step": 470 }, { "epoch": 1.7097061442564558, "grad_norm": 3.991711378097534, "learning_rate": 4.603174603174603e-05, "loss": 0.088, "step": 480 }, { "epoch": 1.7453250222617989, "grad_norm": 4.302712440490723, "learning_rate": 4.5833333333333334e-05, "loss": 0.0985, "step": 490 }, { "epoch": 1.7809439002671414, "grad_norm": 3.247912883758545, "learning_rate": 4.563492063492064e-05, "loss": 0.1015, "step": 500 }, { "epoch": 1.8165627782724845, "grad_norm": 3.324694871902466, "learning_rate": 4.543650793650794e-05, "loss": 0.101, "step": 510 }, { "epoch": 1.8521816562778273, "grad_norm": 5.875315189361572, "learning_rate": 4.523809523809524e-05, "loss": 0.0908, "step": 520 }, { "epoch": 1.88780053428317, "grad_norm": 2.6438138484954834, "learning_rate": 4.503968253968254e-05, "loss": 0.0979, "step": 530 }, { "epoch": 1.9234194122885129, "grad_norm": 2.6687426567077637, "learning_rate": 4.4841269841269846e-05, "loss": 0.0834, "step": 540 }, { "epoch": 1.9590382902938557, "grad_norm": 3.7509348392486572, "learning_rate": 4.464285714285715e-05, "loss": 0.0826, "step": 550 }, { "epoch": 1.9946571682991987, "grad_norm": 3.0679826736450195, "learning_rate": 4.4444444444444447e-05, "loss": 0.1014, "step": 560 }, { "epoch": 1.9982190560997328, "eval_accuracy": 0.9755065686929414, "eval_loss": 0.07521907240152359, "eval_runtime": 167.9798, "eval_samples_per_second": 106.941, "eval_steps_per_second": 1.673, "step": 561 }, { "epoch": 2.0302760463045413, "grad_norm": 4.248985290527344, "learning_rate": 4.4246031746031744e-05, "loss": 0.0633, "step": 570 }, { "epoch": 2.0658949243098843, "grad_norm": 3.44899320602417, "learning_rate": 4.404761904761905e-05, "loss": 0.0665, "step": 580 }, { "epoch": 2.101513802315227, "grad_norm": 4.588482856750488, "learning_rate": 4.384920634920635e-05, "loss": 0.0503, "step": 590 }, { "epoch": 2.13713268032057, "grad_norm": 3.4909369945526123, "learning_rate": 4.3650793650793655e-05, "loss": 0.0513, "step": 600 }, { "epoch": 2.172751558325913, "grad_norm": 2.8658242225646973, "learning_rate": 4.345238095238096e-05, "loss": 0.0552, "step": 610 }, { "epoch": 2.2083704363312555, "grad_norm": 3.1010851860046387, "learning_rate": 4.3253968253968256e-05, "loss": 0.0468, "step": 620 }, { "epoch": 2.2439893143365985, "grad_norm": 1.9300130605697632, "learning_rate": 4.305555555555556e-05, "loss": 0.0551, "step": 630 }, { "epoch": 2.279608192341941, "grad_norm": 2.8282604217529297, "learning_rate": 4.2857142857142856e-05, "loss": 0.0525, "step": 640 }, { "epoch": 2.315227070347284, "grad_norm": 4.43895149230957, "learning_rate": 4.265873015873016e-05, "loss": 0.0492, "step": 650 }, { "epoch": 2.3508459483526267, "grad_norm": 4.581601142883301, "learning_rate": 4.2460317460317464e-05, "loss": 0.0506, "step": 660 }, { "epoch": 2.3864648263579697, "grad_norm": 4.677193641662598, "learning_rate": 4.226190476190476e-05, "loss": 0.0404, "step": 670 }, { "epoch": 2.4220837043633123, "grad_norm": 2.8398616313934326, "learning_rate": 4.2063492063492065e-05, "loss": 0.0438, "step": 680 }, { "epoch": 2.4577025823686554, "grad_norm": 5.424591064453125, "learning_rate": 4.186507936507937e-05, "loss": 0.0544, "step": 690 }, { "epoch": 2.4933214603739984, "grad_norm": 4.595205307006836, "learning_rate": 4.166666666666667e-05, "loss": 0.0557, "step": 700 }, { "epoch": 2.528940338379341, "grad_norm": 3.7095601558685303, "learning_rate": 4.1468253968253976e-05, "loss": 0.0505, "step": 710 }, { "epoch": 2.564559216384684, "grad_norm": 2.084617853164673, "learning_rate": 4.126984126984127e-05, "loss": 0.037, "step": 720 }, { "epoch": 2.6001780943900266, "grad_norm": 3.442814350128174, "learning_rate": 4.107142857142857e-05, "loss": 0.0399, "step": 730 }, { "epoch": 2.6357969723953696, "grad_norm": 3.6135501861572266, "learning_rate": 4.0873015873015874e-05, "loss": 0.0498, "step": 740 }, { "epoch": 2.671415850400712, "grad_norm": 3.422394275665283, "learning_rate": 4.067460317460318e-05, "loss": 0.0526, "step": 750 }, { "epoch": 2.707034728406055, "grad_norm": 4.467539310455322, "learning_rate": 4.047619047619048e-05, "loss": 0.0392, "step": 760 }, { "epoch": 2.742653606411398, "grad_norm": 2.9515881538391113, "learning_rate": 4.027777777777778e-05, "loss": 0.0468, "step": 770 }, { "epoch": 2.778272484416741, "grad_norm": 4.296915054321289, "learning_rate": 4.007936507936508e-05, "loss": 0.0534, "step": 780 }, { "epoch": 2.813891362422084, "grad_norm": 2.723884344100952, "learning_rate": 3.9880952380952386e-05, "loss": 0.048, "step": 790 }, { "epoch": 2.8495102404274264, "grad_norm": 1.9511892795562744, "learning_rate": 3.968253968253968e-05, "loss": 0.0429, "step": 800 }, { "epoch": 2.8851291184327694, "grad_norm": 2.8428244590759277, "learning_rate": 3.9484126984126986e-05, "loss": 0.0386, "step": 810 }, { "epoch": 2.920747996438112, "grad_norm": 5.256746768951416, "learning_rate": 3.928571428571429e-05, "loss": 0.0421, "step": 820 }, { "epoch": 2.956366874443455, "grad_norm": 2.277379035949707, "learning_rate": 3.908730158730159e-05, "loss": 0.0521, "step": 830 }, { "epoch": 2.991985752448798, "grad_norm": 2.638746500015259, "learning_rate": 3.888888888888889e-05, "loss": 0.0333, "step": 840 }, { "epoch": 2.9991095280498663, "eval_accuracy": 0.9824092629703852, "eval_loss": 0.05514274910092354, "eval_runtime": 167.4573, "eval_samples_per_second": 107.275, "eval_steps_per_second": 1.678, "step": 842 }, { "epoch": 3.0276046304541406, "grad_norm": 2.1443052291870117, "learning_rate": 3.8690476190476195e-05, "loss": 0.0221, "step": 850 }, { "epoch": 3.0632235084594837, "grad_norm": 2.5836849212646484, "learning_rate": 3.84920634920635e-05, "loss": 0.0258, "step": 860 }, { "epoch": 3.0988423864648262, "grad_norm": 4.255194187164307, "learning_rate": 3.8293650793650795e-05, "loss": 0.0269, "step": 870 }, { "epoch": 3.1344612644701693, "grad_norm": 1.1687568426132202, "learning_rate": 3.809523809523809e-05, "loss": 0.0229, "step": 880 }, { "epoch": 3.170080142475512, "grad_norm": 3.9044461250305176, "learning_rate": 3.7896825396825396e-05, "loss": 0.0241, "step": 890 }, { "epoch": 3.205699020480855, "grad_norm": 3.447392225265503, "learning_rate": 3.76984126984127e-05, "loss": 0.0258, "step": 900 }, { "epoch": 3.241317898486198, "grad_norm": 1.6584926843643188, "learning_rate": 3.7500000000000003e-05, "loss": 0.0221, "step": 910 }, { "epoch": 3.2769367764915405, "grad_norm": 6.36900520324707, "learning_rate": 3.730158730158731e-05, "loss": 0.0272, "step": 920 }, { "epoch": 3.3125556544968835, "grad_norm": 1.01226007938385, "learning_rate": 3.7103174603174604e-05, "loss": 0.0242, "step": 930 }, { "epoch": 3.348174532502226, "grad_norm": 0.7302751541137695, "learning_rate": 3.690476190476191e-05, "loss": 0.0276, "step": 940 }, { "epoch": 3.383793410507569, "grad_norm": 5.937874794006348, "learning_rate": 3.6706349206349205e-05, "loss": 0.03, "step": 950 }, { "epoch": 3.4194122885129117, "grad_norm": 3.688383102416992, "learning_rate": 3.650793650793651e-05, "loss": 0.0239, "step": 960 }, { "epoch": 3.4550311665182547, "grad_norm": 8.206720352172852, "learning_rate": 3.630952380952381e-05, "loss": 0.038, "step": 970 }, { "epoch": 3.4906500445235977, "grad_norm": 1.0089924335479736, "learning_rate": 3.611111111111111e-05, "loss": 0.0275, "step": 980 }, { "epoch": 3.5262689225289403, "grad_norm": 4.124368190765381, "learning_rate": 3.591269841269841e-05, "loss": 0.0383, "step": 990 }, { "epoch": 3.5618878005342833, "grad_norm": 4.853917598724365, "learning_rate": 3.571428571428572e-05, "loss": 0.0183, "step": 1000 }, { "epoch": 3.597506678539626, "grad_norm": 0.8174078464508057, "learning_rate": 3.551587301587302e-05, "loss": 0.0176, "step": 1010 }, { "epoch": 3.633125556544969, "grad_norm": 2.9447782039642334, "learning_rate": 3.5317460317460324e-05, "loss": 0.0294, "step": 1020 }, { "epoch": 3.6687444345503115, "grad_norm": 2.926154375076294, "learning_rate": 3.511904761904762e-05, "loss": 0.0257, "step": 1030 }, { "epoch": 3.7043633125556545, "grad_norm": 2.566096305847168, "learning_rate": 3.492063492063492e-05, "loss": 0.0172, "step": 1040 }, { "epoch": 3.7399821905609976, "grad_norm": 3.302891492843628, "learning_rate": 3.472222222222222e-05, "loss": 0.0314, "step": 1050 }, { "epoch": 3.77560106856634, "grad_norm": 1.366655945777893, "learning_rate": 3.4523809523809526e-05, "loss": 0.0243, "step": 1060 }, { "epoch": 3.8112199465716827, "grad_norm": 1.3453521728515625, "learning_rate": 3.432539682539683e-05, "loss": 0.0166, "step": 1070 }, { "epoch": 3.8468388245770258, "grad_norm": 3.0241684913635254, "learning_rate": 3.412698412698413e-05, "loss": 0.0341, "step": 1080 }, { "epoch": 3.8824577025823688, "grad_norm": 4.319317817687988, "learning_rate": 3.392857142857143e-05, "loss": 0.0199, "step": 1090 }, { "epoch": 3.9180765805877114, "grad_norm": 0.412803053855896, "learning_rate": 3.3730158730158734e-05, "loss": 0.0282, "step": 1100 }, { "epoch": 3.9536954585930544, "grad_norm": 2.4635729789733887, "learning_rate": 3.353174603174603e-05, "loss": 0.0201, "step": 1110 }, { "epoch": 3.9893143365983974, "grad_norm": 3.8697004318237305, "learning_rate": 3.3333333333333335e-05, "loss": 0.0332, "step": 1120 }, { "epoch": 4.0, "eval_accuracy": 0.9844689378757515, "eval_loss": 0.05261413753032684, "eval_runtime": 166.8993, "eval_samples_per_second": 107.634, "eval_steps_per_second": 1.684, "step": 1123 }, { "epoch": 4.02493321460374, "grad_norm": 0.7120935916900635, "learning_rate": 3.313492063492064e-05, "loss": 0.0137, "step": 1130 }, { "epoch": 4.060552092609083, "grad_norm": 4.946304798126221, "learning_rate": 3.2936507936507936e-05, "loss": 0.0199, "step": 1140 }, { "epoch": 4.096170970614426, "grad_norm": 3.8117804527282715, "learning_rate": 3.273809523809524e-05, "loss": 0.0177, "step": 1150 }, { "epoch": 4.131789848619769, "grad_norm": 1.8831413984298706, "learning_rate": 3.253968253968254e-05, "loss": 0.0197, "step": 1160 }, { "epoch": 4.167408726625111, "grad_norm": 0.7759003639221191, "learning_rate": 3.234126984126985e-05, "loss": 0.0151, "step": 1170 }, { "epoch": 4.203027604630454, "grad_norm": 4.501601696014404, "learning_rate": 3.2142857142857144e-05, "loss": 0.0148, "step": 1180 }, { "epoch": 4.238646482635797, "grad_norm": 2.070884943008423, "learning_rate": 3.194444444444444e-05, "loss": 0.0176, "step": 1190 }, { "epoch": 4.27426536064114, "grad_norm": 2.1252646446228027, "learning_rate": 3.1746031746031745e-05, "loss": 0.0141, "step": 1200 }, { "epoch": 4.309884238646482, "grad_norm": 2.5485196113586426, "learning_rate": 3.154761904761905e-05, "loss": 0.017, "step": 1210 }, { "epoch": 4.345503116651826, "grad_norm": 2.234440803527832, "learning_rate": 3.134920634920635e-05, "loss": 0.0149, "step": 1220 }, { "epoch": 4.3811219946571685, "grad_norm": 0.32285788655281067, "learning_rate": 3.1150793650793656e-05, "loss": 0.0119, "step": 1230 }, { "epoch": 4.416740872662511, "grad_norm": 1.612191081047058, "learning_rate": 3.095238095238095e-05, "loss": 0.0173, "step": 1240 }, { "epoch": 4.452359750667854, "grad_norm": 5.2718987464904785, "learning_rate": 3.075396825396826e-05, "loss": 0.0201, "step": 1250 }, { "epoch": 4.487978628673197, "grad_norm": 0.8849789500236511, "learning_rate": 3.055555555555556e-05, "loss": 0.0144, "step": 1260 }, { "epoch": 4.52359750667854, "grad_norm": 2.3857195377349854, "learning_rate": 3.0357142857142857e-05, "loss": 0.0202, "step": 1270 }, { "epoch": 4.559216384683882, "grad_norm": 0.264274924993515, "learning_rate": 3.0158730158730158e-05, "loss": 0.0185, "step": 1280 }, { "epoch": 4.594835262689225, "grad_norm": 2.823655605316162, "learning_rate": 2.996031746031746e-05, "loss": 0.0158, "step": 1290 }, { "epoch": 4.630454140694568, "grad_norm": 1.4026707410812378, "learning_rate": 2.9761904761904762e-05, "loss": 0.018, "step": 1300 }, { "epoch": 4.666073018699911, "grad_norm": 3.624246120452881, "learning_rate": 2.9563492063492066e-05, "loss": 0.0113, "step": 1310 }, { "epoch": 4.7016918967052534, "grad_norm": 7.601430892944336, "learning_rate": 2.9365079365079366e-05, "loss": 0.016, "step": 1320 }, { "epoch": 4.737310774710597, "grad_norm": 3.959538221359253, "learning_rate": 2.916666666666667e-05, "loss": 0.023, "step": 1330 }, { "epoch": 4.7729296527159395, "grad_norm": 5.305376052856445, "learning_rate": 2.8968253968253974e-05, "loss": 0.0126, "step": 1340 }, { "epoch": 4.808548530721282, "grad_norm": 3.621107816696167, "learning_rate": 2.876984126984127e-05, "loss": 0.0174, "step": 1350 }, { "epoch": 4.844167408726625, "grad_norm": 1.0203227996826172, "learning_rate": 2.857142857142857e-05, "loss": 0.0152, "step": 1360 }, { "epoch": 4.879786286731968, "grad_norm": 1.6705145835876465, "learning_rate": 2.8373015873015875e-05, "loss": 0.0117, "step": 1370 }, { "epoch": 4.915405164737311, "grad_norm": 0.6089171767234802, "learning_rate": 2.8174603174603175e-05, "loss": 0.0184, "step": 1380 }, { "epoch": 4.951024042742653, "grad_norm": 1.959324836730957, "learning_rate": 2.797619047619048e-05, "loss": 0.0135, "step": 1390 }, { "epoch": 4.986642920747997, "grad_norm": 1.1502999067306519, "learning_rate": 2.777777777777778e-05, "loss": 0.0218, "step": 1400 }, { "epoch": 4.997328584149599, "eval_accuracy": 0.986584279670452, "eval_loss": 0.05108034238219261, "eval_runtime": 172.0304, "eval_samples_per_second": 104.423, "eval_steps_per_second": 1.633, "step": 1403 }, { "epoch": 5.022261798753339, "grad_norm": 1.9193094968795776, "learning_rate": 2.7579365079365083e-05, "loss": 0.0109, "step": 1410 }, { "epoch": 5.057880676758682, "grad_norm": 0.6912887096405029, "learning_rate": 2.7380952380952383e-05, "loss": 0.0092, "step": 1420 }, { "epoch": 5.0934995547640245, "grad_norm": 0.3243821859359741, "learning_rate": 2.718253968253968e-05, "loss": 0.0107, "step": 1430 }, { "epoch": 5.129118432769368, "grad_norm": 1.4951286315917969, "learning_rate": 2.6984126984126984e-05, "loss": 0.0057, "step": 1440 }, { "epoch": 5.1647373107747105, "grad_norm": 1.5525418519973755, "learning_rate": 2.6785714285714288e-05, "loss": 0.0084, "step": 1450 }, { "epoch": 5.200356188780053, "grad_norm": 1.0544503927230835, "learning_rate": 2.6587301587301588e-05, "loss": 0.0143, "step": 1460 }, { "epoch": 5.235975066785397, "grad_norm": 5.903578758239746, "learning_rate": 2.6388888888888892e-05, "loss": 0.0135, "step": 1470 }, { "epoch": 5.271593944790739, "grad_norm": 0.9685418009757996, "learning_rate": 2.6190476190476192e-05, "loss": 0.0122, "step": 1480 }, { "epoch": 5.307212822796082, "grad_norm": 1.8341890573501587, "learning_rate": 2.5992063492063496e-05, "loss": 0.0174, "step": 1490 }, { "epoch": 5.342831700801424, "grad_norm": 1.3028405904769897, "learning_rate": 2.5793650793650796e-05, "loss": 0.0074, "step": 1500 }, { "epoch": 5.378450578806768, "grad_norm": 0.8573488593101501, "learning_rate": 2.5595238095238093e-05, "loss": 0.0123, "step": 1510 }, { "epoch": 5.41406945681211, "grad_norm": 0.8495379686355591, "learning_rate": 2.5396825396825397e-05, "loss": 0.0092, "step": 1520 }, { "epoch": 5.449688334817453, "grad_norm": 4.806192874908447, "learning_rate": 2.5198412698412697e-05, "loss": 0.0117, "step": 1530 }, { "epoch": 5.485307212822796, "grad_norm": 1.6827951669692993, "learning_rate": 2.5e-05, "loss": 0.0109, "step": 1540 }, { "epoch": 5.520926090828139, "grad_norm": 1.2856072187423706, "learning_rate": 2.4801587301587305e-05, "loss": 0.0066, "step": 1550 }, { "epoch": 5.556544968833482, "grad_norm": 1.8056079149246216, "learning_rate": 2.4603174603174602e-05, "loss": 0.0103, "step": 1560 }, { "epoch": 5.592163846838824, "grad_norm": 1.3088467121124268, "learning_rate": 2.4404761904761906e-05, "loss": 0.0071, "step": 1570 }, { "epoch": 5.627782724844168, "grad_norm": 0.6147578954696655, "learning_rate": 2.4206349206349206e-05, "loss": 0.0135, "step": 1580 }, { "epoch": 5.66340160284951, "grad_norm": 3.193516969680786, "learning_rate": 2.400793650793651e-05, "loss": 0.0128, "step": 1590 }, { "epoch": 5.699020480854853, "grad_norm": 1.0563770532608032, "learning_rate": 2.380952380952381e-05, "loss": 0.0116, "step": 1600 }, { "epoch": 5.734639358860196, "grad_norm": 4.569545269012451, "learning_rate": 2.361111111111111e-05, "loss": 0.0134, "step": 1610 }, { "epoch": 5.770258236865539, "grad_norm": 1.7688589096069336, "learning_rate": 2.3412698412698414e-05, "loss": 0.0104, "step": 1620 }, { "epoch": 5.805877114870881, "grad_norm": 0.6101610064506531, "learning_rate": 2.3214285714285715e-05, "loss": 0.0124, "step": 1630 }, { "epoch": 5.841495992876224, "grad_norm": 6.201462745666504, "learning_rate": 2.3015873015873015e-05, "loss": 0.0154, "step": 1640 }, { "epoch": 5.8771148708815675, "grad_norm": 0.349431574344635, "learning_rate": 2.281746031746032e-05, "loss": 0.0073, "step": 1650 }, { "epoch": 5.91273374888691, "grad_norm": 2.9831745624542236, "learning_rate": 2.261904761904762e-05, "loss": 0.0116, "step": 1660 }, { "epoch": 5.948352626892253, "grad_norm": 2.7858543395996094, "learning_rate": 2.2420634920634923e-05, "loss": 0.0099, "step": 1670 }, { "epoch": 5.983971504897596, "grad_norm": 1.6756843328475952, "learning_rate": 2.2222222222222223e-05, "loss": 0.0086, "step": 1680 }, { "epoch": 5.998219056099733, "eval_accuracy": 0.9873079492317969, "eval_loss": 0.051540642976760864, "eval_runtime": 168.5925, "eval_samples_per_second": 106.553, "eval_steps_per_second": 1.667, "step": 1684 }, { "epoch": 6.019590382902939, "grad_norm": 3.378427028656006, "learning_rate": 2.2023809523809524e-05, "loss": 0.0089, "step": 1690 }, { "epoch": 6.055209260908281, "grad_norm": 2.4920456409454346, "learning_rate": 2.1825396825396827e-05, "loss": 0.0096, "step": 1700 }, { "epoch": 6.090828138913624, "grad_norm": 2.810772657394409, "learning_rate": 2.1626984126984128e-05, "loss": 0.0102, "step": 1710 }, { "epoch": 6.126447016918967, "grad_norm": 0.348102331161499, "learning_rate": 2.1428571428571428e-05, "loss": 0.0146, "step": 1720 }, { "epoch": 6.16206589492431, "grad_norm": 1.5229803323745728, "learning_rate": 2.1230158730158732e-05, "loss": 0.0128, "step": 1730 }, { "epoch": 6.1976847729296525, "grad_norm": 1.736290693283081, "learning_rate": 2.1031746031746032e-05, "loss": 0.011, "step": 1740 }, { "epoch": 6.233303650934996, "grad_norm": 0.5413634777069092, "learning_rate": 2.0833333333333336e-05, "loss": 0.006, "step": 1750 }, { "epoch": 6.2689225289403385, "grad_norm": 1.1734927892684937, "learning_rate": 2.0634920634920636e-05, "loss": 0.0087, "step": 1760 }, { "epoch": 6.304541406945681, "grad_norm": 0.21049003303050995, "learning_rate": 2.0436507936507937e-05, "loss": 0.0074, "step": 1770 }, { "epoch": 6.340160284951024, "grad_norm": 1.5480449199676514, "learning_rate": 2.023809523809524e-05, "loss": 0.0072, "step": 1780 }, { "epoch": 6.375779162956367, "grad_norm": 1.130774736404419, "learning_rate": 2.003968253968254e-05, "loss": 0.0082, "step": 1790 }, { "epoch": 6.41139804096171, "grad_norm": 0.9111453890800476, "learning_rate": 1.984126984126984e-05, "loss": 0.0081, "step": 1800 }, { "epoch": 6.447016918967052, "grad_norm": 0.12502439320087433, "learning_rate": 1.9642857142857145e-05, "loss": 0.0095, "step": 1810 }, { "epoch": 6.482635796972396, "grad_norm": 0.832012414932251, "learning_rate": 1.9444444444444445e-05, "loss": 0.0042, "step": 1820 }, { "epoch": 6.518254674977738, "grad_norm": 3.6857128143310547, "learning_rate": 1.924603174603175e-05, "loss": 0.0081, "step": 1830 }, { "epoch": 6.553873552983081, "grad_norm": 2.5364391803741455, "learning_rate": 1.9047619047619046e-05, "loss": 0.0089, "step": 1840 }, { "epoch": 6.5894924309884235, "grad_norm": 23.89657211303711, "learning_rate": 1.884920634920635e-05, "loss": 0.0118, "step": 1850 }, { "epoch": 6.625111308993767, "grad_norm": 2.562563180923462, "learning_rate": 1.8650793650793654e-05, "loss": 0.0096, "step": 1860 }, { "epoch": 6.66073018699911, "grad_norm": 3.3216705322265625, "learning_rate": 1.8452380952380954e-05, "loss": 0.0113, "step": 1870 }, { "epoch": 6.696349065004452, "grad_norm": 0.8016022443771362, "learning_rate": 1.8253968253968254e-05, "loss": 0.0054, "step": 1880 }, { "epoch": 6.731967943009796, "grad_norm": 3.4590423107147217, "learning_rate": 1.8055555555555555e-05, "loss": 0.0071, "step": 1890 }, { "epoch": 6.767586821015138, "grad_norm": 0.12237533181905746, "learning_rate": 1.785714285714286e-05, "loss": 0.0073, "step": 1900 }, { "epoch": 6.803205699020481, "grad_norm": 1.500787615776062, "learning_rate": 1.7658730158730162e-05, "loss": 0.0055, "step": 1910 }, { "epoch": 6.838824577025823, "grad_norm": 3.2452268600463867, "learning_rate": 1.746031746031746e-05, "loss": 0.0112, "step": 1920 }, { "epoch": 6.874443455031167, "grad_norm": 0.4780968129634857, "learning_rate": 1.7261904761904763e-05, "loss": 0.0074, "step": 1930 }, { "epoch": 6.910062333036509, "grad_norm": 0.4808253347873688, "learning_rate": 1.7063492063492063e-05, "loss": 0.0096, "step": 1940 }, { "epoch": 6.945681211041852, "grad_norm": 1.5511194467544556, "learning_rate": 1.6865079365079367e-05, "loss": 0.0047, "step": 1950 }, { "epoch": 6.9813000890471955, "grad_norm": 0.8193422555923462, "learning_rate": 1.6666666666666667e-05, "loss": 0.0057, "step": 1960 }, { "epoch": 6.999109528049867, "eval_accuracy": 0.9875306167891338, "eval_loss": 0.04618992656469345, "eval_runtime": 166.9392, "eval_samples_per_second": 107.608, "eval_steps_per_second": 1.683, "step": 1965 }, { "epoch": 7.016918967052538, "grad_norm": 0.1917089819908142, "learning_rate": 1.6468253968253968e-05, "loss": 0.0042, "step": 1970 }, { "epoch": 7.052537845057881, "grad_norm": 0.48844099044799805, "learning_rate": 1.626984126984127e-05, "loss": 0.0053, "step": 1980 }, { "epoch": 7.088156723063223, "grad_norm": 1.94171941280365, "learning_rate": 1.6071428571428572e-05, "loss": 0.0064, "step": 1990 }, { "epoch": 7.123775601068567, "grad_norm": 10.181970596313477, "learning_rate": 1.5873015873015872e-05, "loss": 0.0072, "step": 2000 }, { "epoch": 7.159394479073909, "grad_norm": 0.4458209276199341, "learning_rate": 1.5674603174603176e-05, "loss": 0.0086, "step": 2010 }, { "epoch": 7.195013357079252, "grad_norm": 1.125260829925537, "learning_rate": 1.5476190476190476e-05, "loss": 0.0064, "step": 2020 }, { "epoch": 7.230632235084594, "grad_norm": 2.555405616760254, "learning_rate": 1.527777777777778e-05, "loss": 0.0047, "step": 2030 }, { "epoch": 7.266251113089938, "grad_norm": 0.7010562419891357, "learning_rate": 1.5079365079365079e-05, "loss": 0.0027, "step": 2040 }, { "epoch": 7.3018699910952805, "grad_norm": 0.19031189382076263, "learning_rate": 1.4880952380952381e-05, "loss": 0.004, "step": 2050 }, { "epoch": 7.337488869100623, "grad_norm": 0.25679388642311096, "learning_rate": 1.4682539682539683e-05, "loss": 0.0024, "step": 2060 }, { "epoch": 7.3731077471059665, "grad_norm": 0.22514384984970093, "learning_rate": 1.4484126984126987e-05, "loss": 0.0022, "step": 2070 }, { "epoch": 7.408726625111309, "grad_norm": 1.700714111328125, "learning_rate": 1.4285714285714285e-05, "loss": 0.0069, "step": 2080 }, { "epoch": 7.444345503116652, "grad_norm": 3.598724842071533, "learning_rate": 1.4087301587301587e-05, "loss": 0.0043, "step": 2090 }, { "epoch": 7.479964381121994, "grad_norm": 0.9559303522109985, "learning_rate": 1.388888888888889e-05, "loss": 0.0069, "step": 2100 }, { "epoch": 7.515583259127338, "grad_norm": 2.183023452758789, "learning_rate": 1.3690476190476192e-05, "loss": 0.0043, "step": 2110 }, { "epoch": 7.55120213713268, "grad_norm": 0.6762595772743225, "learning_rate": 1.3492063492063492e-05, "loss": 0.0054, "step": 2120 }, { "epoch": 7.586821015138023, "grad_norm": 0.12739060819149017, "learning_rate": 1.3293650793650794e-05, "loss": 0.0059, "step": 2130 }, { "epoch": 7.622439893143366, "grad_norm": 0.8051192164421082, "learning_rate": 1.3095238095238096e-05, "loss": 0.0014, "step": 2140 }, { "epoch": 7.658058771148709, "grad_norm": 1.1333496570587158, "learning_rate": 1.2896825396825398e-05, "loss": 0.0072, "step": 2150 }, { "epoch": 7.6936776491540515, "grad_norm": 0.06419402360916138, "learning_rate": 1.2698412698412699e-05, "loss": 0.0044, "step": 2160 }, { "epoch": 7.729296527159395, "grad_norm": 1.2567334175109863, "learning_rate": 1.25e-05, "loss": 0.0028, "step": 2170 }, { "epoch": 7.7649154051647375, "grad_norm": 4.354968070983887, "learning_rate": 1.2301587301587301e-05, "loss": 0.0062, "step": 2180 }, { "epoch": 7.80053428317008, "grad_norm": 0.17071713507175446, "learning_rate": 1.2103174603174603e-05, "loss": 0.0045, "step": 2190 }, { "epoch": 7.836153161175423, "grad_norm": 0.057894542813301086, "learning_rate": 1.1904761904761905e-05, "loss": 0.0068, "step": 2200 }, { "epoch": 7.871772039180766, "grad_norm": 4.474624156951904, "learning_rate": 1.1706349206349207e-05, "loss": 0.0043, "step": 2210 }, { "epoch": 7.907390917186109, "grad_norm": 0.9991622567176819, "learning_rate": 1.1507936507936508e-05, "loss": 0.0074, "step": 2220 }, { "epoch": 7.943009795191451, "grad_norm": 0.0822732001543045, "learning_rate": 1.130952380952381e-05, "loss": 0.0041, "step": 2230 }, { "epoch": 7.978628673196794, "grad_norm": 1.9230424165725708, "learning_rate": 1.1111111111111112e-05, "loss": 0.0043, "step": 2240 }, { "epoch": 8.0, "eval_accuracy": 0.9890892896904921, "eval_loss": 0.045326121151447296, "eval_runtime": 168.1961, "eval_samples_per_second": 106.804, "eval_steps_per_second": 1.671, "step": 2246 }, { "epoch": 8.014247551202137, "grad_norm": 2.2943098545074463, "learning_rate": 1.0912698412698414e-05, "loss": 0.0047, "step": 2250 }, { "epoch": 8.04986642920748, "grad_norm": 0.5264036059379578, "learning_rate": 1.0714285714285714e-05, "loss": 0.0038, "step": 2260 }, { "epoch": 8.085485307212823, "grad_norm": 1.6306657791137695, "learning_rate": 1.0515873015873016e-05, "loss": 0.0023, "step": 2270 }, { "epoch": 8.121104185218165, "grad_norm": 0.7527822852134705, "learning_rate": 1.0317460317460318e-05, "loss": 0.0034, "step": 2280 }, { "epoch": 8.156723063223508, "grad_norm": 1.7613914012908936, "learning_rate": 1.011904761904762e-05, "loss": 0.0103, "step": 2290 }, { "epoch": 8.192341941228852, "grad_norm": 0.3057126998901367, "learning_rate": 9.92063492063492e-06, "loss": 0.002, "step": 2300 }, { "epoch": 8.227960819234195, "grad_norm": 0.1171383410692215, "learning_rate": 9.722222222222223e-06, "loss": 0.0038, "step": 2310 }, { "epoch": 8.263579697239537, "grad_norm": 0.17709870636463165, "learning_rate": 9.523809523809523e-06, "loss": 0.0104, "step": 2320 }, { "epoch": 8.29919857524488, "grad_norm": 2.5424516201019287, "learning_rate": 9.325396825396827e-06, "loss": 0.0028, "step": 2330 }, { "epoch": 8.334817453250222, "grad_norm": 1.413736343383789, "learning_rate": 9.126984126984127e-06, "loss": 0.0019, "step": 2340 }, { "epoch": 8.370436331255565, "grad_norm": 0.8712714910507202, "learning_rate": 8.92857142857143e-06, "loss": 0.0044, "step": 2350 }, { "epoch": 8.406055209260908, "grad_norm": 5.921164035797119, "learning_rate": 8.73015873015873e-06, "loss": 0.0027, "step": 2360 }, { "epoch": 8.441674087266252, "grad_norm": 0.6600878238677979, "learning_rate": 8.531746031746032e-06, "loss": 0.0094, "step": 2370 }, { "epoch": 8.477292965271594, "grad_norm": 0.046510376036167145, "learning_rate": 8.333333333333334e-06, "loss": 0.0073, "step": 2380 }, { "epoch": 8.512911843276937, "grad_norm": 0.2806013822555542, "learning_rate": 8.134920634920636e-06, "loss": 0.0074, "step": 2390 }, { "epoch": 8.54853072128228, "grad_norm": 0.09230457246303558, "learning_rate": 7.936507936507936e-06, "loss": 0.0053, "step": 2400 }, { "epoch": 8.584149599287622, "grad_norm": 1.893324851989746, "learning_rate": 7.738095238095238e-06, "loss": 0.006, "step": 2410 }, { "epoch": 8.619768477292965, "grad_norm": 1.035352349281311, "learning_rate": 7.5396825396825394e-06, "loss": 0.0056, "step": 2420 }, { "epoch": 8.655387355298307, "grad_norm": 2.519357204437256, "learning_rate": 7.3412698412698415e-06, "loss": 0.0019, "step": 2430 }, { "epoch": 8.691006233303652, "grad_norm": 0.36915910243988037, "learning_rate": 7.142857142857143e-06, "loss": 0.0053, "step": 2440 }, { "epoch": 8.726625111308994, "grad_norm": 2.2446630001068115, "learning_rate": 6.944444444444445e-06, "loss": 0.0023, "step": 2450 }, { "epoch": 8.762243989314337, "grad_norm": 0.3775590658187866, "learning_rate": 6.746031746031746e-06, "loss": 0.0039, "step": 2460 }, { "epoch": 8.79786286731968, "grad_norm": 3.002919912338257, "learning_rate": 6.547619047619048e-06, "loss": 0.0047, "step": 2470 }, { "epoch": 8.833481745325022, "grad_norm": 2.3306655883789062, "learning_rate": 6.349206349206349e-06, "loss": 0.0066, "step": 2480 }, { "epoch": 8.869100623330365, "grad_norm": 0.08028317987918854, "learning_rate": 6.1507936507936505e-06, "loss": 0.0035, "step": 2490 }, { "epoch": 8.904719501335707, "grad_norm": 1.2194794416427612, "learning_rate": 5.9523809523809525e-06, "loss": 0.0045, "step": 2500 }, { "epoch": 8.940338379341052, "grad_norm": 1.457444429397583, "learning_rate": 5.753968253968254e-06, "loss": 0.0042, "step": 2510 }, { "epoch": 8.975957257346394, "grad_norm": 1.0034886598587036, "learning_rate": 5.555555555555556e-06, "loss": 0.0012, "step": 2520 }, { "epoch": 8.9973285841496, "eval_accuracy": 0.988810955243821, "eval_loss": 0.04604629799723625, "eval_runtime": 167.0665, "eval_samples_per_second": 107.526, "eval_steps_per_second": 1.682, "step": 2526 }, { "epoch": 9.011576135351737, "grad_norm": 0.16471421718597412, "learning_rate": 5.357142857142857e-06, "loss": 0.0059, "step": 2530 }, { "epoch": 9.04719501335708, "grad_norm": 0.5944855213165283, "learning_rate": 5.158730158730159e-06, "loss": 0.0068, "step": 2540 }, { "epoch": 9.082813891362422, "grad_norm": 1.3986272811889648, "learning_rate": 4.96031746031746e-06, "loss": 0.0023, "step": 2550 }, { "epoch": 9.118432769367764, "grad_norm": 0.8911547660827637, "learning_rate": 4.7619047619047615e-06, "loss": 0.0023, "step": 2560 }, { "epoch": 9.154051647373107, "grad_norm": 2.983407735824585, "learning_rate": 4.563492063492064e-06, "loss": 0.0039, "step": 2570 }, { "epoch": 9.189670525378451, "grad_norm": 0.6036033630371094, "learning_rate": 4.365079365079365e-06, "loss": 0.0065, "step": 2580 }, { "epoch": 9.225289403383794, "grad_norm": 9.958078384399414, "learning_rate": 4.166666666666667e-06, "loss": 0.0073, "step": 2590 }, { "epoch": 9.260908281389137, "grad_norm": 0.27652016282081604, "learning_rate": 3.968253968253968e-06, "loss": 0.003, "step": 2600 }, { "epoch": 9.29652715939448, "grad_norm": 0.2213822901248932, "learning_rate": 3.7698412698412697e-06, "loss": 0.0015, "step": 2610 }, { "epoch": 9.332146037399822, "grad_norm": 0.18189971148967743, "learning_rate": 3.5714285714285714e-06, "loss": 0.0019, "step": 2620 }, { "epoch": 9.367764915405164, "grad_norm": 0.05522046610713005, "learning_rate": 3.373015873015873e-06, "loss": 0.001, "step": 2630 }, { "epoch": 9.403383793410507, "grad_norm": 2.0256989002227783, "learning_rate": 3.1746031746031746e-06, "loss": 0.0069, "step": 2640 }, { "epoch": 9.439002671415851, "grad_norm": 3.011732339859009, "learning_rate": 2.9761904761904763e-06, "loss": 0.0042, "step": 2650 }, { "epoch": 9.474621549421194, "grad_norm": 0.815857470035553, "learning_rate": 2.777777777777778e-06, "loss": 0.0021, "step": 2660 }, { "epoch": 9.510240427426536, "grad_norm": 1.9958930015563965, "learning_rate": 2.5793650793650795e-06, "loss": 0.0008, "step": 2670 }, { "epoch": 9.545859305431879, "grad_norm": 0.05693604424595833, "learning_rate": 2.3809523809523808e-06, "loss": 0.0028, "step": 2680 }, { "epoch": 9.581478183437222, "grad_norm": 4.779353618621826, "learning_rate": 2.1825396825396824e-06, "loss": 0.0071, "step": 2690 }, { "epoch": 9.617097061442564, "grad_norm": 0.09340893477201462, "learning_rate": 1.984126984126984e-06, "loss": 0.0029, "step": 2700 }, { "epoch": 9.652715939447907, "grad_norm": 1.627168893814087, "learning_rate": 1.7857142857142857e-06, "loss": 0.0038, "step": 2710 }, { "epoch": 9.68833481745325, "grad_norm": 0.4794100224971771, "learning_rate": 1.5873015873015873e-06, "loss": 0.0033, "step": 2720 }, { "epoch": 9.723953695458594, "grad_norm": 0.7291146516799927, "learning_rate": 1.388888888888889e-06, "loss": 0.0024, "step": 2730 }, { "epoch": 9.759572573463936, "grad_norm": 0.5635161399841309, "learning_rate": 1.1904761904761904e-06, "loss": 0.0009, "step": 2740 }, { "epoch": 9.795191451469279, "grad_norm": 0.027575811371207237, "learning_rate": 9.92063492063492e-07, "loss": 0.0015, "step": 2750 }, { "epoch": 9.830810329474621, "grad_norm": 0.4068202078342438, "learning_rate": 7.936507936507937e-07, "loss": 0.0019, "step": 2760 }, { "epoch": 9.866429207479964, "grad_norm": 0.08091460168361664, "learning_rate": 5.952380952380952e-07, "loss": 0.0021, "step": 2770 }, { "epoch": 9.902048085485307, "grad_norm": 0.043514594435691833, "learning_rate": 3.9682539682539683e-07, "loss": 0.005, "step": 2780 }, { "epoch": 9.93766696349065, "grad_norm": 0.17870892584323883, "learning_rate": 1.9841269841269841e-07, "loss": 0.0024, "step": 2790 }, { "epoch": 9.973285841495994, "grad_norm": 0.13601838052272797, "learning_rate": 0.0, "loss": 0.0017, "step": 2800 }, { "epoch": 9.973285841495994, "eval_accuracy": 0.9900356268091739, "eval_loss": 0.04398982226848602, "eval_runtime": 167.3655, "eval_samples_per_second": 107.334, "eval_steps_per_second": 1.679, "step": 2800 }, { "epoch": 9.973285841495994, "step": 2800, "total_flos": 1.7813294264095949e+19, "train_loss": 0.08161249804892577, "train_runtime": 11582.5203, "train_samples_per_second": 62.035, "train_steps_per_second": 0.242 } ], "logging_steps": 10, "max_steps": 2800, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7813294264095949e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }