{ "best_metric": 0.7932489451476793, "best_model_checkpoint": "distilbert-base-multilingual-cased-hyper-matt/run-jc9uav7l/checkpoint-1600", "epoch": 4.0, "eval_steps": 500, "global_step": 1600, "is_hyper_param_search": true, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.025, "grad_norm": 3.898715019226074, "learning_rate": 5.237000370669628e-05, "loss": 0.5804, "step": 10 }, { "epoch": 0.05, "grad_norm": 3.486314058303833, "learning_rate": 5.2106837858923936e-05, "loss": 0.5382, "step": 20 }, { "epoch": 0.075, "grad_norm": 5.969634056091309, "learning_rate": 5.18436720111516e-05, "loss": 0.4842, "step": 30 }, { "epoch": 0.1, "grad_norm": 3.656458854675293, "learning_rate": 5.1580506163379254e-05, "loss": 0.4253, "step": 40 }, { "epoch": 0.125, "grad_norm": 0.4058239161968231, "learning_rate": 5.131734031560691e-05, "loss": 0.4599, "step": 50 }, { "epoch": 0.15, "grad_norm": 2.092224597930908, "learning_rate": 5.1054174467834564e-05, "loss": 0.4936, "step": 60 }, { "epoch": 0.175, "grad_norm": 7.18532657623291, "learning_rate": 5.079100862006222e-05, "loss": 0.529, "step": 70 }, { "epoch": 0.2, "grad_norm": 5.497727870941162, "learning_rate": 5.052784277228988e-05, "loss": 0.4603, "step": 80 }, { "epoch": 0.225, "grad_norm": 14.79340934753418, "learning_rate": 5.026467692451754e-05, "loss": 0.6846, "step": 90 }, { "epoch": 0.25, "grad_norm": 2.487825632095337, "learning_rate": 5.000151107674519e-05, "loss": 0.5422, "step": 100 }, { "epoch": 0.275, "grad_norm": 1.7481558322906494, "learning_rate": 4.973834522897285e-05, "loss": 0.4124, "step": 110 }, { "epoch": 0.3, "grad_norm": 1.459429144859314, "learning_rate": 4.94751793812005e-05, "loss": 0.5866, "step": 120 }, { "epoch": 0.325, "grad_norm": 1.4248297214508057, "learning_rate": 4.9212013533428165e-05, "loss": 0.3253, "step": 130 }, { "epoch": 0.35, "grad_norm": 18.61319923400879, "learning_rate": 4.894884768565583e-05, "loss": 0.4766, "step": 140 }, { "epoch": 0.375, "grad_norm": 23.339385986328125, "learning_rate": 4.868568183788348e-05, "loss": 1.1408, "step": 150 }, { "epoch": 0.4, "grad_norm": 1.9814672470092773, "learning_rate": 4.842251599011114e-05, "loss": 0.6388, "step": 160 }, { "epoch": 0.425, "grad_norm": 10.203920364379883, "learning_rate": 4.815935014233879e-05, "loss": 0.4511, "step": 170 }, { "epoch": 0.45, "grad_norm": 4.456247806549072, "learning_rate": 4.789618429456645e-05, "loss": 0.3185, "step": 180 }, { "epoch": 0.475, "grad_norm": 0.4962649941444397, "learning_rate": 4.763301844679411e-05, "loss": 0.522, "step": 190 }, { "epoch": 0.5, "grad_norm": 6.5583815574646, "learning_rate": 4.7369852599021765e-05, "loss": 0.87, "step": 200 }, { "epoch": 0.525, "grad_norm": 5.371535301208496, "learning_rate": 4.710668675124942e-05, "loss": 0.478, "step": 210 }, { "epoch": 0.55, "grad_norm": 2.1848862171173096, "learning_rate": 4.6843520903477076e-05, "loss": 0.4016, "step": 220 }, { "epoch": 0.575, "grad_norm": 0.9754756093025208, "learning_rate": 4.658035505570473e-05, "loss": 0.2634, "step": 230 }, { "epoch": 0.6, "grad_norm": 0.6011353731155396, "learning_rate": 4.631718920793239e-05, "loss": 0.6037, "step": 240 }, { "epoch": 0.625, "grad_norm": 14.307558059692383, "learning_rate": 4.605402336016005e-05, "loss": 0.5226, "step": 250 }, { "epoch": 0.65, "grad_norm": 1.4895697832107544, "learning_rate": 4.5790857512387704e-05, "loss": 0.3069, "step": 260 }, { "epoch": 0.675, "grad_norm": 1.63743257522583, "learning_rate": 4.552769166461536e-05, "loss": 0.5554, "step": 270 }, { "epoch": 0.7, "grad_norm": 1.2908447980880737, "learning_rate": 4.5264525816843014e-05, "loss": 0.5409, "step": 280 }, { "epoch": 0.725, "grad_norm": 104.72332763671875, "learning_rate": 4.5001359969070676e-05, "loss": 0.4431, "step": 290 }, { "epoch": 0.75, "grad_norm": 38.51509475708008, "learning_rate": 4.473819412129833e-05, "loss": 0.8298, "step": 300 }, { "epoch": 0.775, "grad_norm": 0.7204974889755249, "learning_rate": 4.447502827352599e-05, "loss": 1.0198, "step": 310 }, { "epoch": 0.8, "grad_norm": 4.1109700202941895, "learning_rate": 4.421186242575364e-05, "loss": 0.4, "step": 320 }, { "epoch": 0.825, "grad_norm": 1.3143742084503174, "learning_rate": 4.39486965779813e-05, "loss": 0.6479, "step": 330 }, { "epoch": 0.85, "grad_norm": 0.47246071696281433, "learning_rate": 4.368553073020896e-05, "loss": 0.1606, "step": 340 }, { "epoch": 0.875, "grad_norm": 0.24373719096183777, "learning_rate": 4.3422364882436615e-05, "loss": 0.5316, "step": 350 }, { "epoch": 0.9, "grad_norm": 6.273545265197754, "learning_rate": 4.315919903466427e-05, "loss": 0.6365, "step": 360 }, { "epoch": 0.925, "grad_norm": 0.4661806523799896, "learning_rate": 4.2896033186891925e-05, "loss": 0.4455, "step": 370 }, { "epoch": 0.95, "grad_norm": 0.7078198194503784, "learning_rate": 4.263286733911959e-05, "loss": 0.3436, "step": 380 }, { "epoch": 0.975, "grad_norm": 0.8958209753036499, "learning_rate": 4.236970149134725e-05, "loss": 0.6337, "step": 390 }, { "epoch": 1.0, "grad_norm": 15.921177864074707, "learning_rate": 4.2106535643574905e-05, "loss": 0.7286, "step": 400 }, { "epoch": 1.0, "eval_accuracy": 0.825, "eval_f1": 0.6276595744680851, "eval_loss": 0.6047177314758301, "eval_precision": 0.9076923076923077, "eval_recall": 0.4796747967479675, "eval_runtime": 1.5265, "eval_samples_per_second": 262.045, "eval_steps_per_second": 16.378, "step": 400 }, { "epoch": 1.025, "grad_norm": 0.5335336327552795, "learning_rate": 4.184336979580256e-05, "loss": 0.4385, "step": 410 }, { "epoch": 1.05, "grad_norm": 82.54154968261719, "learning_rate": 4.1580203948030215e-05, "loss": 0.3295, "step": 420 }, { "epoch": 1.075, "grad_norm": 5.63857889175415, "learning_rate": 4.131703810025787e-05, "loss": 0.4648, "step": 430 }, { "epoch": 1.1, "grad_norm": 85.73626708984375, "learning_rate": 4.105387225248553e-05, "loss": 0.5061, "step": 440 }, { "epoch": 1.125, "grad_norm": 0.2682015597820282, "learning_rate": 4.079070640471319e-05, "loss": 0.3782, "step": 450 }, { "epoch": 1.15, "grad_norm": 24.346281051635742, "learning_rate": 4.052754055694084e-05, "loss": 0.1474, "step": 460 }, { "epoch": 1.175, "grad_norm": 0.15391361713409424, "learning_rate": 4.02643747091685e-05, "loss": 0.4207, "step": 470 }, { "epoch": 1.2, "grad_norm": 0.11409182101488113, "learning_rate": 4.0001208861396154e-05, "loss": 0.4453, "step": 480 }, { "epoch": 1.225, "grad_norm": 0.14815831184387207, "learning_rate": 3.9738043013623816e-05, "loss": 0.4236, "step": 490 }, { "epoch": 1.25, "grad_norm": 5.3385539054870605, "learning_rate": 3.947487716585147e-05, "loss": 0.3188, "step": 500 }, { "epoch": 1.275, "grad_norm": 5.233155250549316, "learning_rate": 3.9211711318079126e-05, "loss": 0.7307, "step": 510 }, { "epoch": 1.3, "grad_norm": 0.8003888130187988, "learning_rate": 3.894854547030678e-05, "loss": 0.3721, "step": 520 }, { "epoch": 1.325, "grad_norm": 0.12942475080490112, "learning_rate": 3.868537962253444e-05, "loss": 0.061, "step": 530 }, { "epoch": 1.35, "grad_norm": 0.29207348823547363, "learning_rate": 3.84222137747621e-05, "loss": 0.2718, "step": 540 }, { "epoch": 1.375, "grad_norm": 0.2017810046672821, "learning_rate": 3.8159047926989754e-05, "loss": 0.4297, "step": 550 }, { "epoch": 1.4, "grad_norm": 0.2950953245162964, "learning_rate": 3.789588207921741e-05, "loss": 0.0521, "step": 560 }, { "epoch": 1.425, "grad_norm": 6.70197057723999, "learning_rate": 3.7632716231445065e-05, "loss": 0.4555, "step": 570 }, { "epoch": 1.45, "grad_norm": 28.17333984375, "learning_rate": 3.736955038367272e-05, "loss": 0.4297, "step": 580 }, { "epoch": 1.475, "grad_norm": 0.12028508633375168, "learning_rate": 3.710638453590038e-05, "loss": 0.1992, "step": 590 }, { "epoch": 1.5, "grad_norm": 0.2757216691970825, "learning_rate": 3.684321868812804e-05, "loss": 0.2196, "step": 600 }, { "epoch": 1.525, "grad_norm": 0.12552417814731598, "learning_rate": 3.658005284035569e-05, "loss": 0.5815, "step": 610 }, { "epoch": 1.55, "grad_norm": 4.250138282775879, "learning_rate": 3.631688699258335e-05, "loss": 0.5406, "step": 620 }, { "epoch": 1.575, "grad_norm": 42.90773010253906, "learning_rate": 3.605372114481101e-05, "loss": 0.2289, "step": 630 }, { "epoch": 1.6, "grad_norm": 0.2683817446231842, "learning_rate": 3.5790555297038665e-05, "loss": 0.2187, "step": 640 }, { "epoch": 1.625, "grad_norm": 0.16949182748794556, "learning_rate": 3.552738944926633e-05, "loss": 0.3488, "step": 650 }, { "epoch": 1.65, "grad_norm": 28.463308334350586, "learning_rate": 3.526422360149398e-05, "loss": 0.3393, "step": 660 }, { "epoch": 1.675, "grad_norm": 8.720256805419922, "learning_rate": 3.500105775372164e-05, "loss": 0.4792, "step": 670 }, { "epoch": 1.7, "grad_norm": 0.81484055519104, "learning_rate": 3.473789190594929e-05, "loss": 0.4159, "step": 680 }, { "epoch": 1.725, "grad_norm": 0.22710223495960236, "learning_rate": 3.447472605817695e-05, "loss": 0.1826, "step": 690 }, { "epoch": 1.75, "grad_norm": 0.8144286274909973, "learning_rate": 3.421156021040461e-05, "loss": 0.3352, "step": 700 }, { "epoch": 1.775, "grad_norm": 0.3589684069156647, "learning_rate": 3.3948394362632266e-05, "loss": 0.7971, "step": 710 }, { "epoch": 1.8, "grad_norm": 20.449018478393555, "learning_rate": 3.368522851485992e-05, "loss": 0.5189, "step": 720 }, { "epoch": 1.825, "grad_norm": 0.6803117394447327, "learning_rate": 3.3422062667087576e-05, "loss": 0.2641, "step": 730 }, { "epoch": 1.85, "grad_norm": 5.5921502113342285, "learning_rate": 3.315889681931523e-05, "loss": 0.1776, "step": 740 }, { "epoch": 1.875, "grad_norm": 15.335833549499512, "learning_rate": 3.2895730971542894e-05, "loss": 0.6454, "step": 750 }, { "epoch": 1.9, "grad_norm": 24.82597541809082, "learning_rate": 3.263256512377055e-05, "loss": 0.3175, "step": 760 }, { "epoch": 1.925, "grad_norm": 0.3673095107078552, "learning_rate": 3.2369399275998204e-05, "loss": 0.1565, "step": 770 }, { "epoch": 1.95, "grad_norm": 0.30511701107025146, "learning_rate": 3.210623342822586e-05, "loss": 0.3474, "step": 780 }, { "epoch": 1.975, "grad_norm": 38.377174377441406, "learning_rate": 3.1843067580453515e-05, "loss": 0.5005, "step": 790 }, { "epoch": 2.0, "grad_norm": 0.5073786973953247, "learning_rate": 3.157990173268118e-05, "loss": 0.7247, "step": 800 }, { "epoch": 2.0, "eval_accuracy": 0.865, "eval_f1": 0.773109243697479, "eval_loss": 0.45795485377311707, "eval_precision": 0.8, "eval_recall": 0.7479674796747967, "eval_runtime": 1.5384, "eval_samples_per_second": 260.012, "eval_steps_per_second": 16.251, "step": 800 }, { "epoch": 2.025, "grad_norm": 14.978399276733398, "learning_rate": 3.131673588490883e-05, "loss": 0.2517, "step": 810 }, { "epoch": 2.05, "grad_norm": 0.3453332185745239, "learning_rate": 3.105357003713649e-05, "loss": 0.2949, "step": 820 }, { "epoch": 2.075, "grad_norm": 0.21060849726200104, "learning_rate": 3.079040418936414e-05, "loss": 0.2702, "step": 830 }, { "epoch": 2.1, "grad_norm": 48.51129913330078, "learning_rate": 3.05272383415918e-05, "loss": 0.0914, "step": 840 }, { "epoch": 2.125, "grad_norm": 16.647279739379883, "learning_rate": 3.0264072493819457e-05, "loss": 0.2664, "step": 850 }, { "epoch": 2.15, "grad_norm": 0.24965181946754456, "learning_rate": 3.0000906646047115e-05, "loss": 0.8968, "step": 860 }, { "epoch": 2.175, "grad_norm": 1.752435564994812, "learning_rate": 2.973774079827477e-05, "loss": 0.5488, "step": 870 }, { "epoch": 2.2, "grad_norm": 0.20626233518123627, "learning_rate": 2.9474574950502433e-05, "loss": 0.3845, "step": 880 }, { "epoch": 2.225, "grad_norm": 39.55342483520508, "learning_rate": 2.921140910273009e-05, "loss": 0.255, "step": 890 }, { "epoch": 2.25, "grad_norm": 0.6314402222633362, "learning_rate": 2.8948243254957747e-05, "loss": 0.2481, "step": 900 }, { "epoch": 2.275, "grad_norm": 4.929794788360596, "learning_rate": 2.8685077407185402e-05, "loss": 0.3894, "step": 910 }, { "epoch": 2.3, "grad_norm": 0.6494444608688354, "learning_rate": 2.842191155941306e-05, "loss": 0.2897, "step": 920 }, { "epoch": 2.325, "grad_norm": 0.17967885732650757, "learning_rate": 2.8158745711640716e-05, "loss": 0.1642, "step": 930 }, { "epoch": 2.35, "grad_norm": 0.047755829989910126, "learning_rate": 2.7895579863868374e-05, "loss": 0.3492, "step": 940 }, { "epoch": 2.375, "grad_norm": 66.83489990234375, "learning_rate": 2.763241401609603e-05, "loss": 0.0899, "step": 950 }, { "epoch": 2.4, "grad_norm": 21.72690200805664, "learning_rate": 2.7369248168323685e-05, "loss": 0.2154, "step": 960 }, { "epoch": 2.425, "grad_norm": 0.1563444286584854, "learning_rate": 2.7106082320551344e-05, "loss": 0.2106, "step": 970 }, { "epoch": 2.45, "grad_norm": 0.4117478132247925, "learning_rate": 2.6842916472779e-05, "loss": 0.0819, "step": 980 }, { "epoch": 2.475, "grad_norm": 41.47480392456055, "learning_rate": 2.6579750625006658e-05, "loss": 0.4206, "step": 990 }, { "epoch": 2.5, "grad_norm": 0.29815855622291565, "learning_rate": 2.6316584777234313e-05, "loss": 0.1832, "step": 1000 }, { "epoch": 2.525, "grad_norm": 0.07651757448911667, "learning_rate": 2.6053418929461968e-05, "loss": 0.4396, "step": 1010 }, { "epoch": 2.55, "grad_norm": 3.2124338150024414, "learning_rate": 2.5790253081689627e-05, "loss": 0.1765, "step": 1020 }, { "epoch": 2.575, "grad_norm": 32.49565505981445, "learning_rate": 2.5527087233917282e-05, "loss": 0.5668, "step": 1030 }, { "epoch": 2.6, "grad_norm": 3.7792484760284424, "learning_rate": 2.526392138614494e-05, "loss": 0.0786, "step": 1040 }, { "epoch": 2.625, "grad_norm": 24.304460525512695, "learning_rate": 2.5000755538372596e-05, "loss": 0.4862, "step": 1050 }, { "epoch": 2.65, "grad_norm": 0.03224577382206917, "learning_rate": 2.473758969060025e-05, "loss": 0.0639, "step": 1060 }, { "epoch": 2.675, "grad_norm": 0.10748755186796188, "learning_rate": 2.4474423842827913e-05, "loss": 0.3126, "step": 1070 }, { "epoch": 2.7, "grad_norm": 0.07485207915306091, "learning_rate": 2.421125799505557e-05, "loss": 0.4306, "step": 1080 }, { "epoch": 2.725, "grad_norm": 0.3468710780143738, "learning_rate": 2.3948092147283224e-05, "loss": 0.2609, "step": 1090 }, { "epoch": 2.75, "grad_norm": 0.1928665190935135, "learning_rate": 2.3684926299510883e-05, "loss": 0.1698, "step": 1100 }, { "epoch": 2.775, "grad_norm": 0.0683489441871643, "learning_rate": 2.3421760451738538e-05, "loss": 0.6863, "step": 1110 }, { "epoch": 2.8, "grad_norm": 0.12724503874778748, "learning_rate": 2.3158594603966197e-05, "loss": 0.1283, "step": 1120 }, { "epoch": 2.825, "grad_norm": 18.53827476501465, "learning_rate": 2.2895428756193852e-05, "loss": 0.4287, "step": 1130 }, { "epoch": 2.85, "grad_norm": 13.878090858459473, "learning_rate": 2.2632262908421507e-05, "loss": 0.4846, "step": 1140 }, { "epoch": 2.875, "grad_norm": 45.64787673950195, "learning_rate": 2.2369097060649166e-05, "loss": 0.2517, "step": 1150 }, { "epoch": 2.9, "grad_norm": 0.3627373278141022, "learning_rate": 2.210593121287682e-05, "loss": 0.024, "step": 1160 }, { "epoch": 2.925, "grad_norm": 4.56863260269165, "learning_rate": 2.184276536510448e-05, "loss": 0.1032, "step": 1170 }, { "epoch": 2.95, "grad_norm": 0.12671475112438202, "learning_rate": 2.1579599517332135e-05, "loss": 0.3173, "step": 1180 }, { "epoch": 2.975, "grad_norm": 0.2638857662677765, "learning_rate": 2.1316433669559794e-05, "loss": 0.3071, "step": 1190 }, { "epoch": 3.0, "grad_norm": 165.09217834472656, "learning_rate": 2.1053267821787452e-05, "loss": 0.3667, "step": 1200 }, { "epoch": 3.0, "eval_accuracy": 0.855, "eval_f1": 0.7928571428571428, "eval_loss": 0.6172541975975037, "eval_precision": 0.7070063694267515, "eval_recall": 0.9024390243902439, "eval_runtime": 1.5339, "eval_samples_per_second": 260.768, "eval_steps_per_second": 16.298, "step": 1200 }, { "epoch": 3.025, "grad_norm": 0.03663462772965431, "learning_rate": 2.0790101974015108e-05, "loss": 0.2258, "step": 1210 }, { "epoch": 3.05, "grad_norm": 0.15920574963092804, "learning_rate": 2.0526936126242766e-05, "loss": 0.1118, "step": 1220 }, { "epoch": 3.075, "grad_norm": 10.940315246582031, "learning_rate": 2.026377027847042e-05, "loss": 0.1079, "step": 1230 }, { "epoch": 3.1, "grad_norm": 0.24473267793655396, "learning_rate": 2.0000604430698077e-05, "loss": 0.0076, "step": 1240 }, { "epoch": 3.125, "grad_norm": 0.11901724338531494, "learning_rate": 1.9737438582925736e-05, "loss": 0.0048, "step": 1250 }, { "epoch": 3.15, "grad_norm": 0.5029746890068054, "learning_rate": 1.947427273515339e-05, "loss": 0.518, "step": 1260 }, { "epoch": 3.175, "grad_norm": 0.16407223045825958, "learning_rate": 1.921110688738105e-05, "loss": 0.0487, "step": 1270 }, { "epoch": 3.2, "grad_norm": 43.9595947265625, "learning_rate": 1.8947941039608705e-05, "loss": 0.221, "step": 1280 }, { "epoch": 3.225, "grad_norm": 0.043054983019828796, "learning_rate": 1.868477519183636e-05, "loss": 0.2537, "step": 1290 }, { "epoch": 3.25, "grad_norm": 0.23155592381954193, "learning_rate": 1.842160934406402e-05, "loss": 0.4016, "step": 1300 }, { "epoch": 3.275, "grad_norm": 0.11036993563175201, "learning_rate": 1.8158443496291674e-05, "loss": 0.2479, "step": 1310 }, { "epoch": 3.3, "grad_norm": 9.334052085876465, "learning_rate": 1.7895277648519333e-05, "loss": 0.3436, "step": 1320 }, { "epoch": 3.325, "grad_norm": 0.12501460313796997, "learning_rate": 1.763211180074699e-05, "loss": 0.2112, "step": 1330 }, { "epoch": 3.35, "grad_norm": 0.06664387881755829, "learning_rate": 1.7368945952974647e-05, "loss": 0.2406, "step": 1340 }, { "epoch": 3.375, "grad_norm": 0.2532443702220917, "learning_rate": 1.7105780105202305e-05, "loss": 0.268, "step": 1350 }, { "epoch": 3.4, "grad_norm": 0.11059623956680298, "learning_rate": 1.684261425742996e-05, "loss": 0.1377, "step": 1360 }, { "epoch": 3.425, "grad_norm": 0.22316700220108032, "learning_rate": 1.6579448409657616e-05, "loss": 0.2411, "step": 1370 }, { "epoch": 3.45, "grad_norm": 0.4598884582519531, "learning_rate": 1.6316282561885274e-05, "loss": 0.1774, "step": 1380 }, { "epoch": 3.475, "grad_norm": 0.17457233369350433, "learning_rate": 1.605311671411293e-05, "loss": 0.0483, "step": 1390 }, { "epoch": 3.5, "grad_norm": 0.12707385420799255, "learning_rate": 1.578995086634059e-05, "loss": 0.0061, "step": 1400 }, { "epoch": 3.525, "grad_norm": 0.8905083537101746, "learning_rate": 1.5526785018568244e-05, "loss": 0.1226, "step": 1410 }, { "epoch": 3.55, "grad_norm": 0.027454137802124023, "learning_rate": 1.52636191707959e-05, "loss": 0.1416, "step": 1420 }, { "epoch": 3.575, "grad_norm": 9.629097938537598, "learning_rate": 1.5000453323023558e-05, "loss": 0.2324, "step": 1430 }, { "epoch": 3.6, "grad_norm": 0.048838574439287186, "learning_rate": 1.4737287475251216e-05, "loss": 0.1535, "step": 1440 }, { "epoch": 3.625, "grad_norm": 0.04500986263155937, "learning_rate": 1.4474121627478873e-05, "loss": 0.0829, "step": 1450 }, { "epoch": 3.65, "grad_norm": 0.07558059692382812, "learning_rate": 1.421095577970653e-05, "loss": 0.2319, "step": 1460 }, { "epoch": 3.675, "grad_norm": 0.17243552207946777, "learning_rate": 1.3947789931934187e-05, "loss": 0.008, "step": 1470 }, { "epoch": 3.7, "grad_norm": 0.15297254920005798, "learning_rate": 1.3684624084161843e-05, "loss": 0.1315, "step": 1480 }, { "epoch": 3.725, "grad_norm": 0.05509917438030243, "learning_rate": 1.34214582363895e-05, "loss": 0.0062, "step": 1490 }, { "epoch": 3.75, "grad_norm": 0.042248114943504333, "learning_rate": 1.3158292388617156e-05, "loss": 0.0033, "step": 1500 }, { "epoch": 3.775, "grad_norm": 0.0196397565305233, "learning_rate": 1.2895126540844813e-05, "loss": 0.1618, "step": 1510 }, { "epoch": 3.8, "grad_norm": 0.054895512759685516, "learning_rate": 1.263196069307247e-05, "loss": 0.1938, "step": 1520 }, { "epoch": 3.825, "grad_norm": 0.04431680217385292, "learning_rate": 1.2368794845300126e-05, "loss": 0.2307, "step": 1530 }, { "epoch": 3.85, "grad_norm": 0.04325120523571968, "learning_rate": 1.2105628997527784e-05, "loss": 0.0026, "step": 1540 }, { "epoch": 3.875, "grad_norm": 0.9810055494308472, "learning_rate": 1.1842463149755441e-05, "loss": 0.2551, "step": 1550 }, { "epoch": 3.9, "grad_norm": 0.11533799022436142, "learning_rate": 1.1579297301983098e-05, "loss": 0.604, "step": 1560 }, { "epoch": 3.925, "grad_norm": 0.09560931473970413, "learning_rate": 1.1316131454210754e-05, "loss": 0.1318, "step": 1570 }, { "epoch": 3.95, "grad_norm": 0.1265428364276886, "learning_rate": 1.105296560643841e-05, "loss": 0.1454, "step": 1580 }, { "epoch": 3.975, "grad_norm": 0.04381510615348816, "learning_rate": 1.0789799758666068e-05, "loss": 0.1591, "step": 1590 }, { "epoch": 4.0, "grad_norm": 56.61437225341797, "learning_rate": 1.0526633910893726e-05, "loss": 0.3659, "step": 1600 }, { "epoch": 4.0, "eval_accuracy": 0.8775, "eval_f1": 0.7932489451476793, "eval_loss": 0.5321782231330872, "eval_precision": 0.8245614035087719, "eval_recall": 0.7642276422764228, "eval_runtime": 1.5289, "eval_samples_per_second": 261.622, "eval_steps_per_second": 16.351, "step": 1600 } ], "logging_steps": 10, "max_steps": 2000, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 847261481803776.0, "train_batch_size": 4, "trial_name": null, "trial_params": { "_wandb": {}, "assignments": {}, "learning_rate": 5.2633169554468626e-05, "metric": "eval/loss", "num_train_epochs": 5, "per_device_train_batch_size": 4, "seed": 25 } }