Upload 14 files

Browse files

trained with 10 epochs

Files changed (8) hide show

adapter_model.safetensors +1 -1
config.json +1 -1
model.safetensors +2 -2
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +1086 -0
training_args.bin +3 -0

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0959b4b79c70f7f0f6f8ea6096fc88b75ffb66c687e1f532f27516fb6d4e03d1
 size 2536568

 version https://git-lfs.github.com/spec/v1
+oid sha256:83c32cb7d669775f075fb0a87c953cade6c10b0747873eb4ceebef2cb2f2892f
 size 2536568

config.json CHANGED Viewed

@@ -39,6 +39,6 @@
   "sinusoidal_pos_embds": false,
   "tie_weights_": true,
   "torch_dtype": "float32",
-  "transformers_version": "4.45.0",
   "vocab_size": 30522
 }

   "sinusoidal_pos_embds": false,
   "tie_weights_": true,
   "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
   "vocab_size": 30522
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72815fa37d94fe560665dc778ac50a1606e00f3cb03133aafb4b10784be08543
-size 267851024

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8e2d7fc59b1fbb2a3ca19bdc93c9e8b7b3237e5f3609c4b59e9ba257e3e4ec0
+size 214037161

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d437e51fed1f120e9b341f85d3bb2a02f70a8e251fc4cf0da979dc3fe761ca4b
+size 5082798

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:468a0fc8a2aa5f71deed2da1be3649a479efb234844a546ab87a69c682ebf27e
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7948e301d82ada6e7683ed3d39239b5c72d60691247f70984c929351115adbea
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1086 @@

+{
+  "best_metric": 0.2595302164554596,
+  "best_model_checkpoint": "distilbert-base-uncased-lora-intent-classification-v2/checkpoint-67716",
+  "epoch": 9.0,
+  "eval_steps": 500,
+  "global_step": 67716,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.06645401382243488,
+      "grad_norm": 4.6704421043396,
+      "learning_rate": 0.0009933545986177566,
+      "loss": 0.6675,
+      "step": 500
+    },
+    {
+      "epoch": 0.13290802764486975,
+      "grad_norm": 2.3022220134735107,
+      "learning_rate": 0.000986709197235513,
+      "loss": 0.4718,
+      "step": 1000
+    },
+    {
+      "epoch": 0.19936204146730463,
+      "grad_norm": 0.44215622544288635,
+      "learning_rate": 0.0009800637958532696,
+      "loss": 0.4146,
+      "step": 1500
+    },
+    {
+      "epoch": 0.2658160552897395,
+      "grad_norm": 0.08581192046403885,
+      "learning_rate": 0.0009734183944710261,
+      "loss": 0.4297,
+      "step": 2000
+    },
+    {
+      "epoch": 0.3322700691121744,
+      "grad_norm": 13.087315559387207,
+      "learning_rate": 0.0009667729930887826,
+      "loss": 0.3776,
+      "step": 2500
+    },
+    {
+      "epoch": 0.39872408293460926,
+      "grad_norm": 15.066133499145508,
+      "learning_rate": 0.0009601275917065391,
+      "loss": 0.4233,
+      "step": 3000
+    },
+    {
+      "epoch": 0.46517809675704413,
+      "grad_norm": 0.23827387392520905,
+      "learning_rate": 0.0009534821903242956,
+      "loss": 0.3613,
+      "step": 3500
+    },
+    {
+      "epoch": 0.531632110579479,
+      "grad_norm": 0.009319925680756569,
+      "learning_rate": 0.0009468367889420521,
+      "loss": 0.4269,
+      "step": 4000
+    },
+    {
+      "epoch": 0.5980861244019139,
+      "grad_norm": 0.665321946144104,
+      "learning_rate": 0.0009401913875598086,
+      "loss": 0.3815,
+      "step": 4500
+    },
+    {
+      "epoch": 0.6645401382243488,
+      "grad_norm": 3.580693483352661,
+      "learning_rate": 0.0009335459861775651,
+      "loss": 0.3539,
+      "step": 5000
+    },
+    {
+      "epoch": 0.7309941520467836,
+      "grad_norm": 0.12289135903120041,
+      "learning_rate": 0.0009269005847953217,
+      "loss": 0.4112,
+      "step": 5500
+    },
+    {
+      "epoch": 0.7974481658692185,
+      "grad_norm": 1.3471044301986694,
+      "learning_rate": 0.0009202551834130782,
+      "loss": 0.4109,
+      "step": 6000
+    },
+    {
+      "epoch": 0.8639021796916534,
+      "grad_norm": 0.09887880831956863,
+      "learning_rate": 0.0009136097820308346,
+      "loss": 0.4508,
+      "step": 6500
+    },
+    {
+      "epoch": 0.9303561935140883,
+      "grad_norm": 0.005311007611453533,
+      "learning_rate": 0.0009069643806485912,
+      "loss": 0.4011,
+      "step": 7000
+    },
+    {
+      "epoch": 0.9968102073365231,
+      "grad_norm": 1.1049816608428955,
+      "learning_rate": 0.0009003189792663478,
+      "loss": 0.368,
+      "step": 7500
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.9425867507886435,
+      "eval_f1": 0.9421244141375861,
+      "eval_loss": 0.3986539840698242,
+      "eval_precision": 0.9421379340931425,
+      "eval_recall": 0.9425867507886435,
+      "eval_runtime": 4.728,
+      "eval_samples_per_second": 335.238,
+      "eval_steps_per_second": 83.968,
+      "step": 7524
+    },
+    {
+      "epoch": 1.063264221158958,
+      "grad_norm": 70.09782409667969,
+      "learning_rate": 0.0008936735778841042,
+      "loss": 0.3306,
+      "step": 8000
+    },
+    {
+      "epoch": 1.1297182349813928,
+      "grad_norm": 0.7961419820785522,
+      "learning_rate": 0.0008870281765018608,
+      "loss": 0.3746,
+      "step": 8500
+    },
+    {
+      "epoch": 1.1961722488038278,
+      "grad_norm": 0.060738347470760345,
+      "learning_rate": 0.0008803827751196173,
+      "loss": 0.4045,
+      "step": 9000
+    },
+    {
+      "epoch": 1.2626262626262625,
+      "grad_norm": 0.20715029537677765,
+      "learning_rate": 0.0008737373737373737,
+      "loss": 0.4587,
+      "step": 9500
+    },
+    {
+      "epoch": 1.3290802764486975,
+      "grad_norm": 0.08913299441337585,
+      "learning_rate": 0.0008670919723551303,
+      "loss": 0.4504,
+      "step": 10000
+    },
+    {
+      "epoch": 1.3955342902711323,
+      "grad_norm": 0.14319421350955963,
+      "learning_rate": 0.0008604465709728868,
+      "loss": 0.3991,
+      "step": 10500
+    },
+    {
+      "epoch": 1.4619883040935673,
+      "grad_norm": 2.545884370803833,
+      "learning_rate": 0.0008538011695906432,
+      "loss": 0.4192,
+      "step": 11000
+    },
+    {
+      "epoch": 1.528442317916002,
+      "grad_norm": 0.12403066456317902,
+      "learning_rate": 0.0008471557682083998,
+      "loss": 0.3563,
+      "step": 11500
+    },
+    {
+      "epoch": 1.594896331738437,
+      "grad_norm": 41.519954681396484,
+      "learning_rate": 0.0008405103668261563,
+      "loss": 0.3435,
+      "step": 12000
+    },
+    {
+      "epoch": 1.661350345560872,
+      "grad_norm": 83.61852264404297,
+      "learning_rate": 0.0008338649654439129,
+      "loss": 0.3503,
+      "step": 12500
+    },
+    {
+      "epoch": 1.7278043593833068,
+      "grad_norm": 0.001769404741935432,
+      "learning_rate": 0.0008272195640616694,
+      "loss": 0.3238,
+      "step": 13000
+    },
+    {
+      "epoch": 1.7942583732057416,
+      "grad_norm": 1.7677043676376343,
+      "learning_rate": 0.0008205741626794258,
+      "loss": 0.38,
+      "step": 13500
+    },
+    {
+      "epoch": 1.8607123870281765,
+      "grad_norm": 1.0566127300262451,
+      "learning_rate": 0.0008139287612971824,
+      "loss": 0.4146,
+      "step": 14000
+    },
+    {
+      "epoch": 1.9271664008506115,
+      "grad_norm": 19.463109970092773,
+      "learning_rate": 0.0008072833599149389,
+      "loss": 0.4305,
+      "step": 14500
+    },
+    {
+      "epoch": 1.9936204146730463,
+      "grad_norm": 17.069889068603516,
+      "learning_rate": 0.0008006379585326954,
+      "loss": 0.3505,
+      "step": 15000
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.9482649842271293,
+      "eval_f1": 0.9478124684113843,
+      "eval_loss": 0.3766539990901947,
+      "eval_precision": 0.9481744874506283,
+      "eval_recall": 0.9482649842271293,
+      "eval_runtime": 4.5607,
+      "eval_samples_per_second": 347.537,
+      "eval_steps_per_second": 87.049,
+      "step": 15048
+    },
+    {
+      "epoch": 2.060074428495481,
+      "grad_norm": 0.4118238389492035,
+      "learning_rate": 0.000793992557150452,
+      "loss": 0.3021,
+      "step": 15500
+    },
+    {
+      "epoch": 2.126528442317916,
+      "grad_norm": 0.4119320213794708,
+      "learning_rate": 0.0007873471557682083,
+      "loss": 0.3166,
+      "step": 16000
+    },
+    {
+      "epoch": 2.192982456140351,
+      "grad_norm": 10.00361442565918,
+      "learning_rate": 0.0007807017543859649,
+      "loss": 0.374,
+      "step": 16500
+    },
+    {
+      "epoch": 2.2594364699627856,
+      "grad_norm": 44.608726501464844,
+      "learning_rate": 0.0007740563530037215,
+      "loss": 0.4748,
+      "step": 17000
+    },
+    {
+      "epoch": 2.3258904837852206,
+      "grad_norm": 0.09617531299591064,
+      "learning_rate": 0.000767410951621478,
+      "loss": 0.3771,
+      "step": 17500
+    },
+    {
+      "epoch": 2.3923444976076556,
+      "grad_norm": 26.71993064880371,
+      "learning_rate": 0.0007607655502392344,
+      "loss": 0.4181,
+      "step": 18000
+    },
+    {
+      "epoch": 2.4587985114300905,
+      "grad_norm": 0.003970532212406397,
+      "learning_rate": 0.000754120148856991,
+      "loss": 0.3365,
+      "step": 18500
+    },
+    {
+      "epoch": 2.525252525252525,
+      "grad_norm": 0.023912647739052773,
+      "learning_rate": 0.0007474747474747475,
+      "loss": 0.3731,
+      "step": 19000
+    },
+    {
+      "epoch": 2.59170653907496,
+      "grad_norm": 0.08333996683359146,
+      "learning_rate": 0.000740829346092504,
+      "loss": 0.4489,
+      "step": 19500
+    },
+    {
+      "epoch": 2.658160552897395,
+      "grad_norm": 0.01645304262638092,
+      "learning_rate": 0.0007341839447102606,
+      "loss": 0.4246,
+      "step": 20000
+    },
+    {
+      "epoch": 2.72461456671983,
+      "grad_norm": 0.08779849112033844,
+      "learning_rate": 0.000727538543328017,
+      "loss": 0.4556,
+      "step": 20500
+    },
+    {
+      "epoch": 2.7910685805422646,
+      "grad_norm": 52.66293716430664,
+      "learning_rate": 0.0007208931419457735,
+      "loss": 0.3538,
+      "step": 21000
+    },
+    {
+      "epoch": 2.8575225943646996,
+      "grad_norm": 0.028336428105831146,
+      "learning_rate": 0.00071424774056353,
+      "loss": 0.3813,
+      "step": 21500
+    },
+    {
+      "epoch": 2.9239766081871346,
+      "grad_norm": 0.30558499693870544,
+      "learning_rate": 0.0007076023391812866,
+      "loss": 0.4138,
+      "step": 22000
+    },
+    {
+      "epoch": 2.990430622009569,
+      "grad_norm": 30.89914321899414,
+      "learning_rate": 0.0007009569377990431,
+      "loss": 0.3391,
+      "step": 22500
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.9539432176656152,
+      "eval_f1": 0.95367799565447,
+      "eval_loss": 0.34262794256210327,
+      "eval_precision": 0.9535465559361256,
+      "eval_recall": 0.9539432176656152,
+      "eval_runtime": 4.5296,
+      "eval_samples_per_second": 349.923,
+      "eval_steps_per_second": 87.646,
+      "step": 22572
+    },
+    {
+      "epoch": 3.056884635832004,
+      "grad_norm": 280.99310302734375,
+      "learning_rate": 0.0006943115364167995,
+      "loss": 0.3269,
+      "step": 23000
+    },
+    {
+      "epoch": 3.123338649654439,
+      "grad_norm": 0.030926929786801338,
+      "learning_rate": 0.0006876661350345561,
+      "loss": 0.3015,
+      "step": 23500
+    },
+    {
+      "epoch": 3.189792663476874,
+      "grad_norm": 0.1642533391714096,
+      "learning_rate": 0.0006810207336523127,
+      "loss": 0.3959,
+      "step": 24000
+    },
+    {
+      "epoch": 3.256246677299309,
+      "grad_norm": 4.198115825653076,
+      "learning_rate": 0.000674375332270069,
+      "loss": 0.4014,
+      "step": 24500
+    },
+    {
+      "epoch": 3.3227006911217436,
+      "grad_norm": 0.007642796263098717,
+      "learning_rate": 0.0006677299308878256,
+      "loss": 0.3203,
+      "step": 25000
+    },
+    {
+      "epoch": 3.3891547049441786,
+      "grad_norm": 0.018859192728996277,
+      "learning_rate": 0.0006610845295055822,
+      "loss": 0.3617,
+      "step": 25500
+    },
+    {
+      "epoch": 3.4556087187666136,
+      "grad_norm": 0.1555991768836975,
+      "learning_rate": 0.0006544391281233386,
+      "loss": 0.34,
+      "step": 26000
+    },
+    {
+      "epoch": 3.522062732589048,
+      "grad_norm": 0.03736409544944763,
+      "learning_rate": 0.0006477937267410952,
+      "loss": 0.3342,
+      "step": 26500
+    },
+    {
+      "epoch": 3.588516746411483,
+      "grad_norm": 0.0046156104654073715,
+      "learning_rate": 0.0006411483253588518,
+      "loss": 0.3961,
+      "step": 27000
+    },
+    {
+      "epoch": 3.654970760233918,
+      "grad_norm": 27.846786499023438,
+      "learning_rate": 0.0006345029239766082,
+      "loss": 0.2895,
+      "step": 27500
+    },
+    {
+      "epoch": 3.721424774056353,
+      "grad_norm": 19.202760696411133,
+      "learning_rate": 0.0006278575225943647,
+      "loss": 0.4071,
+      "step": 28000
+    },
+    {
+      "epoch": 3.787878787878788,
+      "grad_norm": 0.007552656345069408,
+      "learning_rate": 0.0006212121212121212,
+      "loss": 0.3859,
+      "step": 28500
+    },
+    {
+      "epoch": 3.8543328017012226,
+      "grad_norm": 0.029448220506310463,
+      "learning_rate": 0.0006145667198298778,
+      "loss": 0.3642,
+      "step": 29000
+    },
+    {
+      "epoch": 3.9207868155236576,
+      "grad_norm": 2.9489197731018066,
+      "learning_rate": 0.0006079213184476342,
+      "loss": 0.3331,
+      "step": 29500
+    },
+    {
+      "epoch": 3.9872408293460926,
+      "grad_norm": 0.13416582345962524,
+      "learning_rate": 0.0006012759170653907,
+      "loss": 0.3399,
+      "step": 30000
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.9533123028391167,
+      "eval_f1": 0.9528581216338866,
+      "eval_loss": 0.36635637283325195,
+      "eval_precision": 0.9528819559731596,
+      "eval_recall": 0.9533123028391167,
+      "eval_runtime": 4.1925,
+      "eval_samples_per_second": 378.06,
+      "eval_steps_per_second": 94.694,
+      "step": 30096
+    },
+    {
+      "epoch": 4.053694843168527,
+      "grad_norm": 28.457218170166016,
+      "learning_rate": 0.0005946305156831473,
+      "loss": 0.3025,
+      "step": 30500
+    },
+    {
+      "epoch": 4.120148856990962,
+      "grad_norm": 6.5367112159729,
+      "learning_rate": 0.0005879851143009038,
+      "loss": 0.314,
+      "step": 31000
+    },
+    {
+      "epoch": 4.186602870813397,
+      "grad_norm": 393.4518737792969,
+      "learning_rate": 0.0005813397129186602,
+      "loss": 0.3436,
+      "step": 31500
+    },
+    {
+      "epoch": 4.253056884635832,
+      "grad_norm": 0.9848179221153259,
+      "learning_rate": 0.0005746943115364168,
+      "loss": 0.2768,
+      "step": 32000
+    },
+    {
+      "epoch": 4.319510898458267,
+      "grad_norm": 2.0531139373779297,
+      "learning_rate": 0.0005680489101541734,
+      "loss": 0.3134,
+      "step": 32500
+    },
+    {
+      "epoch": 4.385964912280702,
+      "grad_norm": 0.055749546736478806,
+      "learning_rate": 0.0005614035087719298,
+      "loss": 0.3532,
+      "step": 33000
+    },
+    {
+      "epoch": 4.452418926103137,
+      "grad_norm": 0.4778645634651184,
+      "learning_rate": 0.0005547581073896864,
+      "loss": 0.3622,
+      "step": 33500
+    },
+    {
+      "epoch": 4.518872939925571,
+      "grad_norm": 0.061856046319007874,
+      "learning_rate": 0.0005481127060074428,
+      "loss": 0.3426,
+      "step": 34000
+    },
+    {
+      "epoch": 4.585326953748006,
+      "grad_norm": 0.026136351749300957,
+      "learning_rate": 0.0005414673046251993,
+      "loss": 0.3795,
+      "step": 34500
+    },
+    {
+      "epoch": 4.651780967570441,
+      "grad_norm": 0.03556622937321663,
+      "learning_rate": 0.0005348219032429559,
+      "loss": 0.3322,
+      "step": 35000
+    },
+    {
+      "epoch": 4.718234981392876,
+      "grad_norm": 0.14081618189811707,
+      "learning_rate": 0.0005281765018607124,
+      "loss": 0.3722,
+      "step": 35500
+    },
+    {
+      "epoch": 4.784688995215311,
+      "grad_norm": 100.0813217163086,
+      "learning_rate": 0.0005215311004784689,
+      "loss": 0.3467,
+      "step": 36000
+    },
+    {
+      "epoch": 4.851143009037746,
+      "grad_norm": 9.537514686584473,
+      "learning_rate": 0.0005148856990962254,
+      "loss": 0.3484,
+      "step": 36500
+    },
+    {
+      "epoch": 4.917597022860181,
+      "grad_norm": 0.048729896545410156,
+      "learning_rate": 0.0005082402977139819,
+      "loss": 0.3439,
+      "step": 37000
+    },
+    {
+      "epoch": 4.984051036682615,
+      "grad_norm": 0.005286164116114378,
+      "learning_rate": 0.0005015948963317385,
+      "loss": 0.3023,
+      "step": 37500
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.9570977917981073,
+      "eval_f1": 0.9568038885748729,
+      "eval_loss": 0.3057607114315033,
+      "eval_precision": 0.9566095910966326,
+      "eval_recall": 0.9570977917981073,
+      "eval_runtime": 4.2904,
+      "eval_samples_per_second": 369.428,
+      "eval_steps_per_second": 92.532,
+      "step": 37620
+    },
+    {
+      "epoch": 5.05050505050505,
+      "grad_norm": 123.33903503417969,
+      "learning_rate": 0.000494949494949495,
+      "loss": 0.3801,
+      "step": 38000
+    },
+    {
+      "epoch": 5.116959064327485,
+      "grad_norm": 0.005817115306854248,
+      "learning_rate": 0.0004883040935672514,
+      "loss": 0.3047,
+      "step": 38500
+    },
+    {
+      "epoch": 5.18341307814992,
+      "grad_norm": 0.16751976311206818,
+      "learning_rate": 0.000481658692185008,
+      "loss": 0.4044,
+      "step": 39000
+    },
+    {
+      "epoch": 5.249867091972355,
+      "grad_norm": 60.48826599121094,
+      "learning_rate": 0.0004750132908027645,
+      "loss": 0.3485,
+      "step": 39500
+    },
+    {
+      "epoch": 5.31632110579479,
+      "grad_norm": 157.16188049316406,
+      "learning_rate": 0.000468367889420521,
+      "loss": 0.3368,
+      "step": 40000
+    },
+    {
+      "epoch": 5.382775119617225,
+      "grad_norm": 45.994049072265625,
+      "learning_rate": 0.00046172248803827756,
+      "loss": 0.3816,
+      "step": 40500
+    },
+    {
+      "epoch": 5.44922913343966,
+      "grad_norm": 15.62516975402832,
+      "learning_rate": 0.00045507708665603404,
+      "loss": 0.324,
+      "step": 41000
+    },
+    {
+      "epoch": 5.515683147262095,
+      "grad_norm": 289.2982177734375,
+      "learning_rate": 0.0004484316852737905,
+      "loss": 0.3031,
+      "step": 41500
+    },
+    {
+      "epoch": 5.582137161084529,
+      "grad_norm": 0.027738776057958603,
+      "learning_rate": 0.00044178628389154705,
+      "loss": 0.3392,
+      "step": 42000
+    },
+    {
+      "epoch": 5.648591174906964,
+      "grad_norm": 0.02977157197892666,
+      "learning_rate": 0.0004351408825093036,
+      "loss": 0.3477,
+      "step": 42500
+    },
+    {
+      "epoch": 5.715045188729399,
+      "grad_norm": 1.663713812828064,
+      "learning_rate": 0.0004284954811270601,
+      "loss": 0.3993,
+      "step": 43000
+    },
+    {
+      "epoch": 5.781499202551834,
+      "grad_norm": 2.4411869049072266,
+      "learning_rate": 0.0004218500797448166,
+      "loss": 0.422,
+      "step": 43500
+    },
+    {
+      "epoch": 5.847953216374269,
+      "grad_norm": 12.378539085388184,
+      "learning_rate": 0.0004152046783625731,
+      "loss": 0.3649,
+      "step": 44000
+    },
+    {
+      "epoch": 5.914407230196704,
+      "grad_norm": 82.05158996582031,
+      "learning_rate": 0.00040855927698032964,
+      "loss": 0.4191,
+      "step": 44500
+    },
+    {
+      "epoch": 5.980861244019139,
+      "grad_norm": 0.008256383240222931,
+      "learning_rate": 0.0004019138755980861,
+      "loss": 0.3437,
+      "step": 45000
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.9501577287066246,
+      "eval_f1": 0.9497240205967022,
+      "eval_loss": 0.31248244643211365,
+      "eval_precision": 0.949826651119135,
+      "eval_recall": 0.9501577287066246,
+      "eval_runtime": 4.1272,
+      "eval_samples_per_second": 384.034,
+      "eval_steps_per_second": 96.19,
+      "step": 45144
+    },
+    {
+      "epoch": 6.047315257841573,
+      "grad_norm": 0.22720667719841003,
+      "learning_rate": 0.00039526847421584264,
+      "loss": 0.3774,
+      "step": 45500
+    },
+    {
+      "epoch": 6.113769271664008,
+      "grad_norm": 0.1796969771385193,
+      "learning_rate": 0.0003886230728335992,
+      "loss": 0.3625,
+      "step": 46000
+    },
+    {
+      "epoch": 6.180223285486443,
+      "grad_norm": 0.06664836406707764,
+      "learning_rate": 0.00038197767145135565,
+      "loss": 0.3096,
+      "step": 46500
+    },
+    {
+      "epoch": 6.246677299308878,
+      "grad_norm": 52.87346267700195,
+      "learning_rate": 0.0003753322700691122,
+      "loss": 0.324,
+      "step": 47000
+    },
+    {
+      "epoch": 6.313131313131313,
+      "grad_norm": 0.13641533255577087,
+      "learning_rate": 0.0003686868686868687,
+      "loss": 0.3824,
+      "step": 47500
+    },
+    {
+      "epoch": 6.379585326953748,
+      "grad_norm": 0.014752733521163464,
+      "learning_rate": 0.00036204146730462524,
+      "loss": 0.3576,
+      "step": 48000
+    },
+    {
+      "epoch": 6.446039340776183,
+      "grad_norm": 0.07991009950637817,
+      "learning_rate": 0.0003553960659223817,
+      "loss": 0.2889,
+      "step": 48500
+    },
+    {
+      "epoch": 6.512493354598618,
+      "grad_norm": 0.0857154056429863,
+      "learning_rate": 0.0003487506645401382,
+      "loss": 0.3496,
+      "step": 49000
+    },
+    {
+      "epoch": 6.578947368421053,
+      "grad_norm": 22.04611587524414,
+      "learning_rate": 0.00034210526315789477,
+      "loss": 0.3456,
+      "step": 49500
+    },
+    {
+      "epoch": 6.645401382243487,
+      "grad_norm": 0.3360465466976166,
+      "learning_rate": 0.00033545986177565125,
+      "loss": 0.3113,
+      "step": 50000
+    },
+    {
+      "epoch": 6.711855396065922,
+      "grad_norm": 0.011091183871030807,
+      "learning_rate": 0.0003288144603934078,
+      "loss": 0.3085,
+      "step": 50500
+    },
+    {
+      "epoch": 6.778309409888357,
+      "grad_norm": 45.16307830810547,
+      "learning_rate": 0.00032216905901116425,
+      "loss": 0.261,
+      "step": 51000
+    },
+    {
+      "epoch": 6.844763423710792,
+      "grad_norm": 0.10898467898368835,
+      "learning_rate": 0.0003155236576289208,
+      "loss": 0.2772,
+      "step": 51500
+    },
+    {
+      "epoch": 6.911217437533227,
+      "grad_norm": 0.04280232638120651,
+      "learning_rate": 0.0003088782562466773,
+      "loss": 0.3664,
+      "step": 52000
+    },
+    {
+      "epoch": 6.977671451355662,
+      "grad_norm": 0.44427451491355896,
+      "learning_rate": 0.0003022328548644338,
+      "loss": 0.2981,
+      "step": 52500
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.9570977917981073,
+      "eval_f1": 0.9567609606627793,
+      "eval_loss": 0.3381944000720978,
+      "eval_precision": 0.9567551880330806,
+      "eval_recall": 0.9570977917981073,
+      "eval_runtime": 4.1238,
+      "eval_samples_per_second": 384.357,
+      "eval_steps_per_second": 96.271,
+      "step": 52668
+    },
+    {
+      "epoch": 7.044125465178097,
+      "grad_norm": 12.310619354248047,
+      "learning_rate": 0.00029558745348219037,
+      "loss": 0.2961,
+      "step": 53000
+    },
+    {
+      "epoch": 7.110579479000531,
+      "grad_norm": 0.021439863368868828,
+      "learning_rate": 0.00028894205209994685,
+      "loss": 0.3132,
+      "step": 53500
+    },
+    {
+      "epoch": 7.177033492822966,
+      "grad_norm": 12.506621360778809,
+      "learning_rate": 0.0002822966507177033,
+      "loss": 0.3065,
+      "step": 54000
+    },
+    {
+      "epoch": 7.243487506645401,
+      "grad_norm": 40.974212646484375,
+      "learning_rate": 0.00027565124933545985,
+      "loss": 0.3052,
+      "step": 54500
+    },
+    {
+      "epoch": 7.309941520467836,
+      "grad_norm": 17.352012634277344,
+      "learning_rate": 0.0002690058479532164,
+      "loss": 0.3074,
+      "step": 55000
+    },
+    {
+      "epoch": 7.376395534290271,
+      "grad_norm": 7.186513423919678,
+      "learning_rate": 0.0002623604465709729,
+      "loss": 0.2944,
+      "step": 55500
+    },
+    {
+      "epoch": 7.442849548112706,
+      "grad_norm": 0.11422441154718399,
+      "learning_rate": 0.0002557150451887294,
+      "loss": 0.3277,
+      "step": 56000
+    },
+    {
+      "epoch": 7.509303561935141,
+      "grad_norm": 0.4097649157047272,
+      "learning_rate": 0.0002490696438064859,
+      "loss": 0.3314,
+      "step": 56500
+    },
+    {
+      "epoch": 7.575757575757576,
+      "grad_norm": 255.17686462402344,
+      "learning_rate": 0.00024242424242424245,
+      "loss": 0.3849,
+      "step": 57000
+    },
+    {
+      "epoch": 7.642211589580011,
+      "grad_norm": 0.11329037696123123,
+      "learning_rate": 0.00023577884104199895,
+      "loss": 0.3603,
+      "step": 57500
+    },
+    {
+      "epoch": 7.708665603402445,
+      "grad_norm": 0.04299360513687134,
+      "learning_rate": 0.00022913343965975545,
+      "loss": 0.3467,
+      "step": 58000
+    },
+    {
+      "epoch": 7.77511961722488,
+      "grad_norm": 0.04895203933119774,
+      "learning_rate": 0.00022248803827751195,
+      "loss": 0.3428,
+      "step": 58500
+    },
+    {
+      "epoch": 7.841573631047315,
+      "grad_norm": 0.07165663689374924,
+      "learning_rate": 0.00021584263689526848,
+      "loss": 0.2874,
+      "step": 59000
+    },
+    {
+      "epoch": 7.90802764486975,
+      "grad_norm": 0.10646966099739075,
+      "learning_rate": 0.00020919723551302499,
+      "loss": 0.2834,
+      "step": 59500
+    },
+    {
+      "epoch": 7.974481658692185,
+      "grad_norm": 0.022936491295695305,
+      "learning_rate": 0.00020255183413078152,
+      "loss": 0.2899,
+      "step": 60000
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.9577287066246056,
+      "eval_f1": 0.9575092656624108,
+      "eval_loss": 0.30500882863998413,
+      "eval_precision": 0.9575766504306299,
+      "eval_recall": 0.9577287066246056,
+      "eval_runtime": 4.5012,
+      "eval_samples_per_second": 352.132,
+      "eval_steps_per_second": 88.2,
+      "step": 60192
+    },
+    {
+      "epoch": 8.04093567251462,
+      "grad_norm": 0.4371676743030548,
+      "learning_rate": 0.00019590643274853802,
+      "loss": 0.3231,
+      "step": 60500
+    },
+    {
+      "epoch": 8.107389686337054,
+      "grad_norm": 0.000947824795730412,
+      "learning_rate": 0.00018926103136629452,
+      "loss": 0.3014,
+      "step": 61000
+    },
+    {
+      "epoch": 8.17384370015949,
+      "grad_norm": 0.06363413482904434,
+      "learning_rate": 0.00018261562998405105,
+      "loss": 0.2293,
+      "step": 61500
+    },
+    {
+      "epoch": 8.240297713981924,
+      "grad_norm": 1.2114511728286743,
+      "learning_rate": 0.00017597022860180755,
+      "loss": 0.2808,
+      "step": 62000
+    },
+    {
+      "epoch": 8.30675172780436,
+      "grad_norm": 23.535938262939453,
+      "learning_rate": 0.00016932482721956408,
+      "loss": 0.2595,
+      "step": 62500
+    },
+    {
+      "epoch": 8.373205741626794,
+      "grad_norm": 60.49204635620117,
+      "learning_rate": 0.00016267942583732056,
+      "loss": 0.3388,
+      "step": 63000
+    },
+    {
+      "epoch": 8.43965975544923,
+      "grad_norm": 14.233682632446289,
+      "learning_rate": 0.0001560340244550771,
+      "loss": 0.3423,
+      "step": 63500
+    },
+    {
+      "epoch": 8.506113769271664,
+      "grad_norm": 0.015386885032057762,
+      "learning_rate": 0.0001493886230728336,
+      "loss": 0.316,
+      "step": 64000
+    },
+    {
+      "epoch": 8.5725677830941,
+      "grad_norm": 0.3906301259994507,
+      "learning_rate": 0.00014274322169059012,
+      "loss": 0.3165,
+      "step": 64500
+    },
+    {
+      "epoch": 8.639021796916534,
+      "grad_norm": 0.0586216077208519,
+      "learning_rate": 0.00013609782030834665,
+      "loss": 0.3013,
+      "step": 65000
+    },
+    {
+      "epoch": 8.70547581073897,
+      "grad_norm": 0.006104405503720045,
+      "learning_rate": 0.00012945241892610312,
+      "loss": 0.2352,
+      "step": 65500
+    },
+    {
+      "epoch": 8.771929824561404,
+      "grad_norm": 0.02979845367372036,
+      "learning_rate": 0.00012280701754385965,
+      "loss": 0.2203,
+      "step": 66000
+    },
+    {
+      "epoch": 8.83838383838384,
+      "grad_norm": 0.08639369904994965,
+      "learning_rate": 0.00011616161616161616,
+      "loss": 0.2643,
+      "step": 66500
+    },
+    {
+      "epoch": 8.904837852206274,
+      "grad_norm": 32.0872802734375,
+      "learning_rate": 0.00010951621477937269,
+      "loss": 0.2658,
+      "step": 67000
+    },
+    {
+      "epoch": 8.971291866028707,
+      "grad_norm": 0.011845378205180168,
+      "learning_rate": 0.00010287081339712919,
+      "loss": 0.2795,
+      "step": 67500
+    },
+    {
+      "epoch": 9.0,
+      "eval_accuracy": 0.9646687697160883,
+      "eval_f1": 0.9644253672098426,
+      "eval_loss": 0.2595302164554596,
+      "eval_precision": 0.9644475825303181,
+      "eval_recall": 0.9646687697160883,
+      "eval_runtime": 4.3195,
+      "eval_samples_per_second": 366.941,
+      "eval_steps_per_second": 91.909,
+      "step": 67716
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 75240,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2551274670587520.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6102d63306e03ce9ccefa5c06382dfc3655d1c1d06d494553fcb81a751b526ef
+size 5240