End of training

Browse files

Files changed (5) hide show

README.md +3 -3
all_results.json +8 -0
runs/May25_17-40-02_95411d75b2af/events.out.tfevents.1716675646.95411d75b2af.25.1 +3 -0
train_results.json +8 -0
trainer_state.json +883 -0

README.md CHANGED Viewed

@@ -22,7 +22,7 @@ model-index:
     metrics:
     - name: Accuracy
       type: accuracy
-      value: 0.9280318091451292
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [facebook/convnextv2-base-22k-384](https://huggingface.co/facebook/convnextv2-base-22k-384) on the imagefolder dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3485
-- Accuracy: 0.9280
 ## Model description

     metrics:
     - name: Accuracy
       type: accuracy
+      value: 0.9311507936507937
 ---
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 This model is a fine-tuned version of [facebook/convnextv2-base-22k-384](https://huggingface.co/facebook/convnextv2-base-22k-384) on the imagefolder dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.3332
+- Accuracy: 0.9312
 ## Model description

all_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 10.0,
+    "total_flos": 4.09349935387607e+19,
+    "train_loss": 0.45005689912540897,
+    "train_runtime": 16363.7477,
+    "train_samples_per_second": 10.744,
+    "train_steps_per_second": 0.672
+}

runs/May25_17-40-02_95411d75b2af/events.out.tfevents.1716675646.95411d75b2af.25.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95c7e207ed08cc1ad6c8db10e9ff63a7108b01447e848f8f17d78ffe89be4c96
+size 411

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 10.0,
+    "total_flos": 4.09349935387607e+19,
+    "train_loss": 0.45005689912540897,
+    "train_runtime": 16363.7477,
+    "train_samples_per_second": 10.744,
+    "train_steps_per_second": 0.672
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,883 @@

+{
+  "best_metric": 0.34852299094200134,
+  "best_model_checkpoint": "./convnext-base-3e-4/checkpoint-10990",
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 10990,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.09,
+      "grad_norm": 17.357587814331055,
+      "learning_rate": 0.00029993871755982685,
+      "loss": 2.1854,
+      "step": 100
+    },
+    {
+      "epoch": 0.18,
+      "grad_norm": 28.103832244873047,
+      "learning_rate": 0.0002997549203131404,
+      "loss": 1.4323,
+      "step": 200
+    },
+    {
+      "epoch": 0.27,
+      "grad_norm": 15.243412017822266,
+      "learning_rate": 0.0002994487584405243,
+      "loss": 1.3262,
+      "step": 300
+    },
+    {
+      "epoch": 0.36,
+      "grad_norm": 13.571104049682617,
+      "learning_rate": 0.00029902048210660057,
+      "loss": 1.164,
+      "step": 400
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 11.755253791809082,
+      "learning_rate": 0.00029847044125561983,
+      "loss": 1.1175,
+      "step": 500
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 8.938959121704102,
+      "learning_rate": 0.00029779908532552276,
+      "loss": 1.0117,
+      "step": 600
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 8.597779273986816,
+      "learning_rate": 0.00029700696288070426,
+      "loss": 1.0719,
+      "step": 700
+    },
+    {
+      "epoch": 0.73,
+      "grad_norm": 12.803329467773438,
+      "learning_rate": 0.0002960947211637822,
+      "loss": 1.0533,
+      "step": 800
+    },
+    {
+      "epoch": 0.82,
+      "grad_norm": 12.126448631286621,
+      "learning_rate": 0.00029506310556673567,
+      "loss": 0.9138,
+      "step": 900
+    },
+    {
+      "epoch": 0.91,
+      "grad_norm": 8.313648223876953,
+      "learning_rate": 0.0002939129590218462,
+      "loss": 0.8947,
+      "step": 1000
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.7884691848906561,
+      "eval_loss": 0.7363528609275818,
+      "eval_runtime": 103.2916,
+      "eval_samples_per_second": 24.349,
+      "eval_steps_per_second": 1.53,
+      "step": 1099
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 8.587052345275879,
+      "learning_rate": 0.00029264522131293815,
+      "loss": 0.9902,
+      "step": 1100
+    },
+    {
+      "epoch": 1.09,
+      "grad_norm": 8.60452938079834,
+      "learning_rate": 0.00029126092830748215,
+      "loss": 0.8517,
+      "step": 1200
+    },
+    {
+      "epoch": 1.18,
+      "grad_norm": 14.598617553710938,
+      "learning_rate": 0.00028976121111018877,
+      "loss": 0.802,
+      "step": 1300
+    },
+    {
+      "epoch": 1.27,
+      "grad_norm": 7.155284881591797,
+      "learning_rate": 0.00028814729513878363,
+      "loss": 0.7962,
+      "step": 1400
+    },
+    {
+      "epoch": 1.36,
+      "grad_norm": 13.24478816986084,
+      "learning_rate": 0.00028642049912271946,
+      "loss": 0.7782,
+      "step": 1500
+    },
+    {
+      "epoch": 1.46,
+      "grad_norm": 10.02603530883789,
+      "learning_rate": 0.0002845822340256436,
+      "loss": 0.7813,
+      "step": 1600
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 10.052382469177246,
+      "learning_rate": 0.00028263400189250057,
+      "loss": 0.8079,
+      "step": 1700
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 8.431123733520508,
+      "learning_rate": 0.0002805773946222121,
+      "loss": 0.8041,
+      "step": 1800
+    },
+    {
+      "epoch": 1.73,
+      "grad_norm": 7.3062944412231445,
+      "learning_rate": 0.00027841409266693835,
+      "loss": 0.8019,
+      "step": 1900
+    },
+    {
+      "epoch": 1.82,
+      "grad_norm": 7.146261215209961,
+      "learning_rate": 0.0002761458636589813,
+      "loss": 0.679,
+      "step": 2000
+    },
+    {
+      "epoch": 1.91,
+      "grad_norm": 12.160822868347168,
+      "learning_rate": 0.0002737745609664539,
+      "loss": 0.7643,
+      "step": 2100
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8170974155069582,
+      "eval_loss": 0.628582775592804,
+      "eval_runtime": 103.2407,
+      "eval_samples_per_second": 24.361,
+      "eval_steps_per_second": 1.53,
+      "step": 2198
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 14.596354484558105,
+      "learning_rate": 0.00027130212217889483,
+      "loss": 0.7511,
+      "step": 2200
+    },
+    {
+      "epoch": 2.09,
+      "grad_norm": 11.497203826904297,
+      "learning_rate": 0.000268730567524065,
+      "loss": 0.6527,
+      "step": 2300
+    },
+    {
+      "epoch": 2.18,
+      "grad_norm": 6.976894855499268,
+      "learning_rate": 0.00026606199821722166,
+      "loss": 0.6289,
+      "step": 2400
+    },
+    {
+      "epoch": 2.27,
+      "grad_norm": 9.718855857849121,
+      "learning_rate": 0.0002632985947442167,
+      "loss": 0.6755,
+      "step": 2500
+    },
+    {
+      "epoch": 2.37,
+      "grad_norm": 6.231022357940674,
+      "learning_rate": 0.00026044261507982355,
+      "loss": 0.6377,
+      "step": 2600
+    },
+    {
+      "epoch": 2.46,
+      "grad_norm": 15.673833847045898,
+      "learning_rate": 0.0002574963928427478,
+      "loss": 0.626,
+      "step": 2700
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 5.745795726776123,
+      "learning_rate": 0.00025446233538882923,
+      "loss": 0.6276,
+      "step": 2800
+    },
+    {
+      "epoch": 2.64,
+      "grad_norm": 11.555608749389648,
+      "learning_rate": 0.00025134292184399317,
+      "loss": 0.695,
+      "step": 2900
+    },
+    {
+      "epoch": 2.73,
+      "grad_norm": 7.4662089347839355,
+      "learning_rate": 0.00024814070107855875,
+      "loss": 0.6095,
+      "step": 3000
+    },
+    {
+      "epoch": 2.82,
+      "grad_norm": 7.660247802734375,
+      "learning_rate": 0.00024485828962455907,
+      "loss": 0.631,
+      "step": 3100
+    },
+    {
+      "epoch": 2.91,
+      "grad_norm": 9.731380462646484,
+      "learning_rate": 0.00024149836953777485,
+      "loss": 0.6036,
+      "step": 3200
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.8481113320079523,
+      "eval_loss": 0.5258452892303467,
+      "eval_runtime": 104.1346,
+      "eval_samples_per_second": 24.151,
+      "eval_steps_per_second": 1.517,
+      "step": 3297
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 5.469219207763672,
+      "learning_rate": 0.00023806368620622872,
+      "loss": 0.5889,
+      "step": 3300
+    },
+    {
+      "epoch": 3.09,
+      "grad_norm": 8.50100040435791,
+      "learning_rate": 0.0002345570461069312,
+      "loss": 0.5034,
+      "step": 3400
+    },
+    {
+      "epoch": 3.18,
+      "grad_norm": 12.361306190490723,
+      "learning_rate": 0.00023098131451271015,
+      "loss": 0.5181,
+      "step": 3500
+    },
+    {
+      "epoch": 3.28,
+      "grad_norm": 7.584137439727783,
+      "learning_rate": 0.0002273394131509988,
+      "loss": 0.5336,
+      "step": 3600
+    },
+    {
+      "epoch": 3.37,
+      "grad_norm": 10.401506423950195,
+      "learning_rate": 0.0002236343178164948,
+      "loss": 0.5216,
+      "step": 3700
+    },
+    {
+      "epoch": 3.46,
+      "grad_norm": 5.352778434753418,
+      "learning_rate": 0.00021986905593964046,
+      "loss": 0.4939,
+      "step": 3800
+    },
+    {
+      "epoch": 3.55,
+      "grad_norm": 7.9993109703063965,
+      "learning_rate": 0.0002160467041129117,
+      "loss": 0.521,
+      "step": 3900
+    },
+    {
+      "epoch": 3.64,
+      "grad_norm": 9.176218032836914,
+      "learning_rate": 0.00021217038557693726,
+      "loss": 0.5288,
+      "step": 4000
+    },
+    {
+      "epoch": 3.73,
+      "grad_norm": 9.322657585144043,
+      "learning_rate": 0.0002082432676685007,
+      "loss": 0.5168,
+      "step": 4100
+    },
+    {
+      "epoch": 3.82,
+      "grad_norm": 5.861387252807617,
+      "learning_rate": 0.00020426855923251228,
+      "loss": 0.5081,
+      "step": 4200
+    },
+    {
+      "epoch": 3.91,
+      "grad_norm": 4.290045261383057,
+      "learning_rate": 0.00020024950800006462,
+      "loss": 0.5012,
+      "step": 4300
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.869582504970179,
+      "eval_loss": 0.49109867215156555,
+      "eval_runtime": 102.6443,
+      "eval_samples_per_second": 24.502,
+      "eval_steps_per_second": 1.539,
+      "step": 4396
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 4.063934803009033,
+      "learning_rate": 0.0001961893979347137,
+      "loss": 0.4754,
+      "step": 4400
+    },
+    {
+      "epoch": 4.09,
+      "grad_norm": 5.163869380950928,
+      "learning_rate": 0.00019209154654915522,
+      "loss": 0.4471,
+      "step": 4500
+    },
+    {
+      "epoch": 4.19,
+      "grad_norm": 3.733952522277832,
+      "learning_rate": 0.0001879593021944875,
+      "loss": 0.4004,
+      "step": 4600
+    },
+    {
+      "epoch": 4.28,
+      "grad_norm": 7.615368366241455,
+      "learning_rate": 0.00018379604132427648,
+      "loss": 0.4076,
+      "step": 4700
+    },
+    {
+      "epoch": 4.37,
+      "grad_norm": 9.600367546081543,
+      "learning_rate": 0.0001796051657356582,
+      "loss": 0.4035,
+      "step": 4800
+    },
+    {
+      "epoch": 4.46,
+      "grad_norm": 9.41919231414795,
+      "learning_rate": 0.0001753900997897331,
+      "loss": 0.4281,
+      "step": 4900
+    },
+    {
+      "epoch": 4.55,
+      "grad_norm": 13.647310256958008,
+      "learning_rate": 0.00017115428761352327,
+      "loss": 0.3674,
+      "step": 5000
+    },
+    {
+      "epoch": 4.64,
+      "grad_norm": 2.1058132648468018,
+      "learning_rate": 0.00016690119028577906,
+      "loss": 0.3917,
+      "step": 5100
+    },
+    {
+      "epoch": 4.73,
+      "grad_norm": 4.259520053863525,
+      "learning_rate": 0.0001626342830089342,
+      "loss": 0.3899,
+      "step": 5200
+    },
+    {
+      "epoch": 4.82,
+      "grad_norm": 4.13034200668335,
+      "learning_rate": 0.0001583570522695211,
+      "loss": 0.4178,
+      "step": 5300
+    },
+    {
+      "epoch": 4.91,
+      "grad_norm": 13.085577011108398,
+      "learning_rate": 0.00015407299298936486,
+      "loss": 0.3926,
+      "step": 5400
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.8930417495029821,
+      "eval_loss": 0.38039031624794006,
+      "eval_runtime": 103.7463,
+      "eval_samples_per_second": 24.242,
+      "eval_steps_per_second": 1.523,
+      "step": 5495
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 3.522939443588257,
+      "learning_rate": 0.000149785605669886,
+      "loss": 0.3671,
+      "step": 5500
+    },
+    {
+      "epoch": 5.1,
+      "grad_norm": 5.17425537109375,
+      "learning_rate": 0.00014549839353184327,
+      "loss": 0.3017,
+      "step": 5600
+    },
+    {
+      "epoch": 5.19,
+      "grad_norm": 6.327219009399414,
+      "learning_rate": 0.00014121485965285484,
+      "loss": 0.2922,
+      "step": 5700
+    },
+    {
+      "epoch": 5.28,
+      "grad_norm": 0.7092263102531433,
+      "learning_rate": 0.00013693850410503614,
+      "loss": 0.314,
+      "step": 5800
+    },
+    {
+      "epoch": 5.37,
+      "grad_norm": 0.38373059034347534,
+      "learning_rate": 0.0001326728210950942,
+      "loss": 0.3141,
+      "step": 5900
+    },
+    {
+      "epoch": 5.46,
+      "grad_norm": 5.553852558135986,
+      "learning_rate": 0.00012842129610921376,
+      "loss": 0.2821,
+      "step": 6000
+    },
+    {
+      "epoch": 5.55,
+      "grad_norm": 3.678790330886841,
+      "learning_rate": 0.00012418740306506922,
+      "loss": 0.3359,
+      "step": 6100
+    },
+    {
+      "epoch": 5.64,
+      "grad_norm": 4.428023338317871,
+      "learning_rate": 0.00011997460147328983,
+      "loss": 0.2825,
+      "step": 6200
+    },
+    {
+      "epoch": 5.73,
+      "grad_norm": 3.3043198585510254,
+      "learning_rate": 0.00011578633361069557,
+      "loss": 0.3317,
+      "step": 6300
+    },
+    {
+      "epoch": 5.82,
+      "grad_norm": 1.317456603050232,
+      "learning_rate": 0.0001116260217076161,
+      "loss": 0.2983,
+      "step": 6400
+    },
+    {
+      "epoch": 5.91,
+      "grad_norm": 8.99087142944336,
+      "learning_rate": 0.00010749706515158862,
+      "loss": 0.3348,
+      "step": 6500
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.8970178926441352,
+      "eval_loss": 0.41324833035469055,
+      "eval_runtime": 103.3001,
+      "eval_samples_per_second": 24.347,
+      "eval_steps_per_second": 1.53,
+      "step": 6594
+    },
+    {
+      "epoch": 6.01,
+      "grad_norm": 4.097568988800049,
+      "learning_rate": 0.00010340283770972167,
+      "loss": 0.3045,
+      "step": 6600
+    },
+    {
+      "epoch": 6.1,
+      "grad_norm": 10.98578929901123,
+      "learning_rate": 9.93466847719919e-05,
+      "loss": 0.2327,
+      "step": 6700
+    },
+    {
+      "epoch": 6.19,
+      "grad_norm": 13.919866561889648,
+      "learning_rate": 9.533192061772917e-05,
+      "loss": 0.2696,
+      "step": 6800
+    },
+    {
+      "epoch": 6.28,
+      "grad_norm": 6.072042942047119,
+      "learning_rate": 9.136182570752152e-05,
+      "loss": 0.2258,
+      "step": 6900
+    },
+    {
+      "epoch": 6.37,
+      "grad_norm": 0.17451171576976776,
+      "learning_rate": 8.743964400275302e-05,
+      "loss": 0.2406,
+      "step": 7000
+    },
+    {
+      "epoch": 6.46,
+      "grad_norm": 0.33122172951698303,
+      "learning_rate": 8.356858031496595e-05,
+      "loss": 0.2505,
+      "step": 7100
+    },
+    {
+      "epoch": 6.55,
+      "grad_norm": 1.151172161102295,
+      "learning_rate": 7.975179768721186e-05,
+      "loss": 0.1903,
+      "step": 7200
+    },
+    {
+      "epoch": 6.64,
+      "grad_norm": 0.7031873464584351,
+      "learning_rate": 7.59924148095311e-05,
+      "loss": 0.2085,
+      "step": 7300
+    },
+    {
+      "epoch": 6.73,
+      "grad_norm": 6.131903171539307,
+      "learning_rate": 7.229350347067424e-05,
+      "loss": 0.2471,
+      "step": 7400
+    },
+    {
+      "epoch": 6.82,
+      "grad_norm": 6.110349178314209,
+      "learning_rate": 6.865808604814564e-05,
+      "loss": 0.2085,
+      "step": 7500
+    },
+    {
+      "epoch": 6.92,
+      "grad_norm": 4.413149833679199,
+      "learning_rate": 6.508913303862143e-05,
+      "loss": 0.2594,
+      "step": 7600
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.9153081510934393,
+      "eval_loss": 0.3626956641674042,
+      "eval_runtime": 103.1814,
+      "eval_samples_per_second": 24.375,
+      "eval_steps_per_second": 1.531,
+      "step": 7693
+    },
+    {
+      "epoch": 7.01,
+      "grad_norm": 3.3153018951416016,
+      "learning_rate": 6.158956063075865e-05,
+      "loss": 0.1743,
+      "step": 7700
+    },
+    {
+      "epoch": 7.1,
+      "grad_norm": 2.5595600605010986,
+      "learning_rate": 5.816222832238015e-05,
+      "loss": 0.1699,
+      "step": 7800
+    },
+    {
+      "epoch": 7.19,
+      "grad_norm": 3.9605636596679688,
+      "learning_rate": 5.4809936583981286e-05,
+      "loss": 0.2036,
+      "step": 7900
+    },
+    {
+      "epoch": 7.28,
+      "grad_norm": 0.7597993612289429,
+      "learning_rate": 5.1535424570467366e-05,
+      "loss": 0.1829,
+      "step": 8000
+    },
+    {
+      "epoch": 7.37,
+      "grad_norm": 5.694293022155762,
+      "learning_rate": 4.834136788299248e-05,
+      "loss": 0.2039,
+      "step": 8100
+    },
+    {
+      "epoch": 7.46,
+      "grad_norm": 0.5163713097572327,
+      "learning_rate": 4.523037638272821e-05,
+      "loss": 0.1764,
+      "step": 8200
+    },
+    {
+      "epoch": 7.55,
+      "grad_norm": 4.396867275238037,
+      "learning_rate": 4.220499205834782e-05,
+      "loss": 0.1862,
+      "step": 8300
+    },
+    {
+      "epoch": 7.64,
+      "grad_norm": 0.054451316595077515,
+      "learning_rate": 3.926768694896931e-05,
+      "loss": 0.1773,
+      "step": 8400
+    },
+    {
+      "epoch": 7.73,
+      "grad_norm": 0.23744052648544312,
+      "learning_rate": 3.64208611242546e-05,
+      "loss": 0.1648,
+      "step": 8500
+    },
+    {
+      "epoch": 7.83,
+      "grad_norm": 3.540268659591675,
+      "learning_rate": 3.366684072331414e-05,
+      "loss": 0.1541,
+      "step": 8600
+    },
+    {
+      "epoch": 7.92,
+      "grad_norm": 0.33744722604751587,
+      "learning_rate": 3.100787605402072e-05,
+      "loss": 0.1751,
+      "step": 8700
+    },
+    {
+      "epoch": 8.0,
+      "eval_accuracy": 0.9308151093439364,
+      "eval_loss": 0.3506681025028229,
+      "eval_runtime": 103.8384,
+      "eval_samples_per_second": 24.22,
+      "eval_steps_per_second": 1.522,
+      "step": 8792
+    },
+    {
+      "epoch": 8.01,
+      "grad_norm": 0.13831892609596252,
+      "learning_rate": 2.844613975428448e-05,
+      "loss": 0.1472,
+      "step": 8800
+    },
+    {
+      "epoch": 8.1,
+      "grad_norm": 2.3098576068878174,
+      "learning_rate": 2.5983725016792572e-05,
+      "loss": 0.1772,
+      "step": 8900
+    },
+    {
+      "epoch": 8.19,
+      "grad_norm": 0.10428429394960403,
+      "learning_rate": 2.3622643878662696e-05,
+      "loss": 0.1524,
+      "step": 9000
+    },
+    {
+      "epoch": 8.28,
+      "grad_norm": 9.646160125732422,
+      "learning_rate": 2.1364825577409422e-05,
+      "loss": 0.1023,
+      "step": 9100
+    },
+    {
+      "epoch": 8.37,
+      "grad_norm": 0.3879956305027008,
+      "learning_rate": 1.9212114974565664e-05,
+      "loss": 0.1421,
+      "step": 9200
+    },
+    {
+      "epoch": 8.46,
+      "grad_norm": 0.022449787706136703,
+      "learning_rate": 1.7166271048247792e-05,
+      "loss": 0.1101,
+      "step": 9300
+    },
+    {
+      "epoch": 8.55,
+      "grad_norm": 4.682805061340332,
+      "learning_rate": 1.5228965455896053e-05,
+      "loss": 0.1355,
+      "step": 9400
+    },
+    {
+      "epoch": 8.64,
+      "grad_norm": 4.212618350982666,
+      "learning_rate": 1.3401781168364589e-05,
+      "loss": 0.1465,
+      "step": 9500
+    },
+    {
+      "epoch": 8.74,
+      "grad_norm": 0.17709462344646454,
+      "learning_rate": 1.1686211176477206e-05,
+      "loss": 0.1375,
+      "step": 9600
+    },
+    {
+      "epoch": 8.83,
+      "grad_norm": 7.981707572937012,
+      "learning_rate": 1.0083657271105799e-05,
+      "loss": 0.1498,
+      "step": 9700
+    },
+    {
+      "epoch": 8.92,
+      "grad_norm": 7.477297306060791,
+      "learning_rate": 8.59542889776807e-06,
+      "loss": 0.1613,
+      "step": 9800
+    },
+    {
+      "epoch": 9.0,
+      "eval_accuracy": 0.9300198807157057,
+      "eval_loss": 0.34880414605140686,
+      "eval_runtime": 103.3381,
+      "eval_samples_per_second": 24.338,
+      "eval_steps_per_second": 1.529,
+      "step": 9891
+    },
+    {
+      "epoch": 9.01,
+      "grad_norm": 1.4414594173431396,
+      "learning_rate": 7.222742086680755e-06,
+      "loss": 0.1335,
+      "step": 9900
+    },
+    {
+      "epoch": 9.1,
+      "grad_norm": 4.2091474533081055,
+      "learning_rate": 5.966718459142195e-06,
+      "loss": 0.1066,
+      "step": 10000
+    },
+    {
+      "epoch": 9.19,
+      "grad_norm": 18.196033477783203,
+      "learning_rate": 4.828384311056549e-06,
+      "loss": 0.125,
+      "step": 10100
+    },
+    {
+      "epoch": 9.28,
+      "grad_norm": 2.323212146759033,
+      "learning_rate": 3.8086697743481664e-06,
+      "loss": 0.1239,
+      "step": 10200
+    },
+    {
+      "epoch": 9.37,
+      "grad_norm": 0.2004556953907013,
+      "learning_rate": 2.9084080569515775e-06,
+      "loss": 0.1076,
+      "step": 10300
+    },
+    {
+      "epoch": 9.46,
+      "grad_norm": 12.655696868896484,
+      "learning_rate": 2.128334761997924e-06,
+      "loss": 0.1054,
+      "step": 10400
+    },
+    {
+      "epoch": 9.55,
+      "grad_norm": 0.03721316158771515,
+      "learning_rate": 1.469087286754289e-06,
+      "loss": 0.125,
+      "step": 10500
+    },
+    {
+      "epoch": 9.65,
+      "grad_norm": 3.7458605766296387,
+      "learning_rate": 9.31204301806776e-07,
+      "loss": 0.1161,
+      "step": 10600
+    },
+    {
+      "epoch": 9.74,
+      "grad_norm": 0.63627690076828,
+      "learning_rate": 5.151253109133391e-07,
+      "loss": 0.1342,
+      "step": 10700
+    },
+    {
+      "epoch": 9.83,
+      "grad_norm": 7.162803649902344,
+      "learning_rate": 2.211902918855313e-07,
+      "loss": 0.1365,
+      "step": 10800
+    },
+    {
+      "epoch": 9.92,
+      "grad_norm": 1.5799212455749512,
+      "learning_rate": 4.9639418792951634e-08,
+      "loss": 0.1102,
+      "step": 10900
+    },
+    {
+      "epoch": 10.0,
+      "eval_accuracy": 0.9280318091451292,
+      "eval_loss": 0.34852299094200134,
+      "eval_runtime": 103.4235,
+      "eval_samples_per_second": 24.317,
+      "eval_steps_per_second": 1.528,
+      "step": 10990
+    },
+    {
+      "epoch": 10.0,
+      "step": 10990,
+      "total_flos": 4.09349935387607e+19,
+      "train_loss": 0.45005689912540897,
+      "train_runtime": 16363.7477,
+      "train_samples_per_second": 10.744,
+      "train_steps_per_second": 0.672
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 10990,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 4.09349935387607e+19,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}