Model save

Browse files

Files changed (8) hide show

README.md +1 -1
all_results.json +5 -5
model.safetensors +1 -1
runs/Mar29_18-45-45_X5C922065N/events.out.tfevents.1711734345.X5C922065N.53009.7 +3 -0
runs/Mar29_18-51-27_X5C922065N/events.out.tfevents.1711734707.X5C922065N.77198.0 +3 -0
train_results.json +5 -5
trainer_state.json +15 -885
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -34,7 +34,7 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08

 The following hyperparameters were used during training:
 - learning_rate: 0.0002
+- train_batch_size: 16
 - eval_batch_size: 8
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08

all_results.json CHANGED Viewed

@@ -5,9 +5,9 @@
     "eval_runtime": 48.4901,
     "eval_samples_per_second": 51.248,
     "eval_steps_per_second": 6.414,
-    "total_flos": 7.703325099767808e+17,
-    "train_loss": 0.1579143282675882,
-    "train_runtime": 491.3754,
-    "train_samples_per_second": 20.229,
-    "train_steps_per_second": 2.532
 }

     "eval_runtime": 48.4901,
     "eval_samples_per_second": 51.248,
     "eval_steps_per_second": 6.414,
+    "total_flos": 7439897757745152.0,
+    "train_loss": 2.0212895274162292,
+    "train_runtime": 7.7329,
+    "train_samples_per_second": 12.414,
+    "train_steps_per_second": 1.552
 }

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b61cb4e03c9bfcb4d59e04874ddb0628ad12043bcb2d77be6e778249886b256
 size 343254736

 version https://git-lfs.github.com/spec/v1
+oid sha256:17ae2382b9eca4f52fc4d17a9cb546b8971f310d461d0e085613297c710f7ff9
 size 343254736

runs/Mar29_18-45-45_X5C922065N/events.out.tfevents.1711734345.X5C922065N.53009.7 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e2c19225f69428b191c5ad332d0bce93b54ac73221e2934e3ffbf686d0a57b1
+size 6106

runs/Mar29_18-51-27_X5C922065N/events.out.tfevents.1711734707.X5C922065N.77198.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90a0ddc9125c1c57385b0ee044d4b2110eacca12fd4d08dea91f57269f606419
+size 5898

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 4.0,
-    "total_flos": 7.703325099767808e+17,
-    "train_loss": 0.1579143282675882,
-    "train_runtime": 491.3754,
-    "train_samples_per_second": 20.229,
-    "train_steps_per_second": 2.532
 }

 {
     "epoch": 4.0,
+    "total_flos": 7439897757745152.0,
+    "train_loss": 2.0212895274162292,
+    "train_runtime": 7.7329,
+    "train_samples_per_second": 12.414,
+    "train_steps_per_second": 1.552
 }

trainer_state.json CHANGED Viewed

@@ -1,906 +1,36 @@
 {
-  "best_metric": 0.21430718898773193,
-  "best_model_checkpoint": "./vit-finetune-scrap/checkpoint-1000",
   "epoch": 4.0,
   "eval_steps": 1000,
-  "global_step": 1244,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.03,
-      "grad_norm": 10.849635124206543,
-      "learning_rate": 0.00019839228295819936,
-      "loss": 0.1258,
       "step": 10
     },
-    {
-      "epoch": 0.06,
-      "grad_norm": 29.82047462463379,
-      "learning_rate": 0.00019678456591639874,
-      "loss": 0.2394,
-      "step": 20
-    },
-    {
-      "epoch": 0.1,
-      "grad_norm": 4.707005500793457,
-      "learning_rate": 0.00019517684887459809,
-      "loss": 0.234,
-      "step": 30
-    },
-    {
-      "epoch": 0.13,
-      "grad_norm": 13.66462516784668,
-      "learning_rate": 0.00019356913183279743,
-      "loss": 0.705,
-      "step": 40
-    },
-    {
-      "epoch": 0.16,
-      "grad_norm": 4.419419765472412,
-      "learning_rate": 0.00019196141479099678,
-      "loss": 0.6657,
-      "step": 50
-    },
-    {
-      "epoch": 0.19,
-      "grad_norm": 0.14946621656417847,
-      "learning_rate": 0.00019035369774919616,
-      "loss": 0.2407,
-      "step": 60
-    },
-    {
-      "epoch": 0.23,
-      "grad_norm": 2.8290460109710693,
-      "learning_rate": 0.0001887459807073955,
-      "loss": 0.3973,
-      "step": 70
-    },
-    {
-      "epoch": 0.26,
-      "grad_norm": 15.848897933959961,
-      "learning_rate": 0.00018713826366559486,
-      "loss": 0.2432,
-      "step": 80
-    },
-    {
-      "epoch": 0.29,
-      "grad_norm": 0.30860471725463867,
-      "learning_rate": 0.0001855305466237942,
-      "loss": 0.2732,
-      "step": 90
-    },
-    {
-      "epoch": 0.32,
-      "grad_norm": 14.233210563659668,
-      "learning_rate": 0.0001839228295819936,
-      "loss": 0.261,
-      "step": 100
-    },
-    {
-      "epoch": 0.35,
-      "grad_norm": 9.140750885009766,
-      "learning_rate": 0.00018231511254019294,
-      "loss": 0.1776,
-      "step": 110
-    },
-    {
-      "epoch": 0.39,
-      "grad_norm": 0.9528696537017822,
-      "learning_rate": 0.00018070739549839229,
-      "loss": 0.3849,
-      "step": 120
-    },
-    {
-      "epoch": 0.42,
-      "grad_norm": 21.716726303100586,
-      "learning_rate": 0.00017909967845659166,
-      "loss": 0.3328,
-      "step": 130
-    },
-    {
-      "epoch": 0.45,
-      "grad_norm": 7.960571765899658,
-      "learning_rate": 0.000177491961414791,
-      "loss": 0.5052,
-      "step": 140
-    },
-    {
-      "epoch": 0.48,
-      "grad_norm": 3.9136505126953125,
-      "learning_rate": 0.00017588424437299036,
-      "loss": 0.5026,
-      "step": 150
-    },
-    {
-      "epoch": 0.51,
-      "grad_norm": 14.131813049316406,
-      "learning_rate": 0.0001742765273311897,
-      "loss": 0.3997,
-      "step": 160
-    },
-    {
-      "epoch": 0.55,
-      "grad_norm": 13.529720306396484,
-      "learning_rate": 0.0001726688102893891,
-      "loss": 0.2877,
-      "step": 170
-    },
-    {
-      "epoch": 0.58,
-      "grad_norm": 6.182504653930664,
-      "learning_rate": 0.00017106109324758844,
-      "loss": 0.3408,
-      "step": 180
-    },
-    {
-      "epoch": 0.61,
-      "grad_norm": 0.17119653522968292,
-      "learning_rate": 0.0001694533762057878,
-      "loss": 0.2916,
-      "step": 190
-    },
-    {
-      "epoch": 0.64,
-      "grad_norm": 13.307029724121094,
-      "learning_rate": 0.00016784565916398716,
-      "loss": 0.3485,
-      "step": 200
-    },
-    {
-      "epoch": 0.68,
-      "grad_norm": 4.883426666259766,
-      "learning_rate": 0.0001662379421221865,
-      "loss": 0.3939,
-      "step": 210
-    },
-    {
-      "epoch": 0.71,
-      "grad_norm": 5.17271614074707,
-      "learning_rate": 0.00016463022508038586,
-      "loss": 0.4001,
-      "step": 220
-    },
-    {
-      "epoch": 0.74,
-      "grad_norm": 0.18887023627758026,
-      "learning_rate": 0.0001630225080385852,
-      "loss": 0.2459,
-      "step": 230
-    },
-    {
-      "epoch": 0.77,
-      "grad_norm": 0.3397394120693207,
-      "learning_rate": 0.0001614147909967846,
-      "loss": 0.3813,
-      "step": 240
-    },
-    {
-      "epoch": 0.8,
-      "grad_norm": 7.221404075622559,
-      "learning_rate": 0.00015980707395498394,
-      "loss": 0.2913,
-      "step": 250
-    },
-    {
-      "epoch": 0.84,
-      "grad_norm": 3.0032007694244385,
-      "learning_rate": 0.0001581993569131833,
-      "loss": 0.273,
-      "step": 260
-    },
-    {
-      "epoch": 0.87,
-      "grad_norm": 3.486640691757202,
-      "learning_rate": 0.00015659163987138264,
-      "loss": 0.5797,
-      "step": 270
-    },
-    {
-      "epoch": 0.9,
-      "grad_norm": 0.3199945092201233,
-      "learning_rate": 0.00015498392282958201,
-      "loss": 0.4904,
-      "step": 280
-    },
-    {
-      "epoch": 0.93,
-      "grad_norm": 38.1386833190918,
-      "learning_rate": 0.00015337620578778136,
-      "loss": 0.2789,
-      "step": 290
-    },
-    {
-      "epoch": 0.96,
-      "grad_norm": 3.608177661895752,
-      "learning_rate": 0.0001517684887459807,
-      "loss": 0.5587,
-      "step": 300
-    },
-    {
-      "epoch": 1.0,
-      "grad_norm": 0.1488448977470398,
-      "learning_rate": 0.0001501607717041801,
-      "loss": 0.3405,
-      "step": 310
-    },
-    {
-      "epoch": 1.03,
-      "grad_norm": 1.542035460472107,
-      "learning_rate": 0.00014855305466237944,
-      "loss": 0.2688,
-      "step": 320
-    },
-    {
-      "epoch": 1.06,
-      "grad_norm": 2.1089909076690674,
-      "learning_rate": 0.0001469453376205788,
-      "loss": 0.2085,
-      "step": 330
-    },
-    {
-      "epoch": 1.09,
-      "grad_norm": 11.16602897644043,
-      "learning_rate": 0.00014533762057877814,
-      "loss": 0.3437,
-      "step": 340
-    },
-    {
-      "epoch": 1.13,
-      "grad_norm": 2.2596559524536133,
-      "learning_rate": 0.00014372990353697752,
-      "loss": 0.337,
-      "step": 350
-    },
-    {
-      "epoch": 1.16,
-      "grad_norm": 2.616323947906494,
-      "learning_rate": 0.00014212218649517686,
-      "loss": 0.2074,
-      "step": 360
-    },
-    {
-      "epoch": 1.19,
-      "grad_norm": 0.5269195437431335,
-      "learning_rate": 0.00014051446945337621,
-      "loss": 0.0913,
-      "step": 370
-    },
-    {
-      "epoch": 1.22,
-      "grad_norm": 7.34785270690918,
-      "learning_rate": 0.0001389067524115756,
-      "loss": 0.1462,
-      "step": 380
-    },
-    {
-      "epoch": 1.25,
-      "grad_norm": 0.13304546475410461,
-      "learning_rate": 0.00013729903536977494,
-      "loss": 0.0303,
-      "step": 390
-    },
-    {
-      "epoch": 1.29,
-      "grad_norm": 0.42307400703430176,
-      "learning_rate": 0.0001356913183279743,
-      "loss": 0.2195,
-      "step": 400
-    },
-    {
-      "epoch": 1.32,
-      "grad_norm": 0.16662320494651794,
-      "learning_rate": 0.00013408360128617364,
-      "loss": 0.2999,
-      "step": 410
-    },
-    {
-      "epoch": 1.35,
-      "grad_norm": 35.579891204833984,
-      "learning_rate": 0.00013247588424437302,
-      "loss": 0.1194,
-      "step": 420
-    },
-    {
-      "epoch": 1.38,
-      "grad_norm": 3.4818050861358643,
-      "learning_rate": 0.00013086816720257237,
-      "loss": 0.1469,
-      "step": 430
-    },
-    {
-      "epoch": 1.41,
-      "grad_norm": 6.36860466003418,
-      "learning_rate": 0.00012926045016077172,
-      "loss": 0.2234,
-      "step": 440
-    },
-    {
-      "epoch": 1.45,
-      "grad_norm": 7.359828948974609,
-      "learning_rate": 0.00012765273311897106,
-      "loss": 0.2114,
-      "step": 450
-    },
-    {
-      "epoch": 1.48,
-      "grad_norm": 0.11759760975837708,
-      "learning_rate": 0.00012604501607717044,
-      "loss": 0.1059,
-      "step": 460
-    },
-    {
-      "epoch": 1.51,
-      "grad_norm": 0.049188051372766495,
-      "learning_rate": 0.0001244372990353698,
-      "loss": 0.207,
-      "step": 470
-    },
-    {
-      "epoch": 1.54,
-      "grad_norm": 0.06988845020532608,
-      "learning_rate": 0.00012282958199356914,
-      "loss": 0.1319,
-      "step": 480
-    },
-    {
-      "epoch": 1.58,
-      "grad_norm": 10.857504844665527,
-      "learning_rate": 0.0001212218649517685,
-      "loss": 0.3497,
-      "step": 490
-    },
-    {
-      "epoch": 1.61,
-      "grad_norm": 0.04112955555319786,
-      "learning_rate": 0.00011961414790996785,
-      "loss": 0.0711,
-      "step": 500
-    },
-    {
-      "epoch": 1.64,
-      "grad_norm": 20.134990692138672,
-      "learning_rate": 0.0001180064308681672,
-      "loss": 0.2654,
-      "step": 510
-    },
-    {
-      "epoch": 1.67,
-      "grad_norm": 0.03998303785920143,
-      "learning_rate": 0.00011639871382636655,
-      "loss": 0.0911,
-      "step": 520
-    },
-    {
-      "epoch": 1.7,
-      "grad_norm": 10.199617385864258,
-      "learning_rate": 0.00011479099678456593,
-      "loss": 0.1106,
-      "step": 530
-    },
-    {
-      "epoch": 1.74,
-      "grad_norm": 2.3347342014312744,
-      "learning_rate": 0.00011318327974276528,
-      "loss": 0.1948,
-      "step": 540
-    },
-    {
-      "epoch": 1.77,
-      "grad_norm": 15.492130279541016,
-      "learning_rate": 0.00011157556270096463,
-      "loss": 0.2999,
-      "step": 550
-    },
-    {
-      "epoch": 1.8,
-      "grad_norm": 16.2156982421875,
-      "learning_rate": 0.00010996784565916398,
-      "loss": 0.1792,
-      "step": 560
-    },
-    {
-      "epoch": 1.83,
-      "grad_norm": 3.9076225757598877,
-      "learning_rate": 0.00010836012861736335,
-      "loss": 0.4599,
-      "step": 570
-    },
-    {
-      "epoch": 1.86,
-      "grad_norm": 0.0662955567240715,
-      "learning_rate": 0.0001067524115755627,
-      "loss": 0.0834,
-      "step": 580
-    },
-    {
-      "epoch": 1.9,
-      "grad_norm": 0.43734121322631836,
-      "learning_rate": 0.00010514469453376205,
-      "loss": 0.1804,
-      "step": 590
-    },
-    {
-      "epoch": 1.93,
-      "grad_norm": 0.23478691279888153,
-      "learning_rate": 0.00010353697749196143,
-      "loss": 0.0831,
-      "step": 600
-    },
-    {
-      "epoch": 1.96,
-      "grad_norm": 8.97579574584961,
-      "learning_rate": 0.00010192926045016078,
-      "loss": 0.2141,
-      "step": 610
-    },
-    {
-      "epoch": 1.99,
-      "grad_norm": 5.947574615478516,
-      "learning_rate": 0.00010032154340836013,
-      "loss": 0.1059,
-      "step": 620
-    },
-    {
-      "epoch": 2.03,
-      "grad_norm": 0.3693161904811859,
-      "learning_rate": 9.871382636655949e-05,
-      "loss": 0.0478,
-      "step": 630
-    },
-    {
-      "epoch": 2.06,
-      "grad_norm": 0.33773139119148254,
-      "learning_rate": 9.710610932475884e-05,
-      "loss": 0.1512,
-      "step": 640
-    },
-    {
-      "epoch": 2.09,
-      "grad_norm": 0.07303290069103241,
-      "learning_rate": 9.54983922829582e-05,
-      "loss": 0.0746,
-      "step": 650
-    },
-    {
-      "epoch": 2.12,
-      "grad_norm": 0.021892189979553223,
-      "learning_rate": 9.389067524115757e-05,
-      "loss": 0.0071,
-      "step": 660
-    },
-    {
-      "epoch": 2.15,
-      "grad_norm": 0.699686586856842,
-      "learning_rate": 9.228295819935692e-05,
-      "loss": 0.08,
-      "step": 670
-    },
-    {
-      "epoch": 2.19,
-      "grad_norm": 1.7835339307785034,
-      "learning_rate": 9.067524115755628e-05,
-      "loss": 0.092,
-      "step": 680
-    },
-    {
-      "epoch": 2.22,
-      "grad_norm": 0.025796858593821526,
-      "learning_rate": 8.906752411575563e-05,
-      "loss": 0.041,
-      "step": 690
-    },
-    {
-      "epoch": 2.25,
-      "grad_norm": 11.788249969482422,
-      "learning_rate": 8.7459807073955e-05,
-      "loss": 0.0269,
-      "step": 700
-    },
-    {
-      "epoch": 2.28,
-      "grad_norm": 6.836824893951416,
-      "learning_rate": 8.585209003215434e-05,
-      "loss": 0.2086,
-      "step": 710
-    },
-    {
-      "epoch": 2.32,
-      "grad_norm": 0.02837471477687359,
-      "learning_rate": 8.42443729903537e-05,
-      "loss": 0.0828,
-      "step": 720
-    },
-    {
-      "epoch": 2.35,
-      "grad_norm": 0.04012266919016838,
-      "learning_rate": 8.263665594855306e-05,
-      "loss": 0.0057,
-      "step": 730
-    },
-    {
-      "epoch": 2.38,
-      "grad_norm": 0.05077001079916954,
-      "learning_rate": 8.102893890675242e-05,
-      "loss": 0.0116,
-      "step": 740
-    },
-    {
-      "epoch": 2.41,
-      "grad_norm": 0.08000744879245758,
-      "learning_rate": 7.942122186495177e-05,
-      "loss": 0.0305,
-      "step": 750
-    },
-    {
-      "epoch": 2.44,
-      "grad_norm": 0.03205496072769165,
-      "learning_rate": 7.781350482315113e-05,
-      "loss": 0.0451,
-      "step": 760
-    },
-    {
-      "epoch": 2.48,
-      "grad_norm": 0.027969710528850555,
-      "learning_rate": 7.62057877813505e-05,
-      "loss": 0.0779,
-      "step": 770
-    },
-    {
-      "epoch": 2.51,
-      "grad_norm": 3.297053098678589,
-      "learning_rate": 7.459807073954984e-05,
-      "loss": 0.028,
-      "step": 780
-    },
-    {
-      "epoch": 2.54,
-      "grad_norm": 1.8469219207763672,
-      "learning_rate": 7.299035369774921e-05,
-      "loss": 0.1222,
-      "step": 790
-    },
-    {
-      "epoch": 2.57,
-      "grad_norm": 0.5228595733642578,
-      "learning_rate": 7.138263665594856e-05,
-      "loss": 0.0087,
-      "step": 800
-    },
-    {
-      "epoch": 2.6,
-      "grad_norm": 0.028694279491901398,
-      "learning_rate": 6.977491961414792e-05,
-      "loss": 0.0053,
-      "step": 810
-    },
-    {
-      "epoch": 2.64,
-      "grad_norm": 0.026992863044142723,
-      "learning_rate": 6.816720257234727e-05,
-      "loss": 0.0065,
-      "step": 820
-    },
-    {
-      "epoch": 2.67,
-      "grad_norm": 1.996466040611267,
-      "learning_rate": 6.655948553054663e-05,
-      "loss": 0.0955,
-      "step": 830
-    },
-    {
-      "epoch": 2.7,
-      "grad_norm": 0.01983807235956192,
-      "learning_rate": 6.495176848874598e-05,
-      "loss": 0.1021,
-      "step": 840
-    },
-    {
-      "epoch": 2.73,
-      "grad_norm": 0.03182640299201012,
-      "learning_rate": 6.334405144694535e-05,
-      "loss": 0.1796,
-      "step": 850
-    },
-    {
-      "epoch": 2.77,
-      "grad_norm": 0.049088891595602036,
-      "learning_rate": 6.173633440514471e-05,
-      "loss": 0.0907,
-      "step": 860
-    },
-    {
-      "epoch": 2.8,
-      "grad_norm": 0.11043746769428253,
-      "learning_rate": 6.012861736334405e-05,
-      "loss": 0.0627,
-      "step": 870
-    },
-    {
-      "epoch": 2.83,
-      "grad_norm": 0.2206079512834549,
-      "learning_rate": 5.8520900321543414e-05,
-      "loss": 0.0412,
-      "step": 880
-    },
-    {
-      "epoch": 2.86,
-      "grad_norm": 0.02966146729886532,
-      "learning_rate": 5.6913183279742764e-05,
-      "loss": 0.1015,
-      "step": 890
-    },
-    {
-      "epoch": 2.89,
-      "grad_norm": 0.0345352403819561,
-      "learning_rate": 5.530546623794213e-05,
-      "loss": 0.0629,
-      "step": 900
-    },
-    {
-      "epoch": 2.93,
-      "grad_norm": 0.06348275393247604,
-      "learning_rate": 5.369774919614148e-05,
-      "loss": 0.0064,
-      "step": 910
-    },
-    {
-      "epoch": 2.96,
-      "grad_norm": 0.06559421122074127,
-      "learning_rate": 5.209003215434084e-05,
-      "loss": 0.0191,
-      "step": 920
-    },
-    {
-      "epoch": 2.99,
-      "grad_norm": 1.113765835762024,
-      "learning_rate": 5.048231511254019e-05,
-      "loss": 0.0259,
-      "step": 930
-    },
-    {
-      "epoch": 3.02,
-      "grad_norm": 0.02486424334347248,
-      "learning_rate": 4.887459807073955e-05,
-      "loss": 0.0049,
-      "step": 940
-    },
-    {
-      "epoch": 3.05,
-      "grad_norm": 0.7845320701599121,
-      "learning_rate": 4.726688102893891e-05,
-      "loss": 0.0043,
-      "step": 950
-    },
-    {
-      "epoch": 3.09,
-      "grad_norm": 0.021990863606333733,
-      "learning_rate": 4.5659163987138265e-05,
-      "loss": 0.0042,
-      "step": 960
-    },
-    {
-      "epoch": 3.12,
-      "grad_norm": 0.022443994879722595,
-      "learning_rate": 4.405144694533762e-05,
-      "loss": 0.0054,
-      "step": 970
-    },
-    {
-      "epoch": 3.15,
-      "grad_norm": 0.009742784313857555,
-      "learning_rate": 4.244372990353698e-05,
-      "loss": 0.0041,
-      "step": 980
-    },
-    {
-      "epoch": 3.18,
-      "grad_norm": 0.037747763097286224,
-      "learning_rate": 4.083601286173634e-05,
-      "loss": 0.0242,
-      "step": 990
-    },
-    {
-      "epoch": 3.22,
-      "grad_norm": 0.010466611944139004,
-      "learning_rate": 3.92282958199357e-05,
-      "loss": 0.0035,
-      "step": 1000
-    },
-    {
-      "epoch": 3.22,
-      "eval_accuracy": 0.9485530546623794,
-      "eval_loss": 0.21430718898773193,
-      "eval_runtime": 14.2198,
-      "eval_samples_per_second": 43.742,
-      "eval_steps_per_second": 5.485,
-      "step": 1000
-    },
-    {
-      "epoch": 3.25,
-      "grad_norm": 0.00991890113800764,
-      "learning_rate": 3.7620578778135054e-05,
-      "loss": 0.0039,
-      "step": 1010
-    },
-    {
-      "epoch": 3.28,
-      "grad_norm": 0.016740955412387848,
-      "learning_rate": 3.601286173633441e-05,
-      "loss": 0.0065,
-      "step": 1020
-    },
-    {
-      "epoch": 3.31,
-      "grad_norm": 0.03466745838522911,
-      "learning_rate": 3.4405144694533766e-05,
-      "loss": 0.099,
-      "step": 1030
-    },
-    {
-      "epoch": 3.34,
-      "grad_norm": 0.008615425787866116,
-      "learning_rate": 3.279742765273312e-05,
-      "loss": 0.0179,
-      "step": 1040
-    },
-    {
-      "epoch": 3.38,
-      "grad_norm": 0.05827281251549721,
-      "learning_rate": 3.118971061093248e-05,
-      "loss": 0.0041,
-      "step": 1050
-    },
-    {
-      "epoch": 3.41,
-      "grad_norm": 0.0276072658598423,
-      "learning_rate": 2.9581993569131832e-05,
-      "loss": 0.0036,
-      "step": 1060
-    },
-    {
-      "epoch": 3.44,
-      "grad_norm": 0.011412302032113075,
-      "learning_rate": 2.7974276527331188e-05,
-      "loss": 0.1013,
-      "step": 1070
-    },
-    {
-      "epoch": 3.47,
-      "grad_norm": 0.013982011005282402,
-      "learning_rate": 2.6366559485530545e-05,
-      "loss": 0.0058,
-      "step": 1080
-    },
-    {
-      "epoch": 3.5,
-      "grad_norm": 0.026057597249746323,
-      "learning_rate": 2.4758842443729904e-05,
-      "loss": 0.0077,
-      "step": 1090
-    },
-    {
-      "epoch": 3.54,
-      "grad_norm": 0.04853319376707077,
-      "learning_rate": 2.315112540192926e-05,
-      "loss": 0.0035,
-      "step": 1100
-    },
-    {
-      "epoch": 3.57,
-      "grad_norm": 0.013841088861227036,
-      "learning_rate": 2.154340836012862e-05,
-      "loss": 0.0208,
-      "step": 1110
-    },
-    {
-      "epoch": 3.6,
-      "grad_norm": 0.03845496475696564,
-      "learning_rate": 1.9935691318327977e-05,
-      "loss": 0.0038,
-      "step": 1120
-    },
-    {
-      "epoch": 3.63,
-      "grad_norm": 0.023922910913825035,
-      "learning_rate": 1.8327974276527333e-05,
-      "loss": 0.0032,
-      "step": 1130
-    },
-    {
-      "epoch": 3.67,
-      "grad_norm": 0.014864934608340263,
-      "learning_rate": 1.672025723472669e-05,
-      "loss": 0.0028,
-      "step": 1140
-    },
-    {
-      "epoch": 3.7,
-      "grad_norm": 0.05655550956726074,
-      "learning_rate": 1.5112540192926044e-05,
-      "loss": 0.0039,
-      "step": 1150
-    },
-    {
-      "epoch": 3.73,
-      "grad_norm": 0.012573642656207085,
-      "learning_rate": 1.3504823151125404e-05,
-      "loss": 0.0028,
-      "step": 1160
-    },
-    {
-      "epoch": 3.76,
-      "grad_norm": 0.022632773965597153,
-      "learning_rate": 1.189710610932476e-05,
-      "loss": 0.0033,
-      "step": 1170
-    },
-    {
-      "epoch": 3.79,
-      "grad_norm": 0.01279931515455246,
-      "learning_rate": 1.0289389067524116e-05,
-      "loss": 0.0238,
-      "step": 1180
-    },
-    {
-      "epoch": 3.83,
-      "grad_norm": 0.023662865161895752,
-      "learning_rate": 8.681672025723474e-06,
-      "loss": 0.0253,
-      "step": 1190
-    },
-    {
-      "epoch": 3.86,
-      "grad_norm": 0.017510054633021355,
-      "learning_rate": 7.07395498392283e-06,
-      "loss": 0.0047,
-      "step": 1200
-    },
-    {
-      "epoch": 3.89,
-      "grad_norm": 0.0257584135979414,
-      "learning_rate": 5.466237942122187e-06,
-      "loss": 0.004,
-      "step": 1210
-    },
-    {
-      "epoch": 3.92,
-      "grad_norm": 0.3079407513141632,
-      "learning_rate": 3.858520900321544e-06,
-      "loss": 0.085,
-      "step": 1220
-    },
-    {
-      "epoch": 3.95,
-      "grad_norm": 0.00990583747625351,
-      "learning_rate": 2.2508038585209006e-06,
-      "loss": 0.0029,
-      "step": 1230
-    },
-    {
-      "epoch": 3.99,
-      "grad_norm": 0.011738813482224941,
-      "learning_rate": 6.430868167202573e-07,
-      "loss": 0.034,
-      "step": 1240
-    },
     {
       "epoch": 4.0,
-      "step": 1244,
-      "total_flos": 7.703325099767808e+17,
-      "train_loss": 0.1579143282675882,
-      "train_runtime": 491.3754,
-      "train_samples_per_second": 20.229,
-      "train_steps_per_second": 2.532
     }
   ],
   "logging_steps": 10,
-  "max_steps": 1244,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
-  "total_flos": 7.703325099767808e+17,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": null,
+  "best_model_checkpoint": null,
   "epoch": 4.0,
   "eval_steps": 1000,
+  "global_step": 12,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 3.33,
+      "grad_norm": 2.8058300018310547,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 2.1093,
       "step": 10
     },
     {
       "epoch": 4.0,
+      "step": 12,
+      "total_flos": 7439897757745152.0,
+      "train_loss": 2.0212895274162292,
+      "train_runtime": 7.7329,
+      "train_samples_per_second": 12.414,
+      "train_steps_per_second": 1.552
     }
   ],
   "logging_steps": 10,
+  "max_steps": 12,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 4,
   "save_steps": 1000,
+  "total_flos": 7439897757745152.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:72eb25d1bd046d95141d050a7046c0b70c3fb9f17a91b9b38a15dbfa48b5b07c
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:a661c95712059d0902cb2770fd16a90f1cd9488c7804b9523f02c0e91000074b
 size 4920