Training in progress, step 122000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17ad77a5836e64f24a8b25727c7010e1e6bfb4221b470cc19c7984a7cf5c554f
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:bbeaafd6ccfc1a71df631284d94737e176690a5d53963ee816742d55c66f65c3
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23ad6869718890391698fc79d72694fb639bc5385a3a4b6d972ede8f1014be5a
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:d105f7bc65410e3f121dcaf59b93fed762c9a65fe7e4c8955d1292cb6a6876c9
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ad5a40960f391139477ddb9b78e0ab15a7bf780054968f56c0fa6f69b586ae0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:9da9fa9d20ad8eaec174be663669ed0dd6272da27b984848d5af57376e63a91f
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f734b5ef728d3a4ed061135c5dc113b75123bf723ff669c222a8fb8e7106ac78
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:4fa62ee06d52a6750aacfff038383024cfa35b60c5b93fdacff2bca27d4639e6
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b9933aabb9b693e63222e2aee5661ca34e76577fac4cee629457e7574e57c95
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:1810a8bef166e692355d67f304bf8cfd105103f952547985645833c2feea07b4
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:103e929287742eda28ad435c06eca2df66b7ca7172a6ecee18e8bfa9143fd447
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:e06c20da7b3de893663276090538aadc1b5a365c5cfce0a0140a2548043a2773
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d28f88f0c01c8606daa54ba4f31834795bef052a5ef8a3b5c559f7c0121b5068
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:60ff0e39fa2ada0903d0841edad35ce944a197d8f614422d9d9915f5101a0a12
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.05974635942224776,
   "eval_steps": 500,
-  "global_step": 121000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -84708,6 +84708,706 @@
       "learning_rate": 0.0004900826872074986,
       "loss": 14.854,
       "step": 121000
     }
   ],
   "logging_steps": 10,
@@ -84727,7 +85427,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.5842761105986395e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.06024013098772089,
   "eval_steps": 500,
+  "global_step": 122000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004900826872074986,
       "loss": 14.854,
       "step": 121000
+    },
+    {
+      "epoch": 0.0597512971379025,
+      "grad_norm": 18.25,
+      "learning_rate": 0.0004900818641871587,
+      "loss": 14.6916,
+      "step": 121010
+    },
+    {
+      "epoch": 0.05975623485355723,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.000490081041166819,
+      "loss": 14.7784,
+      "step": 121020
+    },
+    {
+      "epoch": 0.059761172569211964,
+      "grad_norm": 14.3125,
+      "learning_rate": 0.0004900802181464792,
+      "loss": 14.6165,
+      "step": 121030
+    },
+    {
+      "epoch": 0.05976611028486669,
+      "grad_norm": 12.75,
+      "learning_rate": 0.0004900793951261394,
+      "loss": 14.8089,
+      "step": 121040
+    },
+    {
+      "epoch": 0.05977104800052142,
+      "grad_norm": 13.6875,
+      "learning_rate": 0.0004900785721057996,
+      "loss": 14.64,
+      "step": 121050
+    },
+    {
+      "epoch": 0.05977598571617616,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004900777490854598,
+      "loss": 14.524,
+      "step": 121060
+    },
+    {
+      "epoch": 0.05978092343183088,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.00049007692606512,
+      "loss": 14.5122,
+      "step": 121070
+    },
+    {
+      "epoch": 0.05978586114748562,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004900761030447803,
+      "loss": 14.4947,
+      "step": 121080
+    },
+    {
+      "epoch": 0.05979079886314035,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004900752800244405,
+      "loss": 14.5841,
+      "step": 121090
+    },
+    {
+      "epoch": 0.059795736578795076,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004900744570041006,
+      "loss": 14.5958,
+      "step": 121100
+    },
+    {
+      "epoch": 0.05980067429444981,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004900736339837608,
+      "loss": 14.6845,
+      "step": 121110
+    },
+    {
+      "epoch": 0.05980561201010454,
+      "grad_norm": 8.625,
+      "learning_rate": 0.000490072810963421,
+      "loss": 14.6689,
+      "step": 121120
+    },
+    {
+      "epoch": 0.05981054972575927,
+      "grad_norm": 12.375,
+      "learning_rate": 0.0004900719879430812,
+      "loss": 14.6507,
+      "step": 121130
+    },
+    {
+      "epoch": 0.059815487441414,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004900711649227415,
+      "loss": 14.702,
+      "step": 121140
+    },
+    {
+      "epoch": 0.059820425157068736,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004900703419024016,
+      "loss": 14.3188,
+      "step": 121150
+    },
+    {
+      "epoch": 0.05982536287272346,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004900695188820619,
+      "loss": 14.7093,
+      "step": 121160
+    },
+    {
+      "epoch": 0.059830300588378196,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004900686958617221,
+      "loss": 14.7201,
+      "step": 121170
+    },
+    {
+      "epoch": 0.05983523830403293,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004900678728413822,
+      "loss": 14.575,
+      "step": 121180
+    },
+    {
+      "epoch": 0.05984017601968766,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004900670498210424,
+      "loss": 14.666,
+      "step": 121190
+    },
+    {
+      "epoch": 0.05984511373534239,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004900662268007027,
+      "loss": 14.6933,
+      "step": 121200
+    },
+    {
+      "epoch": 0.05985005145099712,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004900654037803628,
+      "loss": 14.8033,
+      "step": 121210
+    },
+    {
+      "epoch": 0.059854989166651856,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004900645807600231,
+      "loss": 14.4617,
+      "step": 121220
+    },
+    {
+      "epoch": 0.05985992688230658,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004900637577396833,
+      "loss": 14.7749,
+      "step": 121230
+    },
+    {
+      "epoch": 0.059864864597961316,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004900629347193435,
+      "loss": 14.6859,
+      "step": 121240
+    },
+    {
+      "epoch": 0.05986980231361605,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004900621116990038,
+      "loss": 14.5459,
+      "step": 121250
+    },
+    {
+      "epoch": 0.059874740029270776,
+      "grad_norm": 10.9375,
+      "learning_rate": 0.000490061288678664,
+      "loss": 14.7551,
+      "step": 121260
+    },
+    {
+      "epoch": 0.05987967774492551,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004900604656583241,
+      "loss": 14.719,
+      "step": 121270
+    },
+    {
+      "epoch": 0.05988461546058024,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004900596426379843,
+      "loss": 14.6706,
+      "step": 121280
+    },
+    {
+      "epoch": 0.05988955317623497,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004900588196176445,
+      "loss": 14.6011,
+      "step": 121290
+    },
+    {
+      "epoch": 0.0598944908918897,
+      "grad_norm": 11.25,
+      "learning_rate": 0.0004900579965973047,
+      "loss": 14.5435,
+      "step": 121300
+    },
+    {
+      "epoch": 0.059899428607544436,
+      "grad_norm": 125.0,
+      "learning_rate": 0.000490057173576965,
+      "loss": 14.7466,
+      "step": 121310
+    },
+    {
+      "epoch": 0.05990436632319916,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004900563505566251,
+      "loss": 14.6536,
+      "step": 121320
+    },
+    {
+      "epoch": 0.059909304038853896,
+      "grad_norm": 11.6875,
+      "learning_rate": 0.0004900555275362854,
+      "loss": 14.7485,
+      "step": 121330
+    },
+    {
+      "epoch": 0.05991424175450863,
+      "grad_norm": 15.6875,
+      "learning_rate": 0.0004900547045159455,
+      "loss": 14.7255,
+      "step": 121340
+    },
+    {
+      "epoch": 0.05991917947016336,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004900538814956057,
+      "loss": 14.6954,
+      "step": 121350
+    },
+    {
+      "epoch": 0.05992411718581809,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004900530584752659,
+      "loss": 14.7095,
+      "step": 121360
+    },
+    {
+      "epoch": 0.05992905490147282,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004900522354549262,
+      "loss": 14.6034,
+      "step": 121370
+    },
+    {
+      "epoch": 0.059933992617127556,
+      "grad_norm": 14.3125,
+      "learning_rate": 0.0004900514124345863,
+      "loss": 14.5974,
+      "step": 121380
+    },
+    {
+      "epoch": 0.05993893033278228,
+      "grad_norm": 19.875,
+      "learning_rate": 0.0004900505894142466,
+      "loss": 14.6913,
+      "step": 121390
+    },
+    {
+      "epoch": 0.059943868048437016,
+      "grad_norm": 34.5,
+      "learning_rate": 0.0004900497663939068,
+      "loss": 14.6392,
+      "step": 121400
+    },
+    {
+      "epoch": 0.05994880576409175,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.000490048943373567,
+      "loss": 14.5991,
+      "step": 121410
+    },
+    {
+      "epoch": 0.059953743479746475,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004900481203532271,
+      "loss": 14.5578,
+      "step": 121420
+    },
+    {
+      "epoch": 0.05995868119540121,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004900472973328873,
+      "loss": 14.6561,
+      "step": 121430
+    },
+    {
+      "epoch": 0.05996361891105594,
+      "grad_norm": 79.5,
+      "learning_rate": 0.0004900464743125475,
+      "loss": 14.7054,
+      "step": 121440
+    },
+    {
+      "epoch": 0.05996855662671067,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004900456512922078,
+      "loss": 14.6515,
+      "step": 121450
+    },
+    {
+      "epoch": 0.0599734943423654,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.000490044828271868,
+      "loss": 14.7677,
+      "step": 121460
+    },
+    {
+      "epoch": 0.059978432058020135,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004900440052515282,
+      "loss": 14.7085,
+      "step": 121470
+    },
+    {
+      "epoch": 0.05998336977367486,
+      "grad_norm": 44.5,
+      "learning_rate": 0.0004900431822311884,
+      "loss": 14.7554,
+      "step": 121480
+    },
+    {
+      "epoch": 0.059988307489329595,
+      "grad_norm": 18.875,
+      "learning_rate": 0.0004900423592108486,
+      "loss": 14.7442,
+      "step": 121490
+    },
+    {
+      "epoch": 0.05999324520498433,
+      "grad_norm": 12.5625,
+      "learning_rate": 0.0004900415361905087,
+      "loss": 14.6697,
+      "step": 121500
+    },
+    {
+      "epoch": 0.05999818292063906,
+      "grad_norm": 10.8125,
+      "learning_rate": 0.000490040713170169,
+      "loss": 14.5874,
+      "step": 121510
+    },
+    {
+      "epoch": 0.06000312063629379,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004900398901498292,
+      "loss": 14.6752,
+      "step": 121520
+    },
+    {
+      "epoch": 0.06000805835194852,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004900390671294894,
+      "loss": 14.6608,
+      "step": 121530
+    },
+    {
+      "epoch": 0.060012996067603255,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004900382441091496,
+      "loss": 14.6988,
+      "step": 121540
+    },
+    {
+      "epoch": 0.06001793378325798,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.0004900374210888098,
+      "loss": 14.6037,
+      "step": 121550
+    },
+    {
+      "epoch": 0.060022871498912715,
+      "grad_norm": 10.6875,
+      "learning_rate": 0.00049003659806847,
+      "loss": 14.7486,
+      "step": 121560
+    },
+    {
+      "epoch": 0.06002780921456745,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004900357750481303,
+      "loss": 14.7613,
+      "step": 121570
+    },
+    {
+      "epoch": 0.060032746930222175,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004900349520277905,
+      "loss": 14.5282,
+      "step": 121580
+    },
+    {
+      "epoch": 0.06003768464587691,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004900341290074506,
+      "loss": 14.7733,
+      "step": 121590
+    },
+    {
+      "epoch": 0.06004262236153164,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004900333059871108,
+      "loss": 14.5763,
+      "step": 121600
+    },
+    {
+      "epoch": 0.06004756007718637,
+      "grad_norm": 13.3125,
+      "learning_rate": 0.000490032482966771,
+      "loss": 14.6428,
+      "step": 121610
+    },
+    {
+      "epoch": 0.0600524977928411,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004900316599464313,
+      "loss": 14.6828,
+      "step": 121620
+    },
+    {
+      "epoch": 0.060057435508495835,
+      "grad_norm": 61.75,
+      "learning_rate": 0.0004900308369260915,
+      "loss": 14.5284,
+      "step": 121630
+    },
+    {
+      "epoch": 0.06006237322415056,
+      "grad_norm": 18.625,
+      "learning_rate": 0.0004900300139057517,
+      "loss": 14.5693,
+      "step": 121640
+    },
+    {
+      "epoch": 0.060067310939805295,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004900291908854119,
+      "loss": 14.7845,
+      "step": 121650
+    },
+    {
+      "epoch": 0.06007224865546003,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.000490028367865072,
+      "loss": 14.7955,
+      "step": 121660
+    },
+    {
+      "epoch": 0.06007718637111476,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004900275448447322,
+      "loss": 14.6117,
+      "step": 121670
+    },
+    {
+      "epoch": 0.06008212408676949,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004900267218243925,
+      "loss": 14.7178,
+      "step": 121680
+    },
+    {
+      "epoch": 0.06008706180242422,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004900258988040527,
+      "loss": 14.5357,
+      "step": 121690
+    },
+    {
+      "epoch": 0.060091999518078955,
+      "grad_norm": 30.625,
+      "learning_rate": 0.0004900250757837129,
+      "loss": 14.688,
+      "step": 121700
+    },
+    {
+      "epoch": 0.06009693723373368,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004900242527633731,
+      "loss": 14.6058,
+      "step": 121710
+    },
+    {
+      "epoch": 0.060101874949388415,
+      "grad_norm": 12.0,
+      "learning_rate": 0.0004900234297430333,
+      "loss": 14.7661,
+      "step": 121720
+    },
+    {
+      "epoch": 0.06010681266504315,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004900226067226935,
+      "loss": 14.5204,
+      "step": 121730
+    },
+    {
+      "epoch": 0.060111750380697875,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004900217837023538,
+      "loss": 14.5525,
+      "step": 121740
+    },
+    {
+      "epoch": 0.06011668809635261,
+      "grad_norm": 8.25,
+      "learning_rate": 0.000490020960682014,
+      "loss": 14.7768,
+      "step": 121750
+    },
+    {
+      "epoch": 0.06012162581200734,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004900201376616741,
+      "loss": 14.5709,
+      "step": 121760
+    },
+    {
+      "epoch": 0.06012656352766207,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004900193146413343,
+      "loss": 14.5302,
+      "step": 121770
+    },
+    {
+      "epoch": 0.0601315012433168,
+      "grad_norm": 20.375,
+      "learning_rate": 0.0004900184916209945,
+      "loss": 14.6281,
+      "step": 121780
+    },
+    {
+      "epoch": 0.060136438958971535,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004900176686006547,
+      "loss": 14.6019,
+      "step": 121790
+    },
+    {
+      "epoch": 0.06014137667462626,
+      "grad_norm": 8.75,
+      "learning_rate": 0.000490016845580315,
+      "loss": 14.6996,
+      "step": 121800
+    },
+    {
+      "epoch": 0.060146314390280994,
+      "grad_norm": 9.375,
+      "learning_rate": 0.0004900160225599752,
+      "loss": 14.6491,
+      "step": 121810
+    },
+    {
+      "epoch": 0.06015125210593573,
+      "grad_norm": 12.0,
+      "learning_rate": 0.0004900151995396354,
+      "loss": 14.7936,
+      "step": 121820
+    },
+    {
+      "epoch": 0.06015618982159046,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004900143765192955,
+      "loss": 14.7424,
+      "step": 121830
+    },
+    {
+      "epoch": 0.06016112753724519,
+      "grad_norm": 15.5,
+      "learning_rate": 0.0004900135534989557,
+      "loss": 14.6962,
+      "step": 121840
+    },
+    {
+      "epoch": 0.06016606525289992,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004900127304786159,
+      "loss": 14.6642,
+      "step": 121850
+    },
+    {
+      "epoch": 0.060171002968554654,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004900119074582762,
+      "loss": 14.7013,
+      "step": 121860
+    },
+    {
+      "epoch": 0.06017594068420938,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004900110844379364,
+      "loss": 14.7083,
+      "step": 121870
+    },
+    {
+      "epoch": 0.060180878399864114,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004900102614175966,
+      "loss": 14.4611,
+      "step": 121880
+    },
+    {
+      "epoch": 0.06018581611551885,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004900094383972568,
+      "loss": 14.6936,
+      "step": 121890
+    },
+    {
+      "epoch": 0.060190753831173574,
+      "grad_norm": 26.625,
+      "learning_rate": 0.000490008615376917,
+      "loss": 14.6797,
+      "step": 121900
+    },
+    {
+      "epoch": 0.06019569154682831,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004900077923565773,
+      "loss": 14.6074,
+      "step": 121910
+    },
+    {
+      "epoch": 0.06020062926248304,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004900069693362374,
+      "loss": 14.6961,
+      "step": 121920
+    },
+    {
+      "epoch": 0.06020556697813777,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004900061463158976,
+      "loss": 14.7742,
+      "step": 121930
+    },
+    {
+      "epoch": 0.0602105046937925,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004900053232955578,
+      "loss": 14.828,
+      "step": 121940
+    },
+    {
+      "epoch": 0.060215442409447234,
+      "grad_norm": 16.375,
+      "learning_rate": 0.000490004500275218,
+      "loss": 14.4311,
+      "step": 121950
+    },
+    {
+      "epoch": 0.06022038012510196,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004900036772548782,
+      "loss": 14.4625,
+      "step": 121960
+    },
+    {
+      "epoch": 0.060225317840756694,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004900028542345385,
+      "loss": 14.7465,
+      "step": 121970
+    },
+    {
+      "epoch": 0.06023025555641143,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004900020312141987,
+      "loss": 14.4267,
+      "step": 121980
+    },
+    {
+      "epoch": 0.06023519327206616,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004900012081938589,
+      "loss": 14.7071,
+      "step": 121990
+    },
+    {
+      "epoch": 0.06024013098772089,
+      "grad_norm": 10.0,
+      "learning_rate": 0.000490000385173519,
+      "loss": 14.6893,
+      "step": 122000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.6015940864424647e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null