Training in progress, step 19000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b00afd84b6c9ce17eaf6cde875a1462d2a5f0a7c0b9c73a9b93dfa70356a2e2
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:24b1b3bb212bb8e7a89dfabaae6120a59b5faf8f122cf7ad539861771d8cb89a
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ebce57f62b5c08e94d3ef4d4c19d6f624921ff13378d5f419a1a0fc63ae8de2
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:229e29c1490a5a19b0d4fabe6ed475185030744313f815cbb86ca74d5cd2c449
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13c1b31558f9530223d30967d940c908110b66ae87767dc8b41640c0ec2ab3ad
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:9dde6c449de2177e60a94a9900fe6d5a14850cbd574c0318aea1700129a48c14
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31e1f3d55bb567df3a2ebf344a0ee08608b18736ddff2de100218656482b16ab
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea9cf41a0d1b98e0760cde7b6c59ff69bea027f4012ae8adfed82ffa854f9831
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7068584adf4719cad732133ffdff00b498545ab4f7b6d887d675a74b59641e2
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:accf3b70270e61a071af09840966e9ef1fc65fa1b993b5947a7d43d8b578c1b2
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f43ad3e51655951e2a9c021cf9bdd46d25eb6df7a162e3fc18fe50a401173803
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:5275cc5e18e1b6590412766faef669b7b593c775c1ba9bd63e9afe6463f5d8b8
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:add33ce1c647f1ad24436fdd2c7095ade5081fad618777000690c7e187278b49
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:b73090e5ff4d77e40aae33305c58d2deda13e4f4510f1c076acf40a9f8a97bef
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.035099521769015894,
   "eval_steps": 500,
-  "global_step": 18000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12608,6 +12608,706 @@
       "learning_rate": 0.0004943110480558603,
       "loss": 17.5819,
       "step": 18000
     }
   ],
   "logging_steps": 10,
@@ -12627,7 +13327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.889326067389196e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.03704949520062789,
   "eval_steps": 500,
+  "global_step": 19000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004943110480558603,
       "loss": 17.5819,
       "step": 18000
+    },
+    {
+      "epoch": 0.035119021503332015,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004943077970404056,
+      "loss": 17.6622,
+      "step": 18010
+    },
+    {
+      "epoch": 0.035138521237648136,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004943045460249509,
+      "loss": 17.7021,
+      "step": 18020
+    },
+    {
+      "epoch": 0.03515802097196426,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004943012950094963,
+      "loss": 17.5496,
+      "step": 18030
+    },
+    {
+      "epoch": 0.03517752070628038,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004942980439940416,
+      "loss": 17.6953,
+      "step": 18040
+    },
+    {
+      "epoch": 0.0351970204405965,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004942947929785869,
+      "loss": 17.5717,
+      "step": 18050
+    },
+    {
+      "epoch": 0.03521652017491262,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004942915419631321,
+      "loss": 17.5987,
+      "step": 18060
+    },
+    {
+      "epoch": 0.035236019909228734,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004942882909476775,
+      "loss": 17.4626,
+      "step": 18070
+    },
+    {
+      "epoch": 0.035255519643544855,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004942850399322228,
+      "loss": 17.6371,
+      "step": 18080
+    },
+    {
+      "epoch": 0.035275019377860976,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004942817889167681,
+      "loss": 17.6286,
+      "step": 18090
+    },
+    {
+      "epoch": 0.0352945191121771,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004942785379013135,
+      "loss": 17.4806,
+      "step": 18100
+    },
+    {
+      "epoch": 0.03531401884649322,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004942752868858588,
+      "loss": 17.5438,
+      "step": 18110
+    },
+    {
+      "epoch": 0.03533351858080934,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004942720358704041,
+      "loss": 17.6634,
+      "step": 18120
+    },
+    {
+      "epoch": 0.03535301831512545,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004942687848549494,
+      "loss": 17.5197,
+      "step": 18130
+    },
+    {
+      "epoch": 0.035372518049441574,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004942655338394948,
+      "loss": 17.6001,
+      "step": 18140
+    },
+    {
+      "epoch": 0.035392017783757695,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004942622828240401,
+      "loss": 17.6117,
+      "step": 18150
+    },
+    {
+      "epoch": 0.035411517518073816,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004942590318085854,
+      "loss": 17.4802,
+      "step": 18160
+    },
+    {
+      "epoch": 0.03543101725238994,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004942557807931308,
+      "loss": 17.6808,
+      "step": 18170
+    },
+    {
+      "epoch": 0.03545051698670606,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004942525297776761,
+      "loss": 17.5905,
+      "step": 18180
+    },
+    {
+      "epoch": 0.03547001672102218,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004942492787622214,
+      "loss": 17.5809,
+      "step": 18190
+    },
+    {
+      "epoch": 0.035489516455338294,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004942460277467667,
+      "loss": 17.7091,
+      "step": 18200
+    },
+    {
+      "epoch": 0.035509016189654415,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004942427767313121,
+      "loss": 17.5444,
+      "step": 18210
+    },
+    {
+      "epoch": 0.035528515923970536,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004942395257158573,
+      "loss": 17.5533,
+      "step": 18220
+    },
+    {
+      "epoch": 0.03554801565828666,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004942362747004026,
+      "loss": 17.6227,
+      "step": 18230
+    },
+    {
+      "epoch": 0.03556751539260278,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.000494233023684948,
+      "loss": 17.5303,
+      "step": 18240
+    },
+    {
+      "epoch": 0.0355870151269189,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004942297726694933,
+      "loss": 17.5429,
+      "step": 18250
+    },
+    {
+      "epoch": 0.03560651486123501,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.0004942265216540386,
+      "loss": 17.6151,
+      "step": 18260
+    },
+    {
+      "epoch": 0.035626014595551134,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004942232706385839,
+      "loss": 17.42,
+      "step": 18270
+    },
+    {
+      "epoch": 0.035645514329867255,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004942200196231293,
+      "loss": 17.436,
+      "step": 18280
+    },
+    {
+      "epoch": 0.035665014064183376,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004942167686076746,
+      "loss": 17.5189,
+      "step": 18290
+    },
+    {
+      "epoch": 0.0356845137984995,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004942135175922199,
+      "loss": 17.5484,
+      "step": 18300
+    },
+    {
+      "epoch": 0.03570401353281562,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004942102665767652,
+      "loss": 17.6905,
+      "step": 18310
+    },
+    {
+      "epoch": 0.03572351326713174,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004942070155613106,
+      "loss": 17.677,
+      "step": 18320
+    },
+    {
+      "epoch": 0.03574301300144785,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004942037645458559,
+      "loss": 17.5109,
+      "step": 18330
+    },
+    {
+      "epoch": 0.035762512735763974,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004942005135304012,
+      "loss": 17.52,
+      "step": 18340
+    },
+    {
+      "epoch": 0.035782012470080095,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004941972625149466,
+      "loss": 17.6037,
+      "step": 18350
+    },
+    {
+      "epoch": 0.035801512204396216,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941940114994919,
+      "loss": 17.5273,
+      "step": 18360
+    },
+    {
+      "epoch": 0.03582101193871234,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941907604840372,
+      "loss": 17.4488,
+      "step": 18370
+    },
+    {
+      "epoch": 0.03584051167302846,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004941875094685825,
+      "loss": 17.5285,
+      "step": 18380
+    },
+    {
+      "epoch": 0.03586001140734457,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004941842584531279,
+      "loss": 17.5049,
+      "step": 18390
+    },
+    {
+      "epoch": 0.03587951114166069,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004941810074376732,
+      "loss": 17.5321,
+      "step": 18400
+    },
+    {
+      "epoch": 0.035899010875976814,
+      "grad_norm": 10.5,
+      "learning_rate": 0.0004941777564222185,
+      "loss": 17.5864,
+      "step": 18410
+    },
+    {
+      "epoch": 0.035918510610292935,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941745054067638,
+      "loss": 17.4779,
+      "step": 18420
+    },
+    {
+      "epoch": 0.035938010344609056,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004941712543913091,
+      "loss": 17.4962,
+      "step": 18430
+    },
+    {
+      "epoch": 0.03595751007892518,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004941680033758544,
+      "loss": 17.449,
+      "step": 18440
+    },
+    {
+      "epoch": 0.0359770098132413,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004941647523603997,
+      "loss": 17.5084,
+      "step": 18450
+    },
+    {
+      "epoch": 0.03599650954755741,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004941615013449451,
+      "loss": 17.5459,
+      "step": 18460
+    },
+    {
+      "epoch": 0.03601600928187353,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004941582503294904,
+      "loss": 17.5541,
+      "step": 18470
+    },
+    {
+      "epoch": 0.036035509016189654,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004941549993140357,
+      "loss": 17.635,
+      "step": 18480
+    },
+    {
+      "epoch": 0.036055008750505775,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.000494151748298581,
+      "loss": 17.5686,
+      "step": 18490
+    },
+    {
+      "epoch": 0.036074508484821896,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004941484972831264,
+      "loss": 17.6741,
+      "step": 18500
+    },
+    {
+      "epoch": 0.03609400821913802,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.0004941452462676717,
+      "loss": 17.6735,
+      "step": 18510
+    },
+    {
+      "epoch": 0.03611350795345413,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.000494141995252217,
+      "loss": 17.5192,
+      "step": 18520
+    },
+    {
+      "epoch": 0.03613300768777025,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004941387442367624,
+      "loss": 17.3641,
+      "step": 18530
+    },
+    {
+      "epoch": 0.03615250742208637,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004941354932213077,
+      "loss": 17.586,
+      "step": 18540
+    },
+    {
+      "epoch": 0.036172007156402494,
+      "grad_norm": 8.875,
+      "learning_rate": 0.000494132242205853,
+      "loss": 17.5065,
+      "step": 18550
+    },
+    {
+      "epoch": 0.036191506890718615,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004941289911903983,
+      "loss": 17.4222,
+      "step": 18560
+    },
+    {
+      "epoch": 0.036211006625034736,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004941257401749437,
+      "loss": 17.513,
+      "step": 18570
+    },
+    {
+      "epoch": 0.03623050635935086,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.000494122489159489,
+      "loss": 17.5073,
+      "step": 18580
+    },
+    {
+      "epoch": 0.03625000609366697,
+      "grad_norm": 11.875,
+      "learning_rate": 0.0004941192381440343,
+      "loss": 17.3283,
+      "step": 18590
+    },
+    {
+      "epoch": 0.03626950582798309,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004941159871285797,
+      "loss": 17.4716,
+      "step": 18600
+    },
+    {
+      "epoch": 0.03628900556229921,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494112736113125,
+      "loss": 17.372,
+      "step": 18610
+    },
+    {
+      "epoch": 0.036308505296615334,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004941094850976703,
+      "loss": 17.4356,
+      "step": 18620
+    },
+    {
+      "epoch": 0.036328005030931455,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004941062340822156,
+      "loss": 17.5272,
+      "step": 18630
+    },
+    {
+      "epoch": 0.036347504765247576,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.000494102983066761,
+      "loss": 17.5673,
+      "step": 18640
+    },
+    {
+      "epoch": 0.03636700449956369,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004940997320513063,
+      "loss": 17.5017,
+      "step": 18650
+    },
+    {
+      "epoch": 0.03638650423387981,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004940964810358516,
+      "loss": 17.4877,
+      "step": 18660
+    },
+    {
+      "epoch": 0.03640600396819593,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004940932300203969,
+      "loss": 17.4823,
+      "step": 18670
+    },
+    {
+      "epoch": 0.03642550370251205,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004940899790049422,
+      "loss": 17.4745,
+      "step": 18680
+    },
+    {
+      "epoch": 0.036445003436828174,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004940867279894875,
+      "loss": 17.6121,
+      "step": 18690
+    },
+    {
+      "epoch": 0.036464503171144295,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004940834769740328,
+      "loss": 17.5465,
+      "step": 18700
+    },
+    {
+      "epoch": 0.036484002905460416,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004940802259585782,
+      "loss": 17.5631,
+      "step": 18710
+    },
+    {
+      "epoch": 0.03650350263977653,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004940769749431235,
+      "loss": 17.4597,
+      "step": 18720
+    },
+    {
+      "epoch": 0.03652300237409265,
+      "grad_norm": 13.4375,
+      "learning_rate": 0.0004940737239276688,
+      "loss": 17.4941,
+      "step": 18730
+    },
+    {
+      "epoch": 0.03654250210840877,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004940704729122142,
+      "loss": 17.5893,
+      "step": 18740
+    },
+    {
+      "epoch": 0.036562001842724894,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004940672218967595,
+      "loss": 17.5582,
+      "step": 18750
+    },
+    {
+      "epoch": 0.036581501577041015,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004940639708813048,
+      "loss": 17.51,
+      "step": 18760
+    },
+    {
+      "epoch": 0.036601001311357136,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004940607198658501,
+      "loss": 17.4947,
+      "step": 18770
+    },
+    {
+      "epoch": 0.03662050104567325,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004940574688503955,
+      "loss": 17.3291,
+      "step": 18780
+    },
+    {
+      "epoch": 0.03664000077998937,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004940542178349408,
+      "loss": 17.4147,
+      "step": 18790
+    },
+    {
+      "epoch": 0.03665950051430549,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004940509668194861,
+      "loss": 17.4158,
+      "step": 18800
+    },
+    {
+      "epoch": 0.03667900024862161,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004940477158040315,
+      "loss": 17.3498,
+      "step": 18810
+    },
+    {
+      "epoch": 0.036698499982937734,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004940444647885768,
+      "loss": 17.5235,
+      "step": 18820
+    },
+    {
+      "epoch": 0.036717999717253855,
+      "grad_norm": 8.125,
+      "learning_rate": 0.000494041213773122,
+      "loss": 17.5074,
+      "step": 18830
+    },
+    {
+      "epoch": 0.036737499451569976,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004940379627576673,
+      "loss": 17.5261,
+      "step": 18840
+    },
+    {
+      "epoch": 0.03675699918588609,
+      "grad_norm": 26.625,
+      "learning_rate": 0.0004940347117422127,
+      "loss": 17.4652,
+      "step": 18850
+    },
+    {
+      "epoch": 0.03677649892020221,
+      "grad_norm": 8.75,
+      "learning_rate": 0.000494031460726758,
+      "loss": 17.5258,
+      "step": 18860
+    },
+    {
+      "epoch": 0.03679599865451833,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004940282097113033,
+      "loss": 17.487,
+      "step": 18870
+    },
+    {
+      "epoch": 0.03681549838883445,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004940249586958486,
+      "loss": 17.4624,
+      "step": 18880
+    },
+    {
+      "epoch": 0.036834998123150574,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.000494021707680394,
+      "loss": 17.4346,
+      "step": 18890
+    },
+    {
+      "epoch": 0.036854497857466695,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004940184566649393,
+      "loss": 17.4845,
+      "step": 18900
+    },
+    {
+      "epoch": 0.03687399759178281,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004940152056494846,
+      "loss": 17.4851,
+      "step": 18910
+    },
+    {
+      "epoch": 0.03689349732609893,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00049401195463403,
+      "loss": 17.423,
+      "step": 18920
+    },
+    {
+      "epoch": 0.03691299706041505,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004940087036185753,
+      "loss": 17.5391,
+      "step": 18930
+    },
+    {
+      "epoch": 0.03693249679473117,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004940054526031206,
+      "loss": 17.3817,
+      "step": 18940
+    },
+    {
+      "epoch": 0.03695199652904729,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004940022015876659,
+      "loss": 17.3288,
+      "step": 18950
+    },
+    {
+      "epoch": 0.036971496263363414,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004939989505722113,
+      "loss": 17.4329,
+      "step": 18960
+    },
+    {
+      "epoch": 0.036990995997679535,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004939956995567566,
+      "loss": 17.4302,
+      "step": 18970
+    },
+    {
+      "epoch": 0.03701049573199565,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004939924485413019,
+      "loss": 17.3948,
+      "step": 18980
+    },
+    {
+      "epoch": 0.03702999546631177,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004939891975258471,
+      "loss": 17.4144,
+      "step": 18990
+    },
+    {
+      "epoch": 0.03704949520062789,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004939859465103925,
+      "loss": 17.4472,
+      "step": 19000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.105398039319098e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null