Training in progress, step 18000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54574a1a58c678e30a243f8b4a3a0bbe8af33220d14ef8b42b3a78e339bd2289
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:3b00afd84b6c9ce17eaf6cde875a1462d2a5f0a7c0b9c73a9b93dfa70356a2e2
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa9d36d0137a10cfbdb87e0006e1d6ac58b82282168bdd0821c71eead1bdac32
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:4ebce57f62b5c08e94d3ef4d4c19d6f624921ff13378d5f419a1a0fc63ae8de2
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b23e565c0773e35ad09f1b2473ae578049f6a7780765ac862ecd6eeeee912c90
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:13c1b31558f9530223d30967d940c908110b66ae87767dc8b41640c0ec2ab3ad
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a00867a90484a28803cbe8bd9d8069ef8cdd1a463e5589e32a25c51cb663295b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:31e1f3d55bb567df3a2ebf344a0ee08608b18736ddff2de100218656482b16ab
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16d84e48d7ada91bf975c21c5daad767f717e082ce5b54bad6f761abd9bf7627
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b7068584adf4719cad732133ffdff00b498545ab4f7b6d887d675a74b59641e2
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d75cbaf389582e1d108d15c09c443d2f4c8941c4b71faeeb3723b25a447f658b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:f43ad3e51655951e2a9c021cf9bdd46d25eb6df7a162e3fc18fe50a401173803
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84d957adbd57639a95ced1440a685d29db26c75001a9b3061d2f7af9b9a721b1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:add33ce1c647f1ad24436fdd2c7095ade5081fad618777000690c7e187278b49
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.033149548337403904,
   "eval_steps": 500,
-  "global_step": 17000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11908,6 +11908,706 @@
       "learning_rate": 0.000494636149601328,
       "loss": 17.8281,
       "step": 17000
     }
   ],
   "logging_steps": 10,
@@ -11927,7 +12627,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.6732402572894142e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.035099521769015894,
   "eval_steps": 500,
+  "global_step": 18000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000494636149601328,
       "loss": 17.8281,
       "step": 17000
+    },
+    {
+      "epoch": 0.033169048071720025,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004946328985858733,
+      "loss": 17.885,
+      "step": 17010
+    },
+    {
+      "epoch": 0.03318854780603614,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004946296475704186,
+      "loss": 17.946,
+      "step": 17020
+    },
+    {
+      "epoch": 0.03320804754035226,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494626396554964,
+      "loss": 17.7649,
+      "step": 17030
+    },
+    {
+      "epoch": 0.03322754727466838,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004946231455395093,
+      "loss": 17.8435,
+      "step": 17040
+    },
+    {
+      "epoch": 0.0332470470089845,
+      "grad_norm": 22.375,
+      "learning_rate": 0.0004946198945240546,
+      "loss": 17.7794,
+      "step": 17050
+    },
+    {
+      "epoch": 0.03326654674330062,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004946166435085999,
+      "loss": 17.7582,
+      "step": 17060
+    },
+    {
+      "epoch": 0.033286046477616744,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004946133924931453,
+      "loss": 17.9408,
+      "step": 17070
+    },
+    {
+      "epoch": 0.033305546211932865,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004946101414776906,
+      "loss": 17.8288,
+      "step": 17080
+    },
+    {
+      "epoch": 0.03332504594624898,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004946068904622359,
+      "loss": 17.8074,
+      "step": 17090
+    },
+    {
+      "epoch": 0.0333445456805651,
+      "grad_norm": 11.5,
+      "learning_rate": 0.0004946036394467813,
+      "loss": 17.7826,
+      "step": 17100
+    },
+    {
+      "epoch": 0.03336404541488122,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004946003884313266,
+      "loss": 17.8136,
+      "step": 17110
+    },
+    {
+      "epoch": 0.03338354514919734,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004945971374158719,
+      "loss": 17.7152,
+      "step": 17120
+    },
+    {
+      "epoch": 0.033403044883513464,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004945938864004172,
+      "loss": 17.6264,
+      "step": 17130
+    },
+    {
+      "epoch": 0.033422544617829585,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004945906353849625,
+      "loss": 17.6933,
+      "step": 17140
+    },
+    {
+      "epoch": 0.0334420443521457,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004945873843695078,
+      "loss": 17.7102,
+      "step": 17150
+    },
+    {
+      "epoch": 0.03346154408646182,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004945841333540531,
+      "loss": 17.7096,
+      "step": 17160
+    },
+    {
+      "epoch": 0.03348104382077794,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004945808823385984,
+      "loss": 17.5945,
+      "step": 17170
+    },
+    {
+      "epoch": 0.03350054355509406,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004945776313231438,
+      "loss": 17.7132,
+      "step": 17180
+    },
+    {
+      "epoch": 0.03352004328941018,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004945743803076891,
+      "loss": 17.8003,
+      "step": 17190
+    },
+    {
+      "epoch": 0.033539543023726304,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004945711292922344,
+      "loss": 17.8819,
+      "step": 17200
+    },
+    {
+      "epoch": 0.033559042758042425,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0004945678782767798,
+      "loss": 17.7493,
+      "step": 17210
+    },
+    {
+      "epoch": 0.03357854249235854,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004945646272613251,
+      "loss": 17.8033,
+      "step": 17220
+    },
+    {
+      "epoch": 0.03359804222667466,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004945613762458704,
+      "loss": 17.7948,
+      "step": 17230
+    },
+    {
+      "epoch": 0.03361754196099078,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004945581252304157,
+      "loss": 17.6432,
+      "step": 17240
+    },
+    {
+      "epoch": 0.0336370416953069,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004945548742149611,
+      "loss": 17.7967,
+      "step": 17250
+    },
+    {
+      "epoch": 0.03365654142962302,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004945516231995064,
+      "loss": 17.7517,
+      "step": 17260
+    },
+    {
+      "epoch": 0.033676041163939144,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004945483721840517,
+      "loss": 17.8602,
+      "step": 17270
+    },
+    {
+      "epoch": 0.03369554089825526,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004945451211685971,
+      "loss": 17.7164,
+      "step": 17280
+    },
+    {
+      "epoch": 0.03371504063257138,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004945418701531423,
+      "loss": 17.6317,
+      "step": 17290
+    },
+    {
+      "epoch": 0.0337345403668875,
+      "grad_norm": 10.5625,
+      "learning_rate": 0.0004945386191376876,
+      "loss": 17.71,
+      "step": 17300
+    },
+    {
+      "epoch": 0.03375404010120362,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004945353681222329,
+      "loss": 17.7735,
+      "step": 17310
+    },
+    {
+      "epoch": 0.03377353983551974,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004945321171067783,
+      "loss": 17.8358,
+      "step": 17320
+    },
+    {
+      "epoch": 0.03379303956983586,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004945288660913236,
+      "loss": 17.6823,
+      "step": 17330
+    },
+    {
+      "epoch": 0.033812539304151984,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004945256150758689,
+      "loss": 17.6095,
+      "step": 17340
+    },
+    {
+      "epoch": 0.0338320390384681,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004945223640604143,
+      "loss": 17.7989,
+      "step": 17350
+    },
+    {
+      "epoch": 0.03385153877278422,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004945191130449596,
+      "loss": 17.8012,
+      "step": 17360
+    },
+    {
+      "epoch": 0.03387103850710034,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004945158620295049,
+      "loss": 17.701,
+      "step": 17370
+    },
+    {
+      "epoch": 0.03389053824141646,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004945126110140502,
+      "loss": 17.6966,
+      "step": 17380
+    },
+    {
+      "epoch": 0.03391003797573258,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004945093599985956,
+      "loss": 17.7201,
+      "step": 17390
+    },
+    {
+      "epoch": 0.0339295377100487,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004945061089831409,
+      "loss": 17.7151,
+      "step": 17400
+    },
+    {
+      "epoch": 0.03394903744436482,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004945028579676862,
+      "loss": 17.5905,
+      "step": 17410
+    },
+    {
+      "epoch": 0.03396853717868094,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004944996069522316,
+      "loss": 17.7166,
+      "step": 17420
+    },
+    {
+      "epoch": 0.03398803691299706,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004944963559367769,
+      "loss": 17.7541,
+      "step": 17430
+    },
+    {
+      "epoch": 0.03400753664731318,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004944931049213222,
+      "loss": 17.6131,
+      "step": 17440
+    },
+    {
+      "epoch": 0.0340270363816293,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004944898539058674,
+      "loss": 17.6559,
+      "step": 17450
+    },
+    {
+      "epoch": 0.03404653611594542,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004944866028904128,
+      "loss": 17.7009,
+      "step": 17460
+    },
+    {
+      "epoch": 0.03406603585026154,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004944833518749581,
+      "loss": 17.7258,
+      "step": 17470
+    },
+    {
+      "epoch": 0.03408553558457766,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004944801008595034,
+      "loss": 17.687,
+      "step": 17480
+    },
+    {
+      "epoch": 0.03410503531889378,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004944768498440487,
+      "loss": 17.6723,
+      "step": 17490
+    },
+    {
+      "epoch": 0.0341245350532099,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004944735988285941,
+      "loss": 17.6988,
+      "step": 17500
+    },
+    {
+      "epoch": 0.03414403478752602,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004944703478131394,
+      "loss": 17.8097,
+      "step": 17510
+    },
+    {
+      "epoch": 0.03416353452184214,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004944670967976847,
+      "loss": 17.7569,
+      "step": 17520
+    },
+    {
+      "epoch": 0.03418303425615826,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004944638457822301,
+      "loss": 17.8112,
+      "step": 17530
+    },
+    {
+      "epoch": 0.034202533990474376,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004944605947667754,
+      "loss": 17.7088,
+      "step": 17540
+    },
+    {
+      "epoch": 0.0342220337247905,
+      "grad_norm": 52.75,
+      "learning_rate": 0.0004944573437513207,
+      "loss": 17.743,
+      "step": 17550
+    },
+    {
+      "epoch": 0.03424153345910662,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.000494454092735866,
+      "loss": 17.5971,
+      "step": 17560
+    },
+    {
+      "epoch": 0.03426103319342274,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004944508417204114,
+      "loss": 17.5989,
+      "step": 17570
+    },
+    {
+      "epoch": 0.03428053292773886,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004944475907049567,
+      "loss": 17.6487,
+      "step": 17580
+    },
+    {
+      "epoch": 0.03430003266205498,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.000494444339689502,
+      "loss": 17.7165,
+      "step": 17590
+    },
+    {
+      "epoch": 0.0343195323963711,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004944410886740474,
+      "loss": 17.6039,
+      "step": 17600
+    },
+    {
+      "epoch": 0.034339032130687216,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004944378376585927,
+      "loss": 17.701,
+      "step": 17610
+    },
+    {
+      "epoch": 0.03435853186500334,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.000494434586643138,
+      "loss": 17.7048,
+      "step": 17620
+    },
+    {
+      "epoch": 0.03437803159931946,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004944313356276833,
+      "loss": 17.7427,
+      "step": 17630
+    },
+    {
+      "epoch": 0.03439753133363558,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004944280846122287,
+      "loss": 17.6594,
+      "step": 17640
+    },
+    {
+      "epoch": 0.0344170310679517,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.000494424833596774,
+      "loss": 17.7552,
+      "step": 17650
+    },
+    {
+      "epoch": 0.03443653080226782,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004944215825813193,
+      "loss": 17.6453,
+      "step": 17660
+    },
+    {
+      "epoch": 0.034456030536583936,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004944183315658647,
+      "loss": 17.8145,
+      "step": 17670
+    },
+    {
+      "epoch": 0.03447553027090006,
+      "grad_norm": 9.5,
+      "learning_rate": 0.00049441508055041,
+      "loss": 17.6461,
+      "step": 17680
+    },
+    {
+      "epoch": 0.03449503000521618,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004944118295349552,
+      "loss": 17.7198,
+      "step": 17690
+    },
+    {
+      "epoch": 0.0345145297395323,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004944085785195005,
+      "loss": 17.7119,
+      "step": 17700
+    },
+    {
+      "epoch": 0.03453402947384842,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004944053275040459,
+      "loss": 17.653,
+      "step": 17710
+    },
+    {
+      "epoch": 0.03455352920816454,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004944020764885912,
+      "loss": 17.7956,
+      "step": 17720
+    },
+    {
+      "epoch": 0.03457302894248066,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004943988254731365,
+      "loss": 17.7212,
+      "step": 17730
+    },
+    {
+      "epoch": 0.034592528676796776,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004943955744576818,
+      "loss": 17.7634,
+      "step": 17740
+    },
+    {
+      "epoch": 0.0346120284111129,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004943923234422272,
+      "loss": 17.6751,
+      "step": 17750
+    },
+    {
+      "epoch": 0.03463152814542902,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004943890724267725,
+      "loss": 17.6671,
+      "step": 17760
+    },
+    {
+      "epoch": 0.03465102787974514,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004943858214113178,
+      "loss": 17.73,
+      "step": 17770
+    },
+    {
+      "epoch": 0.03467052761406126,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004943825703958632,
+      "loss": 17.713,
+      "step": 17780
+    },
+    {
+      "epoch": 0.03469002734837738,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004943793193804085,
+      "loss": 17.6888,
+      "step": 17790
+    },
+    {
+      "epoch": 0.034709527082693495,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004943760683649538,
+      "loss": 17.6683,
+      "step": 17800
+    },
+    {
+      "epoch": 0.034729026817009616,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004943728173494991,
+      "loss": 17.6479,
+      "step": 17810
+    },
+    {
+      "epoch": 0.03474852655132574,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004943695663340445,
+      "loss": 17.7058,
+      "step": 17820
+    },
+    {
+      "epoch": 0.03476802628564186,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004943663153185898,
+      "loss": 17.6571,
+      "step": 17830
+    },
+    {
+      "epoch": 0.03478752601995798,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004943630643031351,
+      "loss": 17.6084,
+      "step": 17840
+    },
+    {
+      "epoch": 0.0348070257542741,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004943598132876805,
+      "loss": 17.6829,
+      "step": 17850
+    },
+    {
+      "epoch": 0.03482652548859022,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004943565622722258,
+      "loss": 17.6978,
+      "step": 17860
+    },
+    {
+      "epoch": 0.034846025222906335,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004943533112567711,
+      "loss": 17.5957,
+      "step": 17870
+    },
+    {
+      "epoch": 0.034865524957222456,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004943500602413164,
+      "loss": 17.6825,
+      "step": 17880
+    },
+    {
+      "epoch": 0.03488502469153858,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004943468092258618,
+      "loss": 17.5999,
+      "step": 17890
+    },
+    {
+      "epoch": 0.0349045244258547,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004943435582104071,
+      "loss": 17.5818,
+      "step": 17900
+    },
+    {
+      "epoch": 0.03492402416017082,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004943403071949523,
+      "loss": 17.5646,
+      "step": 17910
+    },
+    {
+      "epoch": 0.03494352389448694,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004943370561794977,
+      "loss": 17.7599,
+      "step": 17920
+    },
+    {
+      "epoch": 0.03496302362880306,
+      "grad_norm": 9.875,
+      "learning_rate": 0.000494333805164043,
+      "loss": 17.7103,
+      "step": 17930
+    },
+    {
+      "epoch": 0.034982523363119175,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004943305541485883,
+      "loss": 17.5457,
+      "step": 17940
+    },
+    {
+      "epoch": 0.035002023097435296,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004943273031331336,
+      "loss": 17.6915,
+      "step": 17950
+    },
+    {
+      "epoch": 0.03502152283175142,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494324052117679,
+      "loss": 17.5628,
+      "step": 17960
+    },
+    {
+      "epoch": 0.03504102256606754,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004943208011022243,
+      "loss": 17.7019,
+      "step": 17970
+    },
+    {
+      "epoch": 0.03506052230038366,
+      "grad_norm": 8.0,
+      "learning_rate": 0.0004943175500867696,
+      "loss": 17.7066,
+      "step": 17980
+    },
+    {
+      "epoch": 0.03508002203469978,
+      "grad_norm": 8.5,
+      "learning_rate": 0.000494314299071315,
+      "loss": 17.5608,
+      "step": 17990
+    },
+    {
+      "epoch": 0.035099521769015894,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004943110480558603,
+      "loss": 17.5819,
+      "step": 18000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.889326067389196e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null