Training in progress, step 450, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:7f2949f8bfd6c2486e8c8437272fec824b2b55d5651e8271f224549cbbf155f8
 size 313820248
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d4a3e3200cf21bb308bc55748d4cf98703787dd3c5a0f95c6b4e89c003cb94c0
 size 159641284
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:69e1c2bebba41f590f9467ccff5fca3b020c9e3c3da34a470b54a2ff645a049e
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c9636ae38b683f4b5b714bdf172e563b0c593e0efe94f07eea78547963bfbfae
 size 1064
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.
-  "best_model_checkpoint": "miner_id_24/checkpoint-
-  "epoch": 0.
+  "best_metric": 1.8720966577529907,
+  "best_model_checkpoint": "miner_id_24/checkpoint-450",
+  "epoch": 0.29397354238118567,
   "eval_steps": 150,
-  "global_step":
+  "global_step": 450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2131,6 +2131,1064 @@
       "eval_samples_per_second": 51.917,
       "eval_steps_per_second": 12.989,
       "step": 300
+    },
+    {
+      "epoch": 0.19663563612608198,
+      "grad_norm": 0.6562613844871521,
+      "learning_rate": 2.6813618894527138e-05,
+      "loss": 1.4429,
+      "step": 301
+    },
+    {
+      "epoch": 0.19728891066470686,
+      "grad_norm": 0.7796823978424072,
+      "learning_rate": 2.6490591592961578e-05,
+      "loss": 1.6504,
+      "step": 302
+    },
+    {
+      "epoch": 0.1979421852033317,
+      "grad_norm": 0.7866513729095459,
+      "learning_rate": 2.6168819172567392e-05,
+      "loss": 1.7214,
+      "step": 303
+    },
+    {
+      "epoch": 0.19859545974195655,
+      "grad_norm": 0.7742399573326111,
+      "learning_rate": 2.5848318808857606e-05,
+      "loss": 1.708,
+      "step": 304
+    },
+    {
+      "epoch": 0.1992487342805814,
+      "grad_norm": 0.8145656585693359,
+      "learning_rate": 2.5529107609445733e-05,
+      "loss": 1.8016,
+      "step": 305
+    },
+    {
+      "epoch": 0.19990200881920628,
+      "grad_norm": 0.7754919528961182,
+      "learning_rate": 2.521120261313241e-05,
+      "loss": 1.3985,
+      "step": 306
+    },
+    {
+      "epoch": 0.20055528335783113,
+      "grad_norm": 0.8018137812614441,
+      "learning_rate": 2.4894620788996037e-05,
+      "loss": 1.7272,
+      "step": 307
+    },
+    {
+      "epoch": 0.20120855789645598,
+      "grad_norm": 0.8388513922691345,
+      "learning_rate": 2.457937903548695e-05,
+      "loss": 1.7512,
+      "step": 308
+    },
+    {
+      "epoch": 0.20186183243508085,
+      "grad_norm": 0.8507857918739319,
+      "learning_rate": 2.426549417952542e-05,
+      "loss": 1.7692,
+      "step": 309
+    },
+    {
+      "epoch": 0.2025151069737057,
+      "grad_norm": 0.852091372013092,
+      "learning_rate": 2.3952982975603496e-05,
+      "loss": 1.7928,
+      "step": 310
+    },
+    {
+      "epoch": 0.20316838151233055,
+      "grad_norm": 0.8825722336769104,
+      "learning_rate": 2.3641862104890595e-05,
+      "loss": 1.7296,
+      "step": 311
+    },
+    {
+      "epoch": 0.20382165605095542,
+      "grad_norm": 0.8713802099227905,
+      "learning_rate": 2.3332148174343254e-05,
+      "loss": 1.6381,
+      "step": 312
+    },
+    {
+      "epoch": 0.20447493058958027,
+      "grad_norm": 0.9311487078666687,
+      "learning_rate": 2.3023857715818532e-05,
+      "loss": 1.6957,
+      "step": 313
+    },
+    {
+      "epoch": 0.20512820512820512,
+      "grad_norm": 0.932761013507843,
+      "learning_rate": 2.2717007185191674e-05,
+      "loss": 1.5765,
+      "step": 314
+    },
+    {
+      "epoch": 0.20578147966683,
+      "grad_norm": 0.9953693151473999,
+      "learning_rate": 2.24116129614777e-05,
+      "loss": 1.9627,
+      "step": 315
+    },
+    {
+      "epoch": 0.20643475420545485,
+      "grad_norm": 1.0859706401824951,
+      "learning_rate": 2.2107691345957133e-05,
+      "loss": 1.9455,
+      "step": 316
+    },
+    {
+      "epoch": 0.2070880287440797,
+      "grad_norm": 1.123296856880188,
+      "learning_rate": 2.1805258561305862e-05,
+      "loss": 1.9259,
+      "step": 317
+    },
+    {
+      "epoch": 0.20774130328270457,
+      "grad_norm": 1.0700737237930298,
+      "learning_rate": 2.1504330750729186e-05,
+      "loss": 1.6377,
+      "step": 318
+    },
+    {
+      "epoch": 0.20839457782132942,
+      "grad_norm": 1.1277694702148438,
+      "learning_rate": 2.120492397710022e-05,
+      "loss": 1.7249,
+      "step": 319
+    },
+    {
+      "epoch": 0.20904785235995427,
+      "grad_norm": 1.2123768329620361,
+      "learning_rate": 2.090705422210237e-05,
+      "loss": 1.9393,
+      "step": 320
+    },
+    {
+      "epoch": 0.20970112689857912,
+      "grad_norm": 1.2336665391921997,
+      "learning_rate": 2.061073738537635e-05,
+      "loss": 1.7775,
+      "step": 321
+    },
+    {
+      "epoch": 0.210354401437204,
+      "grad_norm": 1.2337082624435425,
+      "learning_rate": 2.0315989283671473e-05,
+      "loss": 1.8453,
+      "step": 322
+    },
+    {
+      "epoch": 0.21100767597582884,
+      "grad_norm": 1.2970980405807495,
+      "learning_rate": 2.0022825650001387e-05,
+      "loss": 1.8926,
+      "step": 323
+    },
+    {
+      "epoch": 0.2116609505144537,
+      "grad_norm": 1.3687418699264526,
+      "learning_rate": 1.9731262132804274e-05,
+      "loss": 2.0265,
+      "step": 324
+    },
+    {
+      "epoch": 0.21231422505307856,
+      "grad_norm": 1.4175206422805786,
+      "learning_rate": 1.9441314295107537e-05,
+      "loss": 1.9937,
+      "step": 325
+    },
+    {
+      "epoch": 0.2129674995917034,
+      "grad_norm": 1.4179284572601318,
+      "learning_rate": 1.9152997613697183e-05,
+      "loss": 1.7641,
+      "step": 326
+    },
+    {
+      "epoch": 0.21362077413032826,
+      "grad_norm": 1.5280499458312988,
+      "learning_rate": 1.8866327478291546e-05,
+      "loss": 1.9075,
+      "step": 327
+    },
+    {
+      "epoch": 0.21427404866895314,
+      "grad_norm": 1.6011749505996704,
+      "learning_rate": 1.8581319190720035e-05,
+      "loss": 1.9116,
+      "step": 328
+    },
+    {
+      "epoch": 0.21492732320757799,
+      "grad_norm": 1.6942389011383057,
+      "learning_rate": 1.8297987964106115e-05,
+      "loss": 1.9134,
+      "step": 329
+    },
+    {
+      "epoch": 0.21558059774620283,
+      "grad_norm": 1.9133878946304321,
+      "learning_rate": 1.801634892205545e-05,
+      "loss": 1.9993,
+      "step": 330
+    },
+    {
+      "epoch": 0.2162338722848277,
+      "grad_norm": 1.8108885288238525,
+      "learning_rate": 1.7736417097848506e-05,
+      "loss": 1.6007,
+      "step": 331
+    },
+    {
+      "epoch": 0.21688714682345256,
+      "grad_norm": 2.2170968055725098,
+      "learning_rate": 1.7458207433638223e-05,
+      "loss": 2.0009,
+      "step": 332
+    },
+    {
+      "epoch": 0.2175404213620774,
+      "grad_norm": 2.21116042137146,
+      "learning_rate": 1.718173477965236e-05,
+      "loss": 1.7992,
+      "step": 333
+    },
+    {
+      "epoch": 0.21819369590070228,
+      "grad_norm": 2.4326541423797607,
+      "learning_rate": 1.6907013893400837e-05,
+      "loss": 2.0342,
+      "step": 334
+    },
+    {
+      "epoch": 0.21884697043932713,
+      "grad_norm": 2.476644515991211,
+      "learning_rate": 1.6634059438888033e-05,
+      "loss": 1.6655,
+      "step": 335
+    },
+    {
+      "epoch": 0.21950024497795198,
+      "grad_norm": 2.754042387008667,
+      "learning_rate": 1.636288598583e-05,
+      "loss": 2.1341,
+      "step": 336
+    },
+    {
+      "epoch": 0.22015351951657683,
+      "grad_norm": 2.8820323944091797,
+      "learning_rate": 1.6093508008876857e-05,
+      "loss": 1.7264,
+      "step": 337
+    },
+    {
+      "epoch": 0.2208067940552017,
+      "grad_norm": 2.952387571334839,
+      "learning_rate": 1.5825939886840037e-05,
+      "loss": 1.9281,
+      "step": 338
+    },
+    {
+      "epoch": 0.22146006859382655,
+      "grad_norm": 2.917340040206909,
+      "learning_rate": 1.5560195901924894e-05,
+      "loss": 1.9721,
+      "step": 339
+    },
+    {
+      "epoch": 0.2221133431324514,
+      "grad_norm": 3.519662380218506,
+      "learning_rate": 1.5296290238968303e-05,
+      "loss": 2.2708,
+      "step": 340
+    },
+    {
+      "epoch": 0.22276661767107628,
+      "grad_norm": 3.6640732288360596,
+      "learning_rate": 1.50342369846815e-05,
+      "loss": 1.9861,
+      "step": 341
+    },
+    {
+      "epoch": 0.22341989220970113,
+      "grad_norm": 3.531583547592163,
+      "learning_rate": 1.4774050126898164e-05,
+      "loss": 2.135,
+      "step": 342
+    },
+    {
+      "epoch": 0.22407316674832597,
+      "grad_norm": 4.102410793304443,
+      "learning_rate": 1.451574355382776e-05,
+      "loss": 2.0721,
+      "step": 343
+    },
+    {
+      "epoch": 0.22472644128695085,
+      "grad_norm": 3.8423666954040527,
+      "learning_rate": 1.425933105331429e-05,
+      "loss": 2.0958,
+      "step": 344
+    },
+    {
+      "epoch": 0.2253797158255757,
+      "grad_norm": 3.899322986602783,
+      "learning_rate": 1.4004826312100216e-05,
+      "loss": 2.1085,
+      "step": 345
+    },
+    {
+      "epoch": 0.22603299036420055,
+      "grad_norm": 4.240949630737305,
+      "learning_rate": 1.3752242915095992e-05,
+      "loss": 2.0961,
+      "step": 346
+    },
+    {
+      "epoch": 0.22668626490282542,
+      "grad_norm": 4.414612293243408,
+      "learning_rate": 1.3501594344654884e-05,
+      "loss": 1.8973,
+      "step": 347
+    },
+    {
+      "epoch": 0.22733953944145027,
+      "grad_norm": 5.2035746574401855,
+      "learning_rate": 1.3252893979853304e-05,
+      "loss": 2.3987,
+      "step": 348
+    },
+    {
+      "epoch": 0.22799281398007512,
+      "grad_norm": 6.975047588348389,
+      "learning_rate": 1.3006155095776707e-05,
+      "loss": 2.3935,
+      "step": 349
+    },
+    {
+      "epoch": 0.2286460885187,
+      "grad_norm": 9.769250869750977,
+      "learning_rate": 1.2761390862810907e-05,
+      "loss": 3.1776,
+      "step": 350
+    },
+    {
+      "epoch": 0.22929936305732485,
+      "grad_norm": 0.6493518948554993,
+      "learning_rate": 1.2518614345939212e-05,
+      "loss": 1.517,
+      "step": 351
+    },
+    {
+      "epoch": 0.2299526375959497,
+      "grad_norm": 0.7682214379310608,
+      "learning_rate": 1.227783850404487e-05,
+      "loss": 1.7512,
+      "step": 352
+    },
+    {
+      "epoch": 0.23060591213457454,
+      "grad_norm": 0.7854646444320679,
+      "learning_rate": 1.2039076189219517e-05,
+      "loss": 1.7238,
+      "step": 353
+    },
+    {
+      "epoch": 0.23125918667319942,
+      "grad_norm": 0.7929527163505554,
+      "learning_rate": 1.1802340146077045e-05,
+      "loss": 1.8564,
+      "step": 354
+    },
+    {
+      "epoch": 0.23191246121182427,
+      "grad_norm": 0.8244699835777283,
+      "learning_rate": 1.1567643011073392e-05,
+      "loss": 1.6421,
+      "step": 355
+    },
+    {
+      "epoch": 0.23256573575044912,
+      "grad_norm": 0.8072249889373779,
+      "learning_rate": 1.1334997311832002e-05,
+      "loss": 1.6762,
+      "step": 356
+    },
+    {
+      "epoch": 0.233219010289074,
+      "grad_norm": 0.8119028806686401,
+      "learning_rate": 1.1104415466475087e-05,
+      "loss": 1.6528,
+      "step": 357
+    },
+    {
+      "epoch": 0.23387228482769884,
+      "grad_norm": 0.8597144484519958,
+      "learning_rate": 1.0875909782960886e-05,
+      "loss": 1.8121,
+      "step": 358
+    },
+    {
+      "epoch": 0.2345255593663237,
+      "grad_norm": 0.8278293609619141,
+      "learning_rate": 1.0649492458426564e-05,
+      "loss": 1.6059,
+      "step": 359
+    },
+    {
+      "epoch": 0.23517883390494856,
+      "grad_norm": 0.8635007739067078,
+      "learning_rate": 1.0425175578537299e-05,
+      "loss": 1.7619,
+      "step": 360
+    },
+    {
+      "epoch": 0.2358321084435734,
+      "grad_norm": 0.873079776763916,
+      "learning_rate": 1.020297111684101e-05,
+      "loss": 1.7476,
+      "step": 361
+    },
+    {
+      "epoch": 0.23648538298219826,
+      "grad_norm": 0.8993417024612427,
+      "learning_rate": 9.98289093412938e-06,
+      "loss": 1.7883,
+      "step": 362
+    },
+    {
+      "epoch": 0.23713865752082314,
+      "grad_norm": 0.8890747427940369,
+      "learning_rate": 9.764946777804646e-06,
+      "loss": 1.6988,
+      "step": 363
+    },
+    {
+      "epoch": 0.23779193205944799,
+      "grad_norm": 0.926846981048584,
+      "learning_rate": 9.549150281252633e-06,
+      "loss": 1.618,
+      "step": 364
+    },
+    {
+      "epoch": 0.23844520659807283,
+      "grad_norm": 0.9523332118988037,
+      "learning_rate": 9.335512963221732e-06,
+      "loss": 1.6626,
+      "step": 365
+    },
+    {
+      "epoch": 0.2390984811366977,
+      "grad_norm": 0.9900181293487549,
+      "learning_rate": 9.124046227208082e-06,
+      "loss": 1.646,
+      "step": 366
+    },
+    {
+      "epoch": 0.23975175567532256,
+      "grad_norm": 1.0082789659500122,
+      "learning_rate": 8.914761360846869e-06,
+      "loss": 1.7179,
+      "step": 367
+    },
+    {
+      "epoch": 0.2404050302139474,
+      "grad_norm": 1.045430302619934,
+      "learning_rate": 8.707669535309793e-06,
+      "loss": 1.6883,
+      "step": 368
+    },
+    {
+      "epoch": 0.24105830475257226,
+      "grad_norm": 1.0794562101364136,
+      "learning_rate": 8.502781804708826e-06,
+      "loss": 1.8601,
+      "step": 369
+    },
+    {
+      "epoch": 0.24171157929119713,
+      "grad_norm": 1.1484591960906982,
+      "learning_rate": 8.30010910550611e-06,
+      "loss": 1.7968,
+      "step": 370
+    },
+    {
+      "epoch": 0.24236485382982198,
+      "grad_norm": 1.1732699871063232,
+      "learning_rate": 8.09966225593024e-06,
+      "loss": 1.6156,
+      "step": 371
+    },
+    {
+      "epoch": 0.24301812836844683,
+      "grad_norm": 1.1297372579574585,
+      "learning_rate": 7.901451955398792e-06,
+      "loss": 1.5771,
+      "step": 372
+    },
+    {
+      "epoch": 0.2436714029070717,
+      "grad_norm": 1.2523436546325684,
+      "learning_rate": 7.705488783947202e-06,
+      "loss": 2.0035,
+      "step": 373
+    },
+    {
+      "epoch": 0.24432467744569655,
+      "grad_norm": 1.260083794593811,
+      "learning_rate": 7.511783201664052e-06,
+      "loss": 1.8555,
+      "step": 374
+    },
+    {
+      "epoch": 0.2449779519843214,
+      "grad_norm": 1.3140181303024292,
+      "learning_rate": 7.320345548132679e-06,
+      "loss": 1.8955,
+      "step": 375
+    },
+    {
+      "epoch": 0.24563122652294628,
+      "grad_norm": 1.4021167755126953,
+      "learning_rate": 7.131186041879357e-06,
+      "loss": 1.7796,
+      "step": 376
+    },
+    {
+      "epoch": 0.24628450106157113,
+      "grad_norm": 1.448630690574646,
+      "learning_rate": 6.944314779827749e-06,
+      "loss": 1.9388,
+      "step": 377
+    },
+    {
+      "epoch": 0.24693777560019597,
+      "grad_norm": 1.4907878637313843,
+      "learning_rate": 6.759741736760061e-06,
+      "loss": 1.7724,
+      "step": 378
+    },
+    {
+      "epoch": 0.24759105013882085,
+      "grad_norm": 1.641314148902893,
+      "learning_rate": 6.577476764784546e-06,
+      "loss": 1.8177,
+      "step": 379
+    },
+    {
+      "epoch": 0.2482443246774457,
+      "grad_norm": 1.7289384603500366,
+      "learning_rate": 6.397529592809614e-06,
+      "loss": 2.0931,
+      "step": 380
+    },
+    {
+      "epoch": 0.24889759921607055,
+      "grad_norm": 1.696372389793396,
+      "learning_rate": 6.219909826024589e-06,
+      "loss": 1.7295,
+      "step": 381
+    },
+    {
+      "epoch": 0.24955087375469542,
+      "grad_norm": 2.057739496231079,
+      "learning_rate": 6.0446269453868945e-06,
+      "loss": 1.9874,
+      "step": 382
+    },
+    {
+      "epoch": 0.25020414829332027,
+      "grad_norm": 2.0013246536254883,
+      "learning_rate": 5.871690307116107e-06,
+      "loss": 1.8563,
+      "step": 383
+    },
+    {
+      "epoch": 0.2508574228319451,
+      "grad_norm": 2.1932458877563477,
+      "learning_rate": 5.701109142194422e-06,
+      "loss": 2.0006,
+      "step": 384
+    },
+    {
+      "epoch": 0.25151069737056997,
+      "grad_norm": 2.1517393589019775,
+      "learning_rate": 5.532892555874059e-06,
+      "loss": 1.9219,
+      "step": 385
+    },
+    {
+      "epoch": 0.2521639719091948,
+      "grad_norm": 2.4914627075195312,
+      "learning_rate": 5.3670495271910925e-06,
+      "loss": 2.1114,
+      "step": 386
+    },
+    {
+      "epoch": 0.2528172464478197,
+      "grad_norm": 2.510403633117676,
+      "learning_rate": 5.203588908486279e-06,
+      "loss": 1.6452,
+      "step": 387
+    },
+    {
+      "epoch": 0.25347052098644457,
+      "grad_norm": 3.1688222885131836,
+      "learning_rate": 5.042519424932513e-06,
+      "loss": 2.1944,
+      "step": 388
+    },
+    {
+      "epoch": 0.2541237955250694,
+      "grad_norm": 3.033691167831421,
+      "learning_rate": 4.883849674069058e-06,
+      "loss": 1.8288,
+      "step": 389
+    },
+    {
+      "epoch": 0.25477707006369427,
+      "grad_norm": 3.0903337001800537,
+      "learning_rate": 4.727588125342669e-06,
+      "loss": 1.8231,
+      "step": 390
+    },
+    {
+      "epoch": 0.2554303446023191,
+      "grad_norm": 3.200044631958008,
+      "learning_rate": 4.573743119655516e-06,
+      "loss": 2.1962,
+      "step": 391
+    },
+    {
+      "epoch": 0.25608361914094396,
+      "grad_norm": 3.5135629177093506,
+      "learning_rate": 4.422322868919937e-06,
+      "loss": 1.8037,
+      "step": 392
+    },
+    {
+      "epoch": 0.2567368936795688,
+      "grad_norm": 3.3575761318206787,
+      "learning_rate": 4.273335455620097e-06,
+      "loss": 1.5867,
+      "step": 393
+    },
+    {
+      "epoch": 0.2573901682181937,
+      "grad_norm": 4.191507816314697,
+      "learning_rate": 4.126788832380629e-06,
+      "loss": 2.2939,
+      "step": 394
+    },
+    {
+      "epoch": 0.25804344275681856,
+      "grad_norm": 3.7249908447265625,
+      "learning_rate": 3.982690821542035e-06,
+      "loss": 1.874,
+      "step": 395
+    },
+    {
+      "epoch": 0.2586967172954434,
+      "grad_norm": 3.9365010261535645,
+      "learning_rate": 3.8410491147432395e-06,
+      "loss": 1.6268,
+      "step": 396
+    },
+    {
+      "epoch": 0.25934999183406826,
+      "grad_norm": 4.847893238067627,
+      "learning_rate": 3.7018712725109926e-06,
+      "loss": 2.2378,
+      "step": 397
+    },
+    {
+      "epoch": 0.2600032663726931,
+      "grad_norm": 4.60577392578125,
+      "learning_rate": 3.5651647238562904e-06,
+      "loss": 2.1083,
+      "step": 398
+    },
+    {
+      "epoch": 0.26065654091131796,
+      "grad_norm": 5.157360553741455,
+      "learning_rate": 3.430936765877857e-06,
+      "loss": 2.6833,
+      "step": 399
+    },
+    {
+      "epoch": 0.26130981544994286,
+      "grad_norm": 7.585251331329346,
+      "learning_rate": 3.299194563372604e-06,
+      "loss": 2.6346,
+      "step": 400
+    },
+    {
+      "epoch": 0.2619630899885677,
+      "grad_norm": 0.5975827574729919,
+      "learning_rate": 3.1699451484532463e-06,
+      "loss": 1.4453,
+      "step": 401
+    },
+    {
+      "epoch": 0.26261636452719256,
+      "grad_norm": 0.7209427356719971,
+      "learning_rate": 3.0431954201728784e-06,
+      "loss": 1.6149,
+      "step": 402
+    },
+    {
+      "epoch": 0.2632696390658174,
+      "grad_norm": 0.7632143497467041,
+      "learning_rate": 2.9189521441567726e-06,
+      "loss": 1.8857,
+      "step": 403
+    },
+    {
+      "epoch": 0.26392291360444226,
+      "grad_norm": 0.7980583310127258,
+      "learning_rate": 2.797221952241219e-06,
+      "loss": 1.7687,
+      "step": 404
+    },
+    {
+      "epoch": 0.2645761881430671,
+      "grad_norm": 0.7555380463600159,
+      "learning_rate": 2.6780113421195298e-06,
+      "loss": 1.7941,
+      "step": 405
+    },
+    {
+      "epoch": 0.265229462681692,
+      "grad_norm": 0.7890255451202393,
+      "learning_rate": 2.561326676995218e-06,
+      "loss": 1.6515,
+      "step": 406
+    },
+    {
+      "epoch": 0.26588273722031686,
+      "grad_norm": 0.7849552631378174,
+      "learning_rate": 2.4471741852423237e-06,
+      "loss": 1.6997,
+      "step": 407
+    },
+    {
+      "epoch": 0.2665360117589417,
+      "grad_norm": 0.8335103392601013,
+      "learning_rate": 2.3355599600729915e-06,
+      "loss": 1.759,
+      "step": 408
+    },
+    {
+      "epoch": 0.26718928629756655,
+      "grad_norm": 0.8428650498390198,
+      "learning_rate": 2.2264899592121744e-06,
+      "loss": 1.6588,
+      "step": 409
+    },
+    {
+      "epoch": 0.2678425608361914,
+      "grad_norm": 0.8650283217430115,
+      "learning_rate": 2.1199700045797077e-06,
+      "loss": 1.7422,
+      "step": 410
+    },
+    {
+      "epoch": 0.26849583537481625,
+      "grad_norm": 0.8699631690979004,
+      "learning_rate": 2.0160057819794466e-06,
+      "loss": 1.7059,
+      "step": 411
+    },
+    {
+      "epoch": 0.2691491099134411,
+      "grad_norm": 1.0831049680709839,
+      "learning_rate": 1.9146028407958484e-06,
+      "loss": 1.8546,
+      "step": 412
+    },
+    {
+      "epoch": 0.269802384452066,
+      "grad_norm": 0.9282965660095215,
+      "learning_rate": 1.8157665936977263e-06,
+      "loss": 1.7158,
+      "step": 413
+    },
+    {
+      "epoch": 0.27045565899069085,
+      "grad_norm": 0.9040890336036682,
+      "learning_rate": 1.7195023163493252e-06,
+      "loss": 1.7635,
+      "step": 414
+    },
+    {
+      "epoch": 0.2711089335293157,
+      "grad_norm": 0.9390372633934021,
+      "learning_rate": 1.6258151471287396e-06,
+      "loss": 1.5842,
+      "step": 415
+    },
+    {
+      "epoch": 0.27176220806794055,
+      "grad_norm": 1.0083568096160889,
+      "learning_rate": 1.5347100868536246e-06,
+      "loss": 1.6398,
+      "step": 416
+    },
+    {
+      "epoch": 0.2724154826065654,
+      "grad_norm": 1.0582306385040283,
+      "learning_rate": 1.4461919985142735e-06,
+      "loss": 1.9054,
+      "step": 417
+    },
+    {
+      "epoch": 0.27306875714519024,
+      "grad_norm": 1.031751275062561,
+      "learning_rate": 1.3602656070140275e-06,
+      "loss": 1.6964,
+      "step": 418
+    },
+    {
+      "epoch": 0.27372203168381515,
+      "grad_norm": 1.1092511415481567,
+      "learning_rate": 1.27693549891707e-06,
+      "loss": 1.7938,
+      "step": 419
+    },
+    {
+      "epoch": 0.27437530622244,
+      "grad_norm": 1.1369683742523193,
+      "learning_rate": 1.196206122203647e-06,
+      "loss": 1.9818,
+      "step": 420
+    },
+    {
+      "epoch": 0.27502858076106484,
+      "grad_norm": 1.1983885765075684,
+      "learning_rate": 1.1180817860325599e-06,
+      "loss": 1.8541,
+      "step": 421
+    },
+    {
+      "epoch": 0.2756818552996897,
+      "grad_norm": 1.272538185119629,
+      "learning_rate": 1.0425666605112517e-06,
+      "loss": 1.8536,
+      "step": 422
+    },
+    {
+      "epoch": 0.27633512983831454,
+      "grad_norm": 1.3353904485702515,
+      "learning_rate": 9.696647764731337e-07,
+      "loss": 1.9665,
+      "step": 423
+    },
+    {
+      "epoch": 0.2769884043769394,
+      "grad_norm": 1.3927992582321167,
+      "learning_rate": 8.993800252624862e-07,
+      "loss": 1.8969,
+      "step": 424
+    },
+    {
+      "epoch": 0.27764167891556424,
+      "grad_norm": 1.3316028118133545,
+      "learning_rate": 8.317161585266964e-07,
+      "loss": 1.6964,
+      "step": 425
+    },
+    {
+      "epoch": 0.27829495345418914,
+      "grad_norm": 1.4543392658233643,
+      "learning_rate": 7.666767880160464e-07,
+      "loss": 1.8206,
+      "step": 426
+    },
+    {
+      "epoch": 0.278948227992814,
+      "grad_norm": 1.5308529138565063,
+      "learning_rate": 7.042653853909064e-07,
+      "loss": 1.8566,
+      "step": 427
+    },
+    {
+      "epoch": 0.27960150253143884,
+      "grad_norm": 1.5561844110488892,
+      "learning_rate": 6.444852820364222e-07,
+      "loss": 1.9246,
+      "step": 428
+    },
+    {
+      "epoch": 0.2802547770700637,
+      "grad_norm": 1.674401879310608,
+      "learning_rate": 5.87339668884701e-07,
+      "loss": 1.8902,
+      "step": 429
+    },
+    {
+      "epoch": 0.28090805160868854,
+      "grad_norm": 1.7931857109069824,
+      "learning_rate": 5.328315962444874e-07,
+      "loss": 1.8674,
+      "step": 430
+    },
+    {
+      "epoch": 0.2815613261473134,
+      "grad_norm": 1.801674485206604,
+      "learning_rate": 4.809639736383431e-07,
+      "loss": 1.8449,
+      "step": 431
+    },
+    {
+      "epoch": 0.2822146006859383,
+      "grad_norm": 1.9189867973327637,
+      "learning_rate": 4.317395696473214e-07,
+      "loss": 1.8103,
+      "step": 432
+    },
+    {
+      "epoch": 0.28286787522456314,
+      "grad_norm": 2.1448371410369873,
+      "learning_rate": 3.851610117632354e-07,
+      "loss": 1.9176,
+      "step": 433
+    },
+    {
+      "epoch": 0.283521149763188,
+      "grad_norm": 2.214698314666748,
+      "learning_rate": 3.4123078624834216e-07,
+      "loss": 1.8616,
+      "step": 434
+    },
+    {
+      "epoch": 0.28417442430181283,
+      "grad_norm": 2.553487539291382,
+      "learning_rate": 2.9995123800270476e-07,
+      "loss": 2.1907,
+      "step": 435
+    },
+    {
+      "epoch": 0.2848276988404377,
+      "grad_norm": 2.483630657196045,
+      "learning_rate": 2.613245704389644e-07,
+      "loss": 1.8343,
+      "step": 436
+    },
+    {
+      "epoch": 0.28548097337906253,
+      "grad_norm": 2.82547926902771,
+      "learning_rate": 2.2535284536476242e-07,
+      "loss": 1.8856,
+      "step": 437
+    },
+    {
+      "epoch": 0.28613424791768743,
+      "grad_norm": 2.9452755451202393,
+      "learning_rate": 1.920379828726726e-07,
+      "loss": 1.6969,
+      "step": 438
+    },
+    {
+      "epoch": 0.2867875224563123,
+      "grad_norm": 3.092703342437744,
+      "learning_rate": 1.6138176123770554e-07,
+      "loss": 1.9073,
+      "step": 439
+    },
+    {
+      "epoch": 0.28744079699493713,
+      "grad_norm": 3.6752867698669434,
+      "learning_rate": 1.333858168224178e-07,
+      "loss": 2.4456,
+      "step": 440
+    },
+    {
+      "epoch": 0.288094071533562,
+      "grad_norm": 3.8016953468322754,
+      "learning_rate": 1.0805164398952072e-07,
+      "loss": 2.0624,
+      "step": 441
+    },
+    {
+      "epoch": 0.28874734607218683,
+      "grad_norm": 3.2749228477478027,
+      "learning_rate": 8.53805950221498e-08,
+      "loss": 1.6564,
+      "step": 442
+    },
+    {
+      "epoch": 0.2894006206108117,
+      "grad_norm": 3.4237606525421143,
+      "learning_rate": 6.537388005167233e-08,
+      "loss": 1.6582,
+      "step": 443
+    },
+    {
+      "epoch": 0.2900538951494365,
+      "grad_norm": 3.6927602291107178,
+      "learning_rate": 4.8032566993089225e-08,
+      "loss": 1.9234,
+      "step": 444
+    },
+    {
+      "epoch": 0.29070716968806143,
+      "grad_norm": 4.420969486236572,
+      "learning_rate": 3.3357581488030475e-08,
+      "loss": 2.1773,
+      "step": 445
+    },
+    {
+      "epoch": 0.2913604442266863,
+      "grad_norm": 5.129368782043457,
+      "learning_rate": 2.134970685536697e-08,
+      "loss": 2.3766,
+      "step": 446
+    },
+    {
+      "epoch": 0.2920137187653111,
+      "grad_norm": 4.547641754150391,
+      "learning_rate": 1.200958404936059e-08,
+      "loss": 2.0766,
+      "step": 447
+    },
+    {
+      "epoch": 0.292666993303936,
+      "grad_norm": 4.891174793243408,
+      "learning_rate": 5.337711625497121e-09,
+      "loss": 2.1816,
+      "step": 448
+    },
+    {
+      "epoch": 0.2933202678425608,
+      "grad_norm": 6.047499656677246,
+      "learning_rate": 1.3344457138297906e-09,
+      "loss": 2.7319,
+      "step": 449
+    },
+    {
+      "epoch": 0.29397354238118567,
+      "grad_norm": 8.506380081176758,
+      "learning_rate": 0.0,
+      "loss": 2.4519,
+      "step": 450
+    },
+    {
+      "epoch": 0.29397354238118567,
+      "eval_loss": 1.8720966577529907,
+      "eval_runtime": 49.6963,
+      "eval_samples_per_second": 51.875,
+      "eval_steps_per_second": 12.979,
+      "step": 450
     }
   ],
   "logging_steps": 1,
@@ -2154,12 +3212,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 9.230704871630438e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null