Training in progress, step 700, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +713 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc7f9f12a7d975eceff199ac96f1fb4abb58016ab42da13fe25b1845a1e0b3ce
 size 289512208

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b9031cffc344e8840f4c7143987fa4e58be60af4e5110d63d50416a3f8b59f3
 size 289512208

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37a24bd2239b20338e528442d83d2139315c8017ad79ac6456a27ebc2a7c4982
 size 147781972

 version https://git-lfs.github.com/spec/v1
+oid sha256:3586c1b40b78d2d911170eb1a15bda1bb7e14d32d622befc2e331d34b42a7aff
 size 147781972

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28ace9fc649252ea1299cd2d9b1953184b717d1b1778bd2d51cf81f8fdd857fb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b885f396904d7214b2eeb837fb3989cd5db4deae210b67eca24ef3c766dfa24
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14de197ce4fca667a77214b11d375124cfec5ed9c075fb60180e734827aaa864
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0aa4ab3cdb1a9e7e00fd89c904cf6ae8c19a72f37f60ac96d0d021814a6f0bd4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.2089511156082153,
-  "best_model_checkpoint": "miner_id_24/checkpoint-600",
-  "epoch": 0.03813912836200389,
   "eval_steps": 100,
-  "global_step": 600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4263,6 +4263,714 @@
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 600
     }
   ],
   "logging_steps": 1,
@@ -4291,7 +4999,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.117735147995136e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.2046868801116943,
+  "best_model_checkpoint": "miner_id_24/checkpoint-700",
+  "epoch": 0.04449564975567121,
   "eval_steps": 100,
+  "global_step": 700,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 4.035,
       "eval_steps_per_second": 1.009,
       "step": 600
+    },
+    {
+      "epoch": 0.03820269357594057,
+      "grad_norm": 0.2633255422115326,
+      "learning_rate": 3.820213483066737e-05,
+      "loss": 1.1605,
+      "step": 601
+    },
+    {
+      "epoch": 0.03826625878987724,
+      "grad_norm": 0.24185825884342194,
+      "learning_rate": 3.7904999416234864e-05,
+      "loss": 1.2412,
+      "step": 602
+    },
+    {
+      "epoch": 0.03832982400381391,
+      "grad_norm": 0.26332393288612366,
+      "learning_rate": 3.7608753611846446e-05,
+      "loss": 1.222,
+      "step": 603
+    },
+    {
+      "epoch": 0.03839338921775059,
+      "grad_norm": 0.258789986371994,
+      "learning_rate": 3.731340166169635e-05,
+      "loss": 1.105,
+      "step": 604
+    },
+    {
+      "epoch": 0.03845695443168726,
+      "grad_norm": 0.2542060315608978,
+      "learning_rate": 3.701894779717286e-05,
+      "loss": 1.149,
+      "step": 605
+    },
+    {
+      "epoch": 0.03852051964562393,
+      "grad_norm": 0.2608526647090912,
+      "learning_rate": 3.6725396236797935e-05,
+      "loss": 1.1996,
+      "step": 606
+    },
+    {
+      "epoch": 0.03858408485956061,
+      "grad_norm": 0.24727903306484222,
+      "learning_rate": 3.64327511861663e-05,
+      "loss": 1.1608,
+      "step": 607
+    },
+    {
+      "epoch": 0.038647650073497276,
+      "grad_norm": 0.2504411041736603,
+      "learning_rate": 3.614101683788575e-05,
+      "loss": 1.1245,
+      "step": 608
+    },
+    {
+      "epoch": 0.03871121528743395,
+      "grad_norm": 0.26074525713920593,
+      "learning_rate": 3.585019737151669e-05,
+      "loss": 1.1894,
+      "step": 609
+    },
+    {
+      "epoch": 0.03877478050137063,
+      "grad_norm": 0.2569214105606079,
+      "learning_rate": 3.5560296953512295e-05,
+      "loss": 1.2079,
+      "step": 610
+    },
+    {
+      "epoch": 0.038838345715307296,
+      "grad_norm": 0.2611664831638336,
+      "learning_rate": 3.52713197371591e-05,
+      "loss": 1.1224,
+      "step": 611
+    },
+    {
+      "epoch": 0.03890191092924397,
+      "grad_norm": 0.25584879517555237,
+      "learning_rate": 3.498326986251717e-05,
+      "loss": 1.3047,
+      "step": 612
+    },
+    {
+      "epoch": 0.03896547614318065,
+      "grad_norm": 0.24464906752109528,
+      "learning_rate": 3.4696151456360956e-05,
+      "loss": 1.0912,
+      "step": 613
+    },
+    {
+      "epoch": 0.039029041357117315,
+      "grad_norm": 0.25548434257507324,
+      "learning_rate": 3.4409968632120126e-05,
+      "loss": 1.2166,
+      "step": 614
+    },
+    {
+      "epoch": 0.03909260657105399,
+      "grad_norm": 0.25605612993240356,
+      "learning_rate": 3.4124725489820645e-05,
+      "loss": 1.2629,
+      "step": 615
+    },
+    {
+      "epoch": 0.039156171784990666,
+      "grad_norm": 0.24373793601989746,
+      "learning_rate": 3.3840426116026044e-05,
+      "loss": 1.1917,
+      "step": 616
+    },
+    {
+      "epoch": 0.039219736998927335,
+      "grad_norm": 0.24588941037654877,
+      "learning_rate": 3.3557074583778814e-05,
+      "loss": 1.286,
+      "step": 617
+    },
+    {
+      "epoch": 0.03928330221286401,
+      "grad_norm": 0.25556549429893494,
+      "learning_rate": 3.327467495254225e-05,
+      "loss": 1.2295,
+      "step": 618
+    },
+    {
+      "epoch": 0.039346867426800686,
+      "grad_norm": 0.2570589780807495,
+      "learning_rate": 3.299323126814191e-05,
+      "loss": 1.2417,
+      "step": 619
+    },
+    {
+      "epoch": 0.039410432640737354,
+      "grad_norm": 0.24832259118556976,
+      "learning_rate": 3.2712747562708115e-05,
+      "loss": 1.2996,
+      "step": 620
+    },
+    {
+      "epoch": 0.03947399785467403,
+      "grad_norm": 0.2418624311685562,
+      "learning_rate": 3.243322785461781e-05,
+      "loss": 1.2418,
+      "step": 621
+    },
+    {
+      "epoch": 0.039537563068610705,
+      "grad_norm": 0.2648262083530426,
+      "learning_rate": 3.215467614843719e-05,
+      "loss": 1.2913,
+      "step": 622
+    },
+    {
+      "epoch": 0.039601128282547374,
+      "grad_norm": 0.2682283818721771,
+      "learning_rate": 3.187709643486427e-05,
+      "loss": 1.2148,
+      "step": 623
+    },
+    {
+      "epoch": 0.03966469349648405,
+      "grad_norm": 0.26762086153030396,
+      "learning_rate": 3.160049269067174e-05,
+      "loss": 1.2949,
+      "step": 624
+    },
+    {
+      "epoch": 0.039728258710420725,
+      "grad_norm": 0.25577932596206665,
+      "learning_rate": 3.132486887864992e-05,
+      "loss": 1.12,
+      "step": 625
+    },
+    {
+      "epoch": 0.03979182392435739,
+      "grad_norm": 0.2690037190914154,
+      "learning_rate": 3.105022894755003e-05,
+      "loss": 1.1813,
+      "step": 626
+    },
+    {
+      "epoch": 0.03985538913829407,
+      "grad_norm": 0.25239890813827515,
+      "learning_rate": 3.077657683202779e-05,
+      "loss": 1.2678,
+      "step": 627
+    },
+    {
+      "epoch": 0.039918954352230744,
+      "grad_norm": 0.2665114104747772,
+      "learning_rate": 3.0503916452586612e-05,
+      "loss": 1.0682,
+      "step": 628
+    },
+    {
+      "epoch": 0.03998251956616741,
+      "grad_norm": 0.2506917715072632,
+      "learning_rate": 3.0232251715521932e-05,
+      "loss": 1.2247,
+      "step": 629
+    },
+    {
+      "epoch": 0.04004608478010409,
+      "grad_norm": 0.24417519569396973,
+      "learning_rate": 2.9961586512864947e-05,
+      "loss": 1.2097,
+      "step": 630
+    },
+    {
+      "epoch": 0.040109649994040764,
+      "grad_norm": 0.2546679973602295,
+      "learning_rate": 2.9691924722326826e-05,
+      "loss": 1.1986,
+      "step": 631
+    },
+    {
+      "epoch": 0.04017321520797743,
+      "grad_norm": 0.25688743591308594,
+      "learning_rate": 2.9423270207243437e-05,
+      "loss": 1.2543,
+      "step": 632
+    },
+    {
+      "epoch": 0.04023678042191411,
+      "grad_norm": 0.23989447951316833,
+      "learning_rate": 2.9155626816519677e-05,
+      "loss": 1.0773,
+      "step": 633
+    },
+    {
+      "epoch": 0.04030034563585078,
+      "grad_norm": 0.25679922103881836,
+      "learning_rate": 2.888899838457455e-05,
+      "loss": 1.2843,
+      "step": 634
+    },
+    {
+      "epoch": 0.04036391084978745,
+      "grad_norm": 0.2580190896987915,
+      "learning_rate": 2.8623388731286093e-05,
+      "loss": 1.1911,
+      "step": 635
+    },
+    {
+      "epoch": 0.04042747606372413,
+      "grad_norm": 0.24526208639144897,
+      "learning_rate": 2.835880166193683e-05,
+      "loss": 1.2574,
+      "step": 636
+    },
+    {
+      "epoch": 0.0404910412776608,
+      "grad_norm": 0.25860583782196045,
+      "learning_rate": 2.8095240967158954e-05,
+      "loss": 1.2943,
+      "step": 637
+    },
+    {
+      "epoch": 0.04055460649159747,
+      "grad_norm": 0.26202407479286194,
+      "learning_rate": 2.7832710422880328e-05,
+      "loss": 1.1769,
+      "step": 638
+    },
+    {
+      "epoch": 0.040618171705534147,
+      "grad_norm": 0.2458542138338089,
+      "learning_rate": 2.75712137902703e-05,
+      "loss": 1.1669,
+      "step": 639
+    },
+    {
+      "epoch": 0.04068173691947082,
+      "grad_norm": 0.25534749031066895,
+      "learning_rate": 2.7310754815685624e-05,
+      "loss": 1.2057,
+      "step": 640
+    },
+    {
+      "epoch": 0.04074530213340749,
+      "grad_norm": 0.2514583170413971,
+      "learning_rate": 2.7051337230617125e-05,
+      "loss": 1.2483,
+      "step": 641
+    },
+    {
+      "epoch": 0.040808867347344166,
+      "grad_norm": 0.25142601132392883,
+      "learning_rate": 2.679296475163595e-05,
+      "loss": 1.1685,
+      "step": 642
+    },
+    {
+      "epoch": 0.04087243256128084,
+      "grad_norm": 0.2746109962463379,
+      "learning_rate": 2.6535641080340458e-05,
+      "loss": 1.2658,
+      "step": 643
+    },
+    {
+      "epoch": 0.04093599777521751,
+      "grad_norm": 0.26082682609558105,
+      "learning_rate": 2.6279369903303175e-05,
+      "loss": 1.2184,
+      "step": 644
+    },
+    {
+      "epoch": 0.040999562989154185,
+      "grad_norm": 0.27172860503196716,
+      "learning_rate": 2.6024154892017937e-05,
+      "loss": 1.2417,
+      "step": 645
+    },
+    {
+      "epoch": 0.04106312820309086,
+      "grad_norm": 0.26511403918266296,
+      "learning_rate": 2.5769999702847346e-05,
+      "loss": 1.2099,
+      "step": 646
+    },
+    {
+      "epoch": 0.04112669341702753,
+      "grad_norm": 0.26414263248443604,
+      "learning_rate": 2.5516907976970328e-05,
+      "loss": 1.2562,
+      "step": 647
+    },
+    {
+      "epoch": 0.041190258630964205,
+      "grad_norm": 0.25787821412086487,
+      "learning_rate": 2.5264883340330113e-05,
+      "loss": 1.2202,
+      "step": 648
+    },
+    {
+      "epoch": 0.04125382384490088,
+      "grad_norm": 0.25424811244010925,
+      "learning_rate": 2.501392940358197e-05,
+      "loss": 1.2154,
+      "step": 649
+    },
+    {
+      "epoch": 0.04131738905883755,
+      "grad_norm": 0.26234978437423706,
+      "learning_rate": 2.4764049762041874e-05,
+      "loss": 1.242,
+      "step": 650
+    },
+    {
+      "epoch": 0.041380954272774224,
+      "grad_norm": 0.25528523325920105,
+      "learning_rate": 2.4515247995634694e-05,
+      "loss": 1.1873,
+      "step": 651
+    },
+    {
+      "epoch": 0.0414445194867109,
+      "grad_norm": 0.2629062235355377,
+      "learning_rate": 2.426752766884306e-05,
+      "loss": 1.1596,
+      "step": 652
+    },
+    {
+      "epoch": 0.04150808470064757,
+      "grad_norm": 0.24369929730892181,
+      "learning_rate": 2.4020892330656252e-05,
+      "loss": 1.069,
+      "step": 653
+    },
+    {
+      "epoch": 0.041571649914584244,
+      "grad_norm": 0.2602699100971222,
+      "learning_rate": 2.377534551451932e-05,
+      "loss": 1.2132,
+      "step": 654
+    },
+    {
+      "epoch": 0.04163521512852092,
+      "grad_norm": 0.24992002546787262,
+      "learning_rate": 2.353089073828255e-05,
+      "loss": 1.1259,
+      "step": 655
+    },
+    {
+      "epoch": 0.04169878034245759,
+      "grad_norm": 0.2784167528152466,
+      "learning_rate": 2.328753150415094e-05,
+      "loss": 1.1997,
+      "step": 656
+    },
+    {
+      "epoch": 0.04176234555639426,
+      "grad_norm": 0.2581193745136261,
+      "learning_rate": 2.304527129863424e-05,
+      "loss": 1.1832,
+      "step": 657
+    },
+    {
+      "epoch": 0.04182591077033094,
+      "grad_norm": 0.25155678391456604,
+      "learning_rate": 2.280411359249668e-05,
+      "loss": 1.147,
+      "step": 658
+    },
+    {
+      "epoch": 0.04188947598426761,
+      "grad_norm": 0.2618091106414795,
+      "learning_rate": 2.2564061840707495e-05,
+      "loss": 1.303,
+      "step": 659
+    },
+    {
+      "epoch": 0.04195304119820428,
+      "grad_norm": 0.2630173861980438,
+      "learning_rate": 2.2325119482391467e-05,
+      "loss": 1.2555,
+      "step": 660
+    },
+    {
+      "epoch": 0.04201660641214096,
+      "grad_norm": 0.25127795338630676,
+      "learning_rate": 2.2087289940779343e-05,
+      "loss": 1.1694,
+      "step": 661
+    },
+    {
+      "epoch": 0.04208017162607763,
+      "grad_norm": 0.2526141107082367,
+      "learning_rate": 2.185057662315918e-05,
+      "loss": 1.0997,
+      "step": 662
+    },
+    {
+      "epoch": 0.0421437368400143,
+      "grad_norm": 0.2466498613357544,
+      "learning_rate": 2.1614982920827243e-05,
+      "loss": 1.2093,
+      "step": 663
+    },
+    {
+      "epoch": 0.04220730205395098,
+      "grad_norm": 0.2559715211391449,
+      "learning_rate": 2.1380512209039528e-05,
+      "loss": 1.239,
+      "step": 664
+    },
+    {
+      "epoch": 0.042270867267887646,
+      "grad_norm": 0.24562884867191315,
+      "learning_rate": 2.1147167846963422e-05,
+      "loss": 1.1716,
+      "step": 665
+    },
+    {
+      "epoch": 0.04233443248182432,
+      "grad_norm": 0.25966036319732666,
+      "learning_rate": 2.0914953177629548e-05,
+      "loss": 1.2553,
+      "step": 666
+    },
+    {
+      "epoch": 0.042397997695761,
+      "grad_norm": 0.25772759318351746,
+      "learning_rate": 2.068387152788387e-05,
+      "loss": 1.1341,
+      "step": 667
+    },
+    {
+      "epoch": 0.042461562909697666,
+      "grad_norm": 0.24900874495506287,
+      "learning_rate": 2.0453926208340003e-05,
+      "loss": 1.1742,
+      "step": 668
+    },
+    {
+      "epoch": 0.04252512812363434,
+      "grad_norm": 0.2540144622325897,
+      "learning_rate": 2.022512051333194e-05,
+      "loss": 1.1856,
+      "step": 669
+    },
+    {
+      "epoch": 0.04258869333757102,
+      "grad_norm": 0.26840710639953613,
+      "learning_rate": 1.999745772086655e-05,
+      "loss": 1.2104,
+      "step": 670
+    },
+    {
+      "epoch": 0.042652258551507685,
+      "grad_norm": 0.2511826753616333,
+      "learning_rate": 1.9770941092576957e-05,
+      "loss": 1.2477,
+      "step": 671
+    },
+    {
+      "epoch": 0.04271582376544436,
+      "grad_norm": 0.26480165123939514,
+      "learning_rate": 1.954557387367557e-05,
+      "loss": 1.1991,
+      "step": 672
+    },
+    {
+      "epoch": 0.042779388979381036,
+      "grad_norm": 0.2562330663204193,
+      "learning_rate": 1.9321359292907702e-05,
+      "loss": 1.2336,
+      "step": 673
+    },
+    {
+      "epoch": 0.042842954193317705,
+      "grad_norm": 0.25312507152557373,
+      "learning_rate": 1.9098300562505266e-05,
+      "loss": 1.1593,
+      "step": 674
+    },
+    {
+      "epoch": 0.04290651940725438,
+      "grad_norm": 0.2678249180316925,
+      "learning_rate": 1.8876400878140775e-05,
+      "loss": 1.1886,
+      "step": 675
+    },
+    {
+      "epoch": 0.042970084621191056,
+      "grad_norm": 0.25428783893585205,
+      "learning_rate": 1.8655663418881584e-05,
+      "loss": 1.2123,
+      "step": 676
+    },
+    {
+      "epoch": 0.043033649835127724,
+      "grad_norm": 0.2611987292766571,
+      "learning_rate": 1.8436091347144246e-05,
+      "loss": 1.2407,
+      "step": 677
+    },
+    {
+      "epoch": 0.0430972150490644,
+      "grad_norm": 0.2611881196498871,
+      "learning_rate": 1.821768780864943e-05,
+      "loss": 1.1918,
+      "step": 678
+    },
+    {
+      "epoch": 0.043160780263001075,
+      "grad_norm": 0.2661250829696655,
+      "learning_rate": 1.800045593237647e-05,
+      "loss": 1.2046,
+      "step": 679
+    },
+    {
+      "epoch": 0.043224345476937744,
+      "grad_norm": 0.2643533945083618,
+      "learning_rate": 1.7784398830519e-05,
+      "loss": 1.1827,
+      "step": 680
+    },
+    {
+      "epoch": 0.04328791069087442,
+      "grad_norm": 0.25061362981796265,
+      "learning_rate": 1.756951959844e-05,
+      "loss": 1.2051,
+      "step": 681
+    },
+    {
+      "epoch": 0.043351475904811095,
+      "grad_norm": 0.24832050502300262,
+      "learning_rate": 1.7355821314627564e-05,
+      "loss": 1.1704,
+      "step": 682
+    },
+    {
+      "epoch": 0.04341504111874776,
+      "grad_norm": 0.26712068915367126,
+      "learning_rate": 1.7143307040650925e-05,
+      "loss": 1.2655,
+      "step": 683
+    },
+    {
+      "epoch": 0.04347860633268444,
+      "grad_norm": 0.26257115602493286,
+      "learning_rate": 1.6931979821116418e-05,
+      "loss": 1.183,
+      "step": 684
+    },
+    {
+      "epoch": 0.043542171546621114,
+      "grad_norm": 0.2578732371330261,
+      "learning_rate": 1.672184268362391e-05,
+      "loss": 1.1036,
+      "step": 685
+    },
+    {
+      "epoch": 0.04360573676055778,
+      "grad_norm": 0.25747859477996826,
+      "learning_rate": 1.6512898638723497e-05,
+      "loss": 1.2769,
+      "step": 686
+    },
+    {
+      "epoch": 0.04366930197449446,
+      "grad_norm": 0.26593005657196045,
+      "learning_rate": 1.630515067987226e-05,
+      "loss": 1.2707,
+      "step": 687
+    },
+    {
+      "epoch": 0.04373286718843113,
+      "grad_norm": 0.2610760033130646,
+      "learning_rate": 1.6098601783391487e-05,
+      "loss": 1.2226,
+      "step": 688
+    },
+    {
+      "epoch": 0.0437964324023678,
+      "grad_norm": 0.2636644244194031,
+      "learning_rate": 1.5893254908423937e-05,
+      "loss": 1.194,
+      "step": 689
+    },
+    {
+      "epoch": 0.04385999761630448,
+      "grad_norm": 0.25099021196365356,
+      "learning_rate": 1.5689112996891576e-05,
+      "loss": 1.1853,
+      "step": 690
+    },
+    {
+      "epoch": 0.04392356283024115,
+      "grad_norm": 0.26002123951911926,
+      "learning_rate": 1.54861789734532e-05,
+      "loss": 1.1705,
+      "step": 691
+    },
+    {
+      "epoch": 0.04398712804417782,
+      "grad_norm": 0.25610899925231934,
+      "learning_rate": 1.5284455745462834e-05,
+      "loss": 1.173,
+      "step": 692
+    },
+    {
+      "epoch": 0.0440506932581145,
+      "grad_norm": 0.2630417048931122,
+      "learning_rate": 1.5083946202927824e-05,
+      "loss": 1.183,
+      "step": 693
+    },
+    {
+      "epoch": 0.04411425847205117,
+      "grad_norm": 0.26131799817085266,
+      "learning_rate": 1.4884653218467571e-05,
+      "loss": 1.2147,
+      "step": 694
+    },
+    {
+      "epoch": 0.04417782368598784,
+      "grad_norm": 0.2511073052883148,
+      "learning_rate": 1.4686579647272336e-05,
+      "loss": 1.1362,
+      "step": 695
+    },
+    {
+      "epoch": 0.044241388899924516,
+      "grad_norm": 0.2500525414943695,
+      "learning_rate": 1.4489728327062324e-05,
+      "loss": 1.1264,
+      "step": 696
+    },
+    {
+      "epoch": 0.04430495411386119,
+      "grad_norm": 0.2648208439350128,
+      "learning_rate": 1.4294102078047055e-05,
+      "loss": 1.2098,
+      "step": 697
+    },
+    {
+      "epoch": 0.04436851932779786,
+      "grad_norm": 0.2602032721042633,
+      "learning_rate": 1.4099703702884936e-05,
+      "loss": 1.2527,
+      "step": 698
+    },
+    {
+      "epoch": 0.044432084541734536,
+      "grad_norm": 0.26263752579689026,
+      "learning_rate": 1.3906535986643176e-05,
+      "loss": 1.218,
+      "step": 699
+    },
+    {
+      "epoch": 0.04449564975567121,
+      "grad_norm": 0.2635667622089386,
+      "learning_rate": 1.3714601696757712e-05,
+      "loss": 1.2896,
+      "step": 700
+    },
+    {
+      "epoch": 0.04449564975567121,
+      "eval_loss": 1.2046868801116943,
+      "eval_runtime": 1238.8537,
+      "eval_samples_per_second": 4.036,
+      "eval_steps_per_second": 1.009,
+      "step": 700
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 3.637357672660992e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null