jssky committed
Commit 61176b4 · verified · 1 Parent(s): f6b8eed

Training in progress, step 360, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1c4b4e0c119e19824e7bd5407b4bbc3376338122ebee9b6aeec68c49592370df
+ oid sha256:9f87937b8ca27ccd4299b7c5130ae44956cd82f7f19b3390773e63ba13d89567
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ddee69f74c4e0b1f4c805c6781d6e486fedf745854f942f9e17ad08f3a76da24
- size 43122580
+ oid sha256:f877183493e32a2691b5c7511ce4bd58a6fd3cee41d4a62b9ca7258094d06da7
+ size 43123028
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9541b46c7b71871f1a3b9e3df6f3775e4939fad20c1d964c0daf912320c7f532
+ oid sha256:ee84c698affd10554f0eb51f34115cac8713e7377d5275afd8629075df0dbc22
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8cc881deac2823d81b5585b733cf7bc610286ec0ee2764f20dd7976dbe33563b
+ oid sha256:e42796949e3f84fa5a7101ae5e25b3cdd5ef9daeacff6fbfb32969265fb052fd
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.5031446540880503,
+ "epoch": 0.7547169811320755,
  "eval_steps": 120,
- "global_step": 240,
+ "global_step": 360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1703,6 +1703,854 @@
  "eval_samples_per_second": 14.403,
  "eval_steps_per_second": 7.237,
  "step": 240
+ },
+ {
+ "epoch": 0.5052410901467506,
+ "grad_norm": 0.4681568741798401,
+ "learning_rate": 0.00010168171550280648,
+ "loss": 1.4336,
+ "step": 241
+ },
+ {
+ "epoch": 0.5073375262054507,
+ "grad_norm": 0.4985293447971344,
+ "learning_rate": 0.00010100905974490651,
+ "loss": 1.2779,
+ "step": 242
+ },
+ {
+ "epoch": 0.5094339622641509,
+ "grad_norm": 0.5365903377532959,
+ "learning_rate": 0.0001003363583222415,
+ "loss": 1.5644,
+ "step": 243
+ },
+ {
+ "epoch": 0.5115303983228512,
+ "grad_norm": 0.48949047923088074,
+ "learning_rate": 9.96636416777585e-05,
+ "loss": 1.287,
+ "step": 244
+ },
+ {
+ "epoch": 0.5136268343815513,
+ "grad_norm": 0.5527725219726562,
+ "learning_rate": 9.899094025509352e-05,
+ "loss": 1.4812,
+ "step": 245
+ },
+ {
+ "epoch": 0.5157232704402516,
+ "grad_norm": 0.5897342562675476,
+ "learning_rate": 9.831828449719353e-05,
+ "loss": 1.4931,
+ "step": 246
+ },
+ {
+ "epoch": 0.5178197064989518,
+ "grad_norm": 0.5165984630584717,
+ "learning_rate": 9.764570484493915e-05,
+ "loss": 1.208,
+ "step": 247
+ },
+ {
+ "epoch": 0.519916142557652,
+ "grad_norm": 0.654744565486908,
+ "learning_rate": 9.697323173576667e-05,
+ "loss": 1.6835,
+ "step": 248
+ },
+ {
+ "epoch": 0.5220125786163522,
+ "grad_norm": 0.6669768691062927,
+ "learning_rate": 9.630089560229088e-05,
+ "loss": 1.1676,
+ "step": 249
+ },
+ {
+ "epoch": 0.5241090146750524,
+ "grad_norm": 1.1078684329986572,
+ "learning_rate": 9.562872687092783e-05,
+ "loss": 1.4822,
+ "step": 250
+ },
+ {
+ "epoch": 0.5262054507337526,
+ "grad_norm": 0.31687837839126587,
+ "learning_rate": 9.495675596051777e-05,
+ "loss": 1.1412,
+ "step": 251
+ },
+ {
+ "epoch": 0.5283018867924528,
+ "grad_norm": 0.32858502864837646,
+ "learning_rate": 9.428501328094855e-05,
+ "loss": 1.1039,
+ "step": 252
+ },
+ {
+ "epoch": 0.5303983228511531,
+ "grad_norm": 0.32667338848114014,
+ "learning_rate": 9.36135292317796e-05,
+ "loss": 1.2138,
+ "step": 253
+ },
+ {
+ "epoch": 0.5324947589098532,
+ "grad_norm": 0.3239830732345581,
+ "learning_rate": 9.294233420086603e-05,
+ "loss": 1.3107,
+ "step": 254
+ },
+ {
+ "epoch": 0.5345911949685535,
+ "grad_norm": 0.37542301416397095,
+ "learning_rate": 9.227145856298344e-05,
+ "loss": 1.1616,
+ "step": 255
+ },
+ {
+ "epoch": 0.5366876310272537,
+ "grad_norm": 0.356937050819397,
+ "learning_rate": 9.160093267845349e-05,
+ "loss": 1.3193,
+ "step": 256
+ },
+ {
+ "epoch": 0.5387840670859538,
+ "grad_norm": 0.31748053431510925,
+ "learning_rate": 9.093078689176972e-05,
+ "loss": 1.2206,
+ "step": 257
+ },
+ {
+ "epoch": 0.5408805031446541,
+ "grad_norm": 0.3468354642391205,
+ "learning_rate": 9.026105153022454e-05,
+ "loss": 1.3771,
+ "step": 258
+ },
+ {
+ "epoch": 0.5429769392033543,
+ "grad_norm": 0.35934552550315857,
+ "learning_rate": 8.95917569025366e-05,
+ "loss": 1.2956,
+ "step": 259
+ },
+ {
+ "epoch": 0.5450733752620545,
+ "grad_norm": 0.35253018140792847,
+ "learning_rate": 8.892293329747922e-05,
+ "loss": 1.2584,
+ "step": 260
+ },
+ {
+ "epoch": 0.5471698113207547,
+ "grad_norm": 0.3898825943470001,
+ "learning_rate": 8.82546109825098e-05,
+ "loss": 1.2658,
+ "step": 261
+ },
+ {
+ "epoch": 0.549266247379455,
+ "grad_norm": 0.44399771094322205,
+ "learning_rate": 8.758682020239984e-05,
+ "loss": 1.314,
+ "step": 262
+ },
+ {
+ "epoch": 0.5513626834381551,
+ "grad_norm": 0.3997746706008911,
+ "learning_rate": 8.69195911778664e-05,
+ "loss": 1.1614,
+ "step": 263
+ },
+ {
+ "epoch": 0.5534591194968553,
+ "grad_norm": 0.38755127787590027,
+ "learning_rate": 8.625295410420451e-05,
+ "loss": 1.209,
+ "step": 264
+ },
+ {
+ "epoch": 0.5555555555555556,
+ "grad_norm": 0.3919242024421692,
+ "learning_rate": 8.558693914992046e-05,
+ "loss": 1.4929,
+ "step": 265
+ },
+ {
+ "epoch": 0.5576519916142557,
+ "grad_norm": 0.37072527408599854,
+ "learning_rate": 8.492157645536678e-05,
+ "loss": 1.0455,
+ "step": 266
+ },
+ {
+ "epoch": 0.559748427672956,
+ "grad_norm": 0.39799362421035767,
+ "learning_rate": 8.425689613137813e-05,
+ "loss": 1.3028,
+ "step": 267
+ },
+ {
+ "epoch": 0.5618448637316562,
+ "grad_norm": 0.34042948484420776,
+ "learning_rate": 8.359292825790859e-05,
+ "loss": 1.2346,
+ "step": 268
+ },
+ {
+ "epoch": 0.5639412997903563,
+ "grad_norm": 0.41574835777282715,
+ "learning_rate": 8.292970288267042e-05,
+ "loss": 1.6697,
+ "step": 269
+ },
+ {
+ "epoch": 0.5660377358490566,
+ "grad_norm": 0.39489176869392395,
+ "learning_rate": 8.226725001977445e-05,
+ "loss": 1.4031,
+ "step": 270
+ },
+ {
+ "epoch": 0.5681341719077568,
+ "grad_norm": 0.37004798650741577,
+ "learning_rate": 8.160559964837149e-05,
+ "loss": 1.0657,
+ "step": 271
+ },
+ {
+ "epoch": 0.570230607966457,
+ "grad_norm": 0.4158036410808563,
+ "learning_rate": 8.094478171129588e-05,
+ "loss": 1.4288,
+ "step": 272
+ },
+ {
+ "epoch": 0.5723270440251572,
+ "grad_norm": 0.39994949102401733,
+ "learning_rate": 8.028482611371028e-05,
+ "loss": 1.4105,
+ "step": 273
+ },
+ {
+ "epoch": 0.5744234800838575,
+ "grad_norm": 0.3869493007659912,
+ "learning_rate": 7.96257627217524e-05,
+ "loss": 1.452,
+ "step": 274
+ },
+ {
+ "epoch": 0.5765199161425576,
+ "grad_norm": 0.41220688819885254,
+ "learning_rate": 7.896762136118342e-05,
+ "loss": 1.6473,
+ "step": 275
+ },
+ {
+ "epoch": 0.5786163522012578,
+ "grad_norm": 0.4000371992588043,
+ "learning_rate": 7.831043181603814e-05,
+ "loss": 1.2974,
+ "step": 276
+ },
+ {
+ "epoch": 0.5807127882599581,
+ "grad_norm": 0.43282032012939453,
+ "learning_rate": 7.765422382727719e-05,
+ "loss": 1.4971,
+ "step": 277
+ },
+ {
+ "epoch": 0.5828092243186582,
+ "grad_norm": 0.37778139114379883,
+ "learning_rate": 7.699902709144114e-05,
+ "loss": 1.2826,
+ "step": 278
+ },
+ {
+ "epoch": 0.5849056603773585,
+ "grad_norm": 0.3722570538520813,
+ "learning_rate": 7.634487125930648e-05,
+ "loss": 1.0699,
+ "step": 279
+ },
+ {
+ "epoch": 0.5870020964360587,
+ "grad_norm": 0.4069075882434845,
+ "learning_rate": 7.569178593454392e-05,
+ "loss": 1.2477,
+ "step": 280
+ },
+ {
+ "epoch": 0.589098532494759,
+ "grad_norm": 0.4309166669845581,
+ "learning_rate": 7.503980067237852e-05,
+ "loss": 1.2098,
+ "step": 281
+ },
+ {
+ "epoch": 0.5911949685534591,
+ "grad_norm": 0.3983183205127716,
+ "learning_rate": 7.438894497825235e-05,
+ "loss": 1.2926,
+ "step": 282
+ },
+ {
+ "epoch": 0.5932914046121593,
+ "grad_norm": 0.4100129008293152,
+ "learning_rate": 7.373924830648904e-05,
+ "loss": 1.2444,
+ "step": 283
+ },
+ {
+ "epoch": 0.5953878406708596,
+ "grad_norm": 0.45969000458717346,
+ "learning_rate": 7.309074005896103e-05,
+ "loss": 1.5165,
+ "step": 284
+ },
+ {
+ "epoch": 0.5974842767295597,
+ "grad_norm": 0.42328140139579773,
+ "learning_rate": 7.244344958375881e-05,
+ "loss": 1.2259,
+ "step": 285
+ },
+ {
+ "epoch": 0.59958071278826,
+ "grad_norm": 0.4778403639793396,
+ "learning_rate": 7.179740617386295e-05,
+ "loss": 1.3914,
+ "step": 286
+ },
+ {
+ "epoch": 0.6016771488469602,
+ "grad_norm": 0.5297214388847351,
+ "learning_rate": 7.115263906581829e-05,
+ "loss": 1.6115,
+ "step": 287
+ },
+ {
+ "epoch": 0.6037735849056604,
+ "grad_norm": 0.5362441539764404,
+ "learning_rate": 7.0509177438411e-05,
+ "loss": 1.3647,
+ "step": 288
+ },
+ {
+ "epoch": 0.6058700209643606,
+ "grad_norm": 0.4826620817184448,
+ "learning_rate": 6.986705041134796e-05,
+ "loss": 1.5252,
+ "step": 289
+ },
+ {
+ "epoch": 0.6079664570230608,
+ "grad_norm": 0.4332706928253174,
+ "learning_rate": 6.922628704393904e-05,
+ "loss": 1.2502,
+ "step": 290
+ },
+ {
+ "epoch": 0.610062893081761,
+ "grad_norm": 0.47122621536254883,
+ "learning_rate": 6.858691633378202e-05,
+ "loss": 1.2901,
+ "step": 291
+ },
+ {
+ "epoch": 0.6121593291404612,
+ "grad_norm": 0.5842708945274353,
+ "learning_rate": 6.794896721545032e-05,
+ "loss": 1.5187,
+ "step": 292
+ },
+ {
+ "epoch": 0.6142557651991615,
+ "grad_norm": 0.4648444652557373,
+ "learning_rate": 6.73124685591835e-05,
+ "loss": 1.2955,
+ "step": 293
+ },
+ {
+ "epoch": 0.6163522012578616,
+ "grad_norm": 0.5161048173904419,
+ "learning_rate": 6.667744916958085e-05,
+ "loss": 1.4571,
+ "step": 294
+ },
+ {
+ "epoch": 0.6184486373165619,
+ "grad_norm": 0.4833495020866394,
+ "learning_rate": 6.604393778429772e-05,
+ "loss": 1.0478,
+ "step": 295
+ },
+ {
+ "epoch": 0.6205450733752621,
+ "grad_norm": 0.5789321660995483,
+ "learning_rate": 6.541196307274517e-05,
+ "loss": 1.3774,
+ "step": 296
+ },
+ {
+ "epoch": 0.6226415094339622,
+ "grad_norm": 0.6312127709388733,
+ "learning_rate": 6.478155363479236e-05,
+ "loss": 1.6907,
+ "step": 297
+ },
+ {
+ "epoch": 0.6247379454926625,
+ "grad_norm": 0.5820334553718567,
+ "learning_rate": 6.415273799947234e-05,
+ "loss": 1.1408,
+ "step": 298
+ },
+ {
+ "epoch": 0.6268343815513627,
+ "grad_norm": 0.6500141024589539,
+ "learning_rate": 6.352554462369112e-05,
+ "loss": 1.3631,
+ "step": 299
+ },
+ {
+ "epoch": 0.6289308176100629,
+ "grad_norm": 0.9458006024360657,
+ "learning_rate": 6.290000189093959e-05,
+ "loss": 1.3022,
+ "step": 300
+ },
+ {
+ "epoch": 0.6310272536687631,
+ "grad_norm": 0.32660362124443054,
+ "learning_rate": 6.227613811000925e-05,
+ "loss": 1.015,
+ "step": 301
+ },
+ {
+ "epoch": 0.6331236897274634,
+ "grad_norm": 0.2937367558479309,
+ "learning_rate": 6.165398151371106e-05,
+ "loss": 0.9367,
+ "step": 302
+ },
+ {
+ "epoch": 0.6352201257861635,
+ "grad_norm": 0.37501296401023865,
+ "learning_rate": 6.103356025759759e-05,
+ "loss": 1.2524,
+ "step": 303
+ },
+ {
+ "epoch": 0.6373165618448637,
+ "grad_norm": 0.29749903082847595,
+ "learning_rate": 6.04149024186891e-05,
+ "loss": 1.068,
+ "step": 304
+ },
+ {
+ "epoch": 0.639412997903564,
+ "grad_norm": 0.2890242338180542,
+ "learning_rate": 5.9798035994202836e-05,
+ "loss": 1.1022,
+ "step": 305
+ },
+ {
+ "epoch": 0.6415094339622641,
+ "grad_norm": 0.33415907621383667,
+ "learning_rate": 5.918298890028591e-05,
+ "loss": 1.4532,
+ "step": 306
+ },
+ {
+ "epoch": 0.6436058700209644,
+ "grad_norm": 0.2996525168418884,
+ "learning_rate": 5.8569788970752114e-05,
+ "loss": 1.0128,
+ "step": 307
+ },
+ {
+ "epoch": 0.6457023060796646,
+ "grad_norm": 0.343421071767807,
+ "learning_rate": 5.795846395582225e-05,
+ "loss": 1.0972,
+ "step": 308
+ },
+ {
+ "epoch": 0.6477987421383647,
+ "grad_norm": 0.32696643471717834,
+ "learning_rate": 5.734904152086828e-05,
+ "loss": 1.2231,
+ "step": 309
+ },
+ {
+ "epoch": 0.649895178197065,
+ "grad_norm": 0.34797459840774536,
+ "learning_rate": 5.6741549245161285e-05,
+ "loss": 1.2454,
+ "step": 310
+ },
+ {
+ "epoch": 0.6519916142557652,
+ "grad_norm": 0.3834897577762604,
+ "learning_rate": 5.6136014620623525e-05,
+ "loss": 1.288,
+ "step": 311
+ },
+ {
+ "epoch": 0.6540880503144654,
+ "grad_norm": 0.3343852162361145,
+ "learning_rate": 5.5532465050584206e-05,
+ "loss": 1.1281,
+ "step": 312
+ },
+ {
+ "epoch": 0.6561844863731656,
+ "grad_norm": 0.33837154507637024,
+ "learning_rate": 5.4930927848539256e-05,
+ "loss": 1.1146,
+ "step": 313
+ },
+ {
+ "epoch": 0.6582809224318659,
+ "grad_norm": 0.395158052444458,
+ "learning_rate": 5.433143023691547e-05,
+ "loss": 1.2687,
+ "step": 314
+ },
+ {
+ "epoch": 0.660377358490566,
+ "grad_norm": 0.3619198203086853,
+ "learning_rate": 5.373399934583839e-05,
+ "loss": 1.3387,
+ "step": 315
+ },
+ {
+ "epoch": 0.6624737945492662,
+ "grad_norm": 0.3589290380477905,
+ "learning_rate": 5.3138662211904654e-05,
+ "loss": 1.1821,
+ "step": 316
+ },
+ {
+ "epoch": 0.6645702306079665,
+ "grad_norm": 0.46267178654670715,
+ "learning_rate": 5.25454457769583e-05,
+ "loss": 1.3747,
+ "step": 317
+ },
+ {
+ "epoch": 0.6666666666666666,
+ "grad_norm": 0.35914307832717896,
+ "learning_rate": 5.1954376886871746e-05,
+ "loss": 1.0113,
+ "step": 318
+ },
+ {
+ "epoch": 0.6687631027253669,
+ "grad_norm": 0.3956226408481598,
+ "learning_rate": 5.1365482290330645e-05,
+ "loss": 1.2348,
+ "step": 319
+ },
+ {
+ "epoch": 0.6708595387840671,
+ "grad_norm": 0.3596416115760803,
+ "learning_rate": 5.07787886376236e-05,
+ "loss": 1.0949,
+ "step": 320
+ },
+ {
+ "epoch": 0.6729559748427673,
+ "grad_norm": 0.4158559739589691,
+ "learning_rate": 5.019432247943595e-05,
+ "loss": 1.236,
+ "step": 321
+ },
+ {
+ "epoch": 0.6750524109014675,
+ "grad_norm": 0.4549407958984375,
+ "learning_rate": 4.961211026564837e-05,
+ "loss": 1.3363,
+ "step": 322
+ },
+ {
+ "epoch": 0.6771488469601677,
+ "grad_norm": 0.3847133219242096,
+ "learning_rate": 4.90321783441397e-05,
+ "loss": 0.9304,
+ "step": 323
+ },
+ {
+ "epoch": 0.6792452830188679,
+ "grad_norm": 0.381661057472229,
+ "learning_rate": 4.845455295959468e-05,
+ "loss": 0.9649,
+ "step": 324
+ },
+ {
+ "epoch": 0.6813417190775681,
+ "grad_norm": 0.3821321129798889,
+ "learning_rate": 4.787926025231634e-05,
+ "loss": 1.128,
+ "step": 325
+ },
+ {
+ "epoch": 0.6834381551362684,
+ "grad_norm": 0.42627325654029846,
+ "learning_rate": 4.730632625704288e-05,
+ "loss": 1.2158,
+ "step": 326
+ },
+ {
+ "epoch": 0.6855345911949685,
+ "grad_norm": 0.5116820931434631,
+ "learning_rate": 4.673577690176956e-05,
+ "loss": 1.228,
+ "step": 327
+ },
+ {
+ "epoch": 0.6876310272536688,
+ "grad_norm": 0.37617841362953186,
+ "learning_rate": 4.616763800657534e-05,
+ "loss": 1.0418,
+ "step": 328
+ },
+ {
+ "epoch": 0.689727463312369,
+ "grad_norm": 0.40348488092422485,
+ "learning_rate": 4.560193528245425e-05,
+ "loss": 1.303,
+ "step": 329
+ },
+ {
+ "epoch": 0.6918238993710691,
+ "grad_norm": 0.39703086018562317,
+ "learning_rate": 4.5038694330152135e-05,
+ "loss": 1.0071,
+ "step": 330
+ },
+ {
+ "epoch": 0.6939203354297694,
+ "grad_norm": 0.4976246654987335,
+ "learning_rate": 4.447794063900772e-05,
+ "loss": 1.4131,
+ "step": 331
+ },
+ {
+ "epoch": 0.6960167714884696,
+ "grad_norm": 0.4676234722137451,
+ "learning_rate": 4.391969958579948e-05,
+ "loss": 1.4407,
+ "step": 332
+ },
+ {
+ "epoch": 0.6981132075471698,
+ "grad_norm": 0.47766080498695374,
+ "learning_rate": 4.3363996433596954e-05,
+ "loss": 1.196,
+ "step": 333
+ },
+ {
+ "epoch": 0.70020964360587,
+ "grad_norm": 0.45446816086769104,
+ "learning_rate": 4.281085633061764e-05,
+ "loss": 1.4197,
+ "step": 334
+ },
+ {
+ "epoch": 0.7023060796645703,
+ "grad_norm": 0.4232485294342041,
+ "learning_rate": 4.2260304309088696e-05,
+ "loss": 1.1722,
+ "step": 335
+ },
+ {
+ "epoch": 0.7044025157232704,
+ "grad_norm": 0.4267813563346863,
+ "learning_rate": 4.171236528411436e-05,
+ "loss": 1.1742,
+ "step": 336
+ },
+ {
+ "epoch": 0.7064989517819706,
+ "grad_norm": 0.5247170329093933,
+ "learning_rate": 4.116706405254834e-05,
+ "loss": 1.4083,
+ "step": 337
+ },
+ {
+ "epoch": 0.7085953878406709,
+ "grad_norm": 0.4244326949119568,
+ "learning_rate": 4.0624425291871506e-05,
+ "loss": 0.9588,
+ "step": 338
+ },
+ {
+ "epoch": 0.710691823899371,
+ "grad_norm": 0.47463715076446533,
+ "learning_rate": 4.0084473559075333e-05,
+ "loss": 1.2465,
+ "step": 339
+ },
+ {
+ "epoch": 0.7127882599580713,
+ "grad_norm": 0.5082678198814392,
+ "learning_rate": 3.9547233289550356e-05,
+ "loss": 1.2823,
+ "step": 340
+ },
+ {
+ "epoch": 0.7148846960167715,
+ "grad_norm": 0.4892716407775879,
+ "learning_rate": 3.901272879598058e-05,
+ "loss": 1.3535,
+ "step": 341
+ },
+ {
+ "epoch": 0.7169811320754716,
+ "grad_norm": 0.4783913791179657,
+ "learning_rate": 3.848098426724306e-05,
+ "loss": 1.2831,
+ "step": 342
+ },
+ {
+ "epoch": 0.7190775681341719,
+ "grad_norm": 0.551485002040863,
+ "learning_rate": 3.7952023767313264e-05,
+ "loss": 1.4206,
+ "step": 343
+ },
+ {
+ "epoch": 0.7211740041928721,
+ "grad_norm": 0.5458889007568359,
+ "learning_rate": 3.7425871234176134e-05,
+ "loss": 1.192,
+ "step": 344
+ },
+ {
+ "epoch": 0.7232704402515723,
+ "grad_norm": 0.5561458468437195,
+ "learning_rate": 3.690255047874267e-05,
+ "loss": 1.2386,
+ "step": 345
+ },
+ {
+ "epoch": 0.7253668763102725,
+ "grad_norm": 0.5652120113372803,
+ "learning_rate": 3.6382085183772394e-05,
+ "loss": 1.1185,
+ "step": 346
+ },
+ {
+ "epoch": 0.7274633123689728,
+ "grad_norm": 0.6339935064315796,
+ "learning_rate": 3.586449890280172e-05,
+ "loss": 1.406,
+ "step": 347
+ },
+ {
+ "epoch": 0.7295597484276729,
+ "grad_norm": 0.6352638006210327,
+ "learning_rate": 3.534981505907792e-05,
+ "loss": 1.4451,
+ "step": 348
+ },
+ {
+ "epoch": 0.7316561844863732,
+ "grad_norm": 0.7120410203933716,
+ "learning_rate": 3.483805694449913e-05,
+ "loss": 1.0784,
+ "step": 349
+ },
+ {
+ "epoch": 0.7337526205450734,
+ "grad_norm": 0.9480962753295898,
+ "learning_rate": 3.432924771856029e-05,
+ "loss": 1.1712,
+ "step": 350
+ },
+ {
+ "epoch": 0.7358490566037735,
+ "grad_norm": 0.35054638981819153,
+ "learning_rate": 3.3823410407305015e-05,
+ "loss": 1.3018,
+ "step": 351
+ },
+ {
+ "epoch": 0.7379454926624738,
+ "grad_norm": 0.30331677198410034,
+ "learning_rate": 3.33205679022837e-05,
+ "loss": 1.2488,
+ "step": 352
+ },
+ {
+ "epoch": 0.740041928721174,
+ "grad_norm": 0.30316171050071716,
+ "learning_rate": 3.2820742959517436e-05,
+ "loss": 0.9948,
+ "step": 353
+ },
+ {
+ "epoch": 0.7421383647798742,
+ "grad_norm": 0.3911697268486023,
+ "learning_rate": 3.232395819846824e-05,
+ "loss": 1.1168,
+ "step": 354
+ },
+ {
+ "epoch": 0.7442348008385744,
+ "grad_norm": 0.3382274806499481,
+ "learning_rate": 3.1830236101015446e-05,
+ "loss": 1.0427,
+ "step": 355
+ },
+ {
+ "epoch": 0.7463312368972747,
+ "grad_norm": 0.32363229990005493,
+ "learning_rate": 3.1339599010438134e-05,
+ "loss": 0.94,
+ "step": 356
+ },
+ {
+ "epoch": 0.7484276729559748,
+ "grad_norm": 0.3242010772228241,
+ "learning_rate": 3.0852069130404284e-05,
+ "loss": 1.2767,
+ "step": 357
+ },
+ {
+ "epoch": 0.750524109014675,
+ "grad_norm": 0.33006346225738525,
+ "learning_rate": 3.036766852396561e-05,
+ "loss": 0.9687,
+ "step": 358
+ },
+ {
+ "epoch": 0.7526205450733753,
+ "grad_norm": 0.35846400260925293,
+ "learning_rate": 2.9886419112559394e-05,
+ "loss": 1.2036,
+ "step": 359
+ },
+ {
+ "epoch": 0.7547169811320755,
+ "grad_norm": 0.35825300216674805,
+ "learning_rate": 2.9408342675016286e-05,
+ "loss": 1.4004,
+ "step": 360
+ },
+ {
+ "epoch": 0.7547169811320755,
+ "eval_loss": 1.2862719297409058,
+ "eval_runtime": 13.9861,
+ "eval_samples_per_second": 14.371,
+ "eval_steps_per_second": 7.221,
+ "step": 360
  }
  ],
  "logging_steps": 1,
@@ -1722,7 +2570,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.0089372973635994e+17,
+ "total_flos": 1.528664851092603e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null