fats-fme committed on
Commit b0041c6
1 Parent(s): 6999d3e

Training in progress, step 249, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f9e7aff42b36fe14e95ece06193160112474b8a29fc3680ce273c922ca5686f6
+ oid sha256:12fef18f99c9bf3ec9eedf986ccbe12d2b84ec11b66e3b9788ae7ec43065b3d7
  size 216151256
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2e21f976a284dd81c64396ec6b6206079943029f7c09ac486e503562b06e47e6
+ oid sha256:541a7b0bf8fbbc06854b3c570354a27505018117b0f0d67f11955711b3bef1b4
  size 432640054
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bb3607b5839cda7054779e8f957cbf2db3456879873cc4e34eac04cbf33f5db8
+ oid sha256:341f7be18cc89c2ad2dec55ac567729ae8e5db65bdf39a5bc196fbd79e2cbf16
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fd635f6ad590a43a7a075b3fb4377adaa95cf2d835f115014607cf181d2b6449
+ oid sha256:d870e1e8472cc5e0d2cb8fe273473f96e2cec9ffdf9fce6a51cdd5b21cd3bae6
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e3070c5337425657c2fec031251a5e4e8042c43dd7a5d4d7f77fa453b02282be
+ oid sha256:3f919157faf64362df2e66ee2a7671eb7f6cf8287caadba981f351111997c856
  size 1064
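
Each of the checkpoint files above is stored through Git LFS, so this commit only rewrites the three-line pointer files (version, oid, size) rather than the binary blobs themselves. As a rough illustration only (not part of this commit; the file paths are placeholders), a pointer can be parsed and a downloaded blob checked against it like this:

import hashlib
from pathlib import Path

def parse_lfs_pointer(pointer_path):
    # Split each "key value" line of the pointer file into a dict, e.g.
    # {"version": "https://git-lfs.github.com/spec/v1", "oid": "sha256:...", "size": "1064"}.
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def blob_matches_pointer(blob_path, pointer):
    # Recompute the blob's sha256 and compare it with the oid and size recorded in the pointer.
    data = Path(blob_path).read_bytes()
    digest = "sha256:" + hashlib.sha256(data).hexdigest()
    return digest == pointer["oid"] and len(data) == int(pointer["size"])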
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.5001883239171375,
+ "epoch": 0.7502824858757062,
  "eval_steps": 83,
- "global_step": 166,
+ "global_step": 249,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -1193,6 +1193,595 @@
  "eval_samples_per_second": 6.002,
  "eval_steps_per_second": 1.503,
  "step": 166
+ },
+ {
+ "epoch": 0.5032015065913371,
+ "grad_norm": 1.2678625583648682,
+ "learning_rate": 6.298594285815584e-05,
+ "loss": 0.2982,
+ "step": 167
+ },
+ {
+ "epoch": 0.5062146892655367,
+ "grad_norm": 1.2661633491516113,
+ "learning_rate": 6.244532285066382e-05,
+ "loss": 0.3381,
+ "step": 168
+ },
+ {
+ "epoch": 0.5092278719397364,
+ "grad_norm": 1.160649061203003,
+ "learning_rate": 6.190314727607196e-05,
+ "loss": 0.3428,
+ "step": 169
+ },
+ {
+ "epoch": 0.512241054613936,
+ "grad_norm": 2.1445140838623047,
+ "learning_rate": 6.13594839020466e-05,
+ "loss": 0.6844,
+ "step": 170
+ },
+ {
+ "epoch": 0.5152542372881356,
+ "grad_norm": 1.9892338514328003,
+ "learning_rate": 6.0814400682217234e-05,
+ "loss": 0.559,
+ "step": 171
+ },
+ {
+ "epoch": 0.5182674199623352,
+ "grad_norm": 3.0277585983276367,
+ "learning_rate": 6.026796574768288e-05,
+ "loss": 0.6495,
+ "step": 172
+ },
+ {
+ "epoch": 0.5212806026365349,
+ "grad_norm": 3.0984301567077637,
+ "learning_rate": 5.972024739849622e-05,
+ "loss": 0.4114,
+ "step": 173
+ },
+ {
+ "epoch": 0.5242937853107345,
+ "grad_norm": 3.296858549118042,
+ "learning_rate": 5.917131409512663e-05,
+ "loss": 0.5272,
+ "step": 174
+ },
+ {
+ "epoch": 0.527306967984934,
+ "grad_norm": 4.093991756439209,
+ "learning_rate": 5.862123444990318e-05,
+ "loss": 0.5134,
+ "step": 175
+ },
+ {
+ "epoch": 0.5303201506591337,
+ "grad_norm": 1.48560631275177,
+ "learning_rate": 5.807007721843861e-05,
+ "loss": 0.5482,
+ "step": 176
+ },
+ {
+ "epoch": 0.5333333333333333,
+ "grad_norm": 1.6580932140350342,
+ "learning_rate": 5.751791129103544e-05,
+ "loss": 0.5894,
+ "step": 177
+ },
+ {
+ "epoch": 0.5363465160075329,
+ "grad_norm": 1.1557704210281372,
+ "learning_rate": 5.696480568407523e-05,
+ "loss": 0.4388,
+ "step": 178
+ },
+ {
+ "epoch": 0.5393596986817326,
+ "grad_norm": 1.1503976583480835,
+ "learning_rate": 5.6410829531392006e-05,
+ "loss": 0.3841,
+ "step": 179
+ },
+ {
+ "epoch": 0.5423728813559322,
+ "grad_norm": 1.0935317277908325,
+ "learning_rate": 5.585605207563124e-05,
+ "loss": 0.38,
+ "step": 180
+ },
+ {
+ "epoch": 0.5453860640301318,
+ "grad_norm": 1.0500688552856445,
+ "learning_rate": 5.5300542659594854e-05,
+ "loss": 0.37,
+ "step": 181
+ },
+ {
+ "epoch": 0.5483992467043315,
+ "grad_norm": 0.910677433013916,
+ "learning_rate": 5.47443707175741e-05,
+ "loss": 0.335,
+ "step": 182
+ },
+ {
+ "epoch": 0.5514124293785311,
+ "grad_norm": 1.0660496950149536,
+ "learning_rate": 5.418760576667071e-05,
+ "loss": 0.3516,
+ "step": 183
+ },
+ {
+ "epoch": 0.5544256120527307,
+ "grad_norm": 0.8869411945343018,
+ "learning_rate": 5.3630317398107864e-05,
+ "loss": 0.3249,
+ "step": 184
+ },
+ {
+ "epoch": 0.5574387947269304,
+ "grad_norm": 1.6419062614440918,
+ "learning_rate": 5.3072575268531835e-05,
+ "loss": 0.3531,
+ "step": 185
+ },
+ {
+ "epoch": 0.56045197740113,
+ "grad_norm": 0.8144354820251465,
+ "learning_rate": 5.2514449091305375e-05,
+ "loss": 0.3002,
+ "step": 186
+ },
+ {
+ "epoch": 0.5634651600753295,
+ "grad_norm": 1.1171809434890747,
+ "learning_rate": 5.195600862779421e-05,
+ "loss": 0.3776,
+ "step": 187
+ },
+ {
+ "epoch": 0.5664783427495292,
+ "grad_norm": 1.14911687374115,
+ "learning_rate": 5.139732367864736e-05,
+ "loss": 0.3215,
+ "step": 188
+ },
+ {
+ "epoch": 0.5694915254237288,
+ "grad_norm": 0.928807258605957,
+ "learning_rate": 5.083846407507263e-05,
+ "loss": 0.2747,
+ "step": 189
+ },
+ {
+ "epoch": 0.5725047080979284,
+ "grad_norm": 1.024272084236145,
+ "learning_rate": 5.0279499670108245e-05,
+ "loss": 0.3024,
+ "step": 190
+ },
+ {
+ "epoch": 0.5755178907721281,
+ "grad_norm": 1.2534008026123047,
+ "learning_rate": 4.972050032989175e-05,
+ "loss": 0.3704,
+ "step": 191
+ },
+ {
+ "epoch": 0.5785310734463277,
+ "grad_norm": 0.9777889847755432,
+ "learning_rate": 4.9161535924927374e-05,
+ "loss": 0.271,
+ "step": 192
+ },
+ {
+ "epoch": 0.5815442561205273,
+ "grad_norm": 1.0151127576828003,
+ "learning_rate": 4.860267632135265e-05,
+ "loss": 0.3179,
+ "step": 193
+ },
+ {
+ "epoch": 0.5845574387947269,
+ "grad_norm": 0.9281553030014038,
+ "learning_rate": 4.80439913722058e-05,
+ "loss": 0.2508,
+ "step": 194
+ },
+ {
+ "epoch": 0.5875706214689266,
+ "grad_norm": 1.6091618537902832,
+ "learning_rate": 4.748555090869464e-05,
+ "loss": 0.3701,
+ "step": 195
+ },
+ {
+ "epoch": 0.5905838041431262,
+ "grad_norm": 2.309112548828125,
+ "learning_rate": 4.692742473146818e-05,
+ "loss": 0.5701,
+ "step": 196
+ },
+ {
+ "epoch": 0.5935969868173258,
+ "grad_norm": 1.9035515785217285,
+ "learning_rate": 4.636968260189214e-05,
+ "loss": 0.5654,
+ "step": 197
+ },
+ {
+ "epoch": 0.5966101694915255,
+ "grad_norm": 2.222712516784668,
+ "learning_rate": 4.5812394233329305e-05,
+ "loss": 0.51,
+ "step": 198
+ },
+ {
+ "epoch": 0.599623352165725,
+ "grad_norm": 3.1770148277282715,
+ "learning_rate": 4.525562928242592e-05,
+ "loss": 0.6322,
+ "step": 199
+ },
+ {
+ "epoch": 0.6026365348399246,
+ "grad_norm": 3.6650121212005615,
+ "learning_rate": 4.4699457340405164e-05,
+ "loss": 0.4471,
+ "step": 200
+ },
+ {
+ "epoch": 0.6056497175141243,
+ "grad_norm": 1.350001573562622,
+ "learning_rate": 4.414394792436877e-05,
+ "loss": 0.4445,
+ "step": 201
+ },
+ {
+ "epoch": 0.6086629001883239,
+ "grad_norm": 1.3186067342758179,
+ "learning_rate": 4.3589170468607985e-05,
+ "loss": 0.4268,
+ "step": 202
+ },
+ {
+ "epoch": 0.6116760828625235,
+ "grad_norm": 1.0820094347000122,
+ "learning_rate": 4.3035194315924785e-05,
+ "loss": 0.349,
+ "step": 203
+ },
+ {
+ "epoch": 0.6146892655367232,
+ "grad_norm": 1.016855001449585,
+ "learning_rate": 4.248208870896456e-05,
+ "loss": 0.3657,
+ "step": 204
+ },
+ {
+ "epoch": 0.6177024482109228,
+ "grad_norm": 0.7627567648887634,
+ "learning_rate": 4.192992278156141e-05,
+ "loss": 0.282,
+ "step": 205
+ },
+ {
+ "epoch": 0.6207156308851224,
+ "grad_norm": 0.8734930753707886,
+ "learning_rate": 4.1378765550096835e-05,
+ "loss": 0.3205,
+ "step": 206
+ },
+ {
+ "epoch": 0.6237288135593221,
+ "grad_norm": 0.9233391880989075,
+ "learning_rate": 4.082868590487339e-05,
+ "loss": 0.2682,
+ "step": 207
+ },
+ {
+ "epoch": 0.6267419962335217,
+ "grad_norm": 0.7341740131378174,
+ "learning_rate": 4.027975260150381e-05,
+ "loss": 0.2488,
+ "step": 208
+ },
+ {
+ "epoch": 0.6297551789077213,
+ "grad_norm": 0.8570201992988586,
+ "learning_rate": 3.973203425231715e-05,
+ "loss": 0.2644,
+ "step": 209
+ },
+ {
+ "epoch": 0.632768361581921,
+ "grad_norm": 0.8284196853637695,
+ "learning_rate": 3.918559931778277e-05,
+ "loss": 0.3093,
+ "step": 210
+ },
+ {
+ "epoch": 0.6357815442561205,
+ "grad_norm": 0.7870326638221741,
+ "learning_rate": 3.8640516097953405e-05,
+ "loss": 0.2577,
+ "step": 211
+ },
+ {
+ "epoch": 0.6387947269303201,
+ "grad_norm": 0.9884381294250488,
+ "learning_rate": 3.809685272392804e-05,
+ "loss": 0.3252,
+ "step": 212
+ },
+ {
+ "epoch": 0.6418079096045197,
+ "grad_norm": 1.1668404340744019,
+ "learning_rate": 3.755467714933619e-05,
+ "loss": 0.3414,
+ "step": 213
+ },
+ {
+ "epoch": 0.6448210922787194,
+ "grad_norm": 1.1960536241531372,
+ "learning_rate": 3.701405714184416e-05,
+ "loss": 0.3029,
+ "step": 214
+ },
+ {
+ "epoch": 0.647834274952919,
+ "grad_norm": 0.7987526059150696,
+ "learning_rate": 3.647506027468467e-05,
+ "loss": 0.2501,
+ "step": 215
+ },
+ {
+ "epoch": 0.6508474576271186,
+ "grad_norm": 1.0721232891082764,
+ "learning_rate": 3.59377539182107e-05,
+ "loss": 0.3132,
+ "step": 216
+ },
+ {
+ "epoch": 0.6538606403013183,
+ "grad_norm": 0.9739212393760681,
+ "learning_rate": 3.5402205231474736e-05,
+ "loss": 0.2644,
+ "step": 217
+ },
+ {
+ "epoch": 0.6568738229755179,
+ "grad_norm": 1.1942991018295288,
+ "learning_rate": 3.486848115383445e-05,
+ "loss": 0.3206,
+ "step": 218
+ },
+ {
+ "epoch": 0.6598870056497175,
+ "grad_norm": 1.3838952779769897,
+ "learning_rate": 3.4336648396585776e-05,
+ "loss": 0.3569,
+ "step": 219
+ },
+ {
+ "epoch": 0.6629001883239172,
+ "grad_norm": 2.448126792907715,
+ "learning_rate": 3.380677343462447e-05,
+ "loss": 0.5818,
+ "step": 220
+ },
+ {
+ "epoch": 0.6659133709981168,
+ "grad_norm": 1.8376816511154175,
+ "learning_rate": 3.327892249813745e-05,
+ "loss": 0.4343,
+ "step": 221
+ },
+ {
+ "epoch": 0.6689265536723163,
+ "grad_norm": 2.102494478225708,
+ "learning_rate": 3.275316156432434e-05,
+ "loss": 0.4626,
+ "step": 222
+ },
+ {
+ "epoch": 0.671939736346516,
+ "grad_norm": 2.167078733444214,
+ "learning_rate": 3.2229556349150945e-05,
+ "loss": 0.4407,
+ "step": 223
+ },
+ {
+ "epoch": 0.6749529190207156,
+ "grad_norm": 2.149308204650879,
+ "learning_rate": 3.170817229913526e-05,
+ "loss": 0.3198,
+ "step": 224
+ },
+ {
+ "epoch": 0.6779661016949152,
+ "grad_norm": 4.306909084320068,
+ "learning_rate": 3.118907458316722e-05,
+ "loss": 0.5187,
+ "step": 225
+ },
+ {
+ "epoch": 0.6809792843691149,
+ "grad_norm": 1.0858993530273438,
+ "learning_rate": 3.067232808436299e-05,
+ "loss": 0.3973,
+ "step": 226
+ },
+ {
+ "epoch": 0.6839924670433145,
+ "grad_norm": 0.9466880559921265,
+ "learning_rate": 3.0157997391955172e-05,
+ "loss": 0.2911,
+ "step": 227
+ },
+ {
+ "epoch": 0.6870056497175141,
+ "grad_norm": 1.053808569908142,
+ "learning_rate": 2.964614679321966e-05,
+ "loss": 0.3425,
+ "step": 228
+ },
+ {
+ "epoch": 0.6900188323917138,
+ "grad_norm": 1.020087718963623,
+ "learning_rate": 2.913684026544021e-05,
+ "loss": 0.3171,
+ "step": 229
+ },
+ {
+ "epoch": 0.6930320150659134,
+ "grad_norm": 0.9112816452980042,
+ "learning_rate": 2.8630141467911775e-05,
+ "loss": 0.289,
+ "step": 230
+ },
+ {
+ "epoch": 0.696045197740113,
+ "grad_norm": 0.9472637176513672,
+ "learning_rate": 2.812611373398365e-05,
+ "loss": 0.2909,
+ "step": 231
+ },
+ {
+ "epoch": 0.6990583804143126,
+ "grad_norm": 0.8144400715827942,
+ "learning_rate": 2.762482006314324e-05,
+ "loss": 0.2527,
+ "step": 232
+ },
+ {
+ "epoch": 0.7020715630885123,
+ "grad_norm": 0.8899109363555908,
+ "learning_rate": 2.712632311314165e-05,
+ "loss": 0.2814,
+ "step": 233
+ },
+ {
+ "epoch": 0.7050847457627119,
+ "grad_norm": 0.8338634967803955,
+ "learning_rate": 2.6630685192161992e-05,
+ "loss": 0.2684,
+ "step": 234
+ },
+ {
+ "epoch": 0.7080979284369114,
+ "grad_norm": 1.1928447484970093,
+ "learning_rate": 2.6137968251031287e-05,
+ "loss": 0.327,
+ "step": 235
+ },
+ {
+ "epoch": 0.7111111111111111,
+ "grad_norm": 0.9519332647323608,
+ "learning_rate": 2.5648233875477157e-05,
+ "loss": 0.2797,
+ "step": 236
+ },
+ {
+ "epoch": 0.7141242937853107,
+ "grad_norm": 1.1364781856536865,
+ "learning_rate": 2.5161543278430054e-05,
+ "loss": 0.3121,
+ "step": 237
+ },
+ {
+ "epoch": 0.7171374764595103,
+ "grad_norm": 1.0575398206710815,
+ "learning_rate": 2.4677957292372167e-05,
+ "loss": 0.2866,
+ "step": 238
+ },
+ {
+ "epoch": 0.72015065913371,
+ "grad_norm": 0.7296847105026245,
+ "learning_rate": 2.419753636173379e-05,
+ "loss": 0.2432,
+ "step": 239
+ },
+ {
+ "epoch": 0.7231638418079096,
+ "grad_norm": 0.7573246955871582,
+ "learning_rate": 2.3720340535338348e-05,
+ "loss": 0.2545,
+ "step": 240
+ },
+ {
+ "epoch": 0.7261770244821092,
+ "grad_norm": 0.8128641247749329,
+ "learning_rate": 2.3246429458896634e-05,
+ "loss": 0.2548,
+ "step": 241
+ },
+ {
+ "epoch": 0.7291902071563089,
+ "grad_norm": 0.8031213283538818,
+ "learning_rate": 2.2775862367551644e-05,
+ "loss": 0.2509,
+ "step": 242
+ },
+ {
+ "epoch": 0.7322033898305085,
+ "grad_norm": 0.9279189109802246,
+ "learning_rate": 2.2308698078474645e-05,
+ "loss": 0.264,
+ "step": 243
+ },
+ {
+ "epoch": 0.7352165725047081,
+ "grad_norm": 1.0536640882492065,
+ "learning_rate": 2.1844994983513467e-05,
+ "loss": 0.3295,
+ "step": 244
+ },
+ {
+ "epoch": 0.7382297551789078,
+ "grad_norm": 2.177769422531128,
+ "learning_rate": 2.1384811041894055e-05,
+ "loss": 0.4975,
+ "step": 245
+ },
+ {
+ "epoch": 0.7412429378531074,
+ "grad_norm": 1.5739905834197998,
+ "learning_rate": 2.0928203772975917e-05,
+ "loss": 0.4395,
+ "step": 246
+ },
+ {
+ "epoch": 0.7442561205273069,
+ "grad_norm": 1.7161879539489746,
+ "learning_rate": 2.0475230249062725e-05,
+ "loss": 0.3947,
+ "step": 247
+ },
+ {
+ "epoch": 0.7472693032015066,
+ "grad_norm": 3.185561180114746,
+ "learning_rate": 2.0025947088268717e-05,
+ "loss": 0.6166,
+ "step": 248
+ },
+ {
+ "epoch": 0.7502824858757062,
+ "grad_norm": 2.201467275619507,
+ "learning_rate": 1.958041044744186e-05,
+ "loss": 0.3779,
+ "step": 249
+ },
+ {
+ "epoch": 0.7502824858757062,
+ "eval_loss": NaN,
+ "eval_runtime": 92.9008,
+ "eval_samples_per_second": 6.017,
+ "eval_steps_per_second": 1.507,
+ "step": 249
  }
  ],
  "logging_steps": 1,
 
@@ -1212,7 +1801,7 @@
  "attributes": {}
  }
  },
- "total_flos": 5.46878716765012e+17,
+ "total_flos": 8.20318075147518e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null