Training in progress, step 283, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:eb4636f56b84a29bc498149f8fa29ab96cd149578b787eaec38b88594a715d2c
 size 1140880624
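Aside: adapter_model.safetensors suggests this checkpoint stores PEFT-style adapter weights rather than a full model. As a minimal sketch (assuming a causal-LM task and the peft library; BASE_MODEL is a hypothetical placeholder, since the diff does not name the base model this adapter was trained on), such an adapter directory is typically loaded like this:

    # Hedged sketch: load the checkpoint's adapter on top of its base model.
    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    BASE_MODEL = "org/base-model-name"  # hypothetical placeholder, not in the diff

    base = AutoModelForCausalLM.from_pretrained(BASE_MODEL)
    # Point at the directory containing adapter_model.safetensors.
    model = PeftModel.from_pretrained(base, "last-checkpoint")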
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f440c1bf2cce0bb3890a1db85b5dfa6f57d9db3073b20a1ea822a1fd304bdb03
 size 2281891834
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:64af67540b37ea7895c7f0895b6fd3530a619682b8fb6206680dab892bb34ea0
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4512787257fe5eeb9de23bd70dafa8e125bcb4548e42b7e1a007385ee165b1c3
 size 1064
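All four files above are Git LFS pointers: each records only the spec version, the sha256 oid of the real payload, and its size in bytes. A stdlib-only sketch for checking a downloaded payload against such a pointer (it assumes exactly the three-line pointer format shown in these diffs):

    import hashlib
    import os

    def read_pointer(pointer_path):
        """Parse a git-lfs pointer of the three-line form shown above."""
        fields = {}
        with open(pointer_path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields["oid"].removeprefix("sha256:"), int(fields["size"])

    def verify(pointer_path, payload_path):
        """Return True if the payload's size and sha256 match the pointer."""
        oid, size = read_pointer(pointer_path)
        if os.path.getsize(payload_path) != size:
            return False
        h = hashlib.sha256()
        with open(payload_path, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):
                h.update(chunk)
        return h.hexdigest() == oid

Note that in a checked-out clone `git lfs pull` replaces the pointer with the payload in place, so the pointer text would have to be read from git itself (e.g. `git show HEAD:last-checkpoint/scheduler.pt`).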
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0
+  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 283,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1680,6 +1680,314 @@
       "learning_rate": 1.3598609942150765e-05,
       "loss": 2.6368,
       "step": 239
+    },
+    {
+      "epoch": 0.8480565371024735,
+      "grad_norm": 4.957668781280518,
+      "learning_rate": 1.3015556956751669e-05,
+      "loss": 2.4514,
+      "step": 240
+    },
+    {
+      "epoch": 0.8515901060070671,
+      "grad_norm": 5.872159004211426,
+      "learning_rate": 1.2443403456474017e-05,
+      "loss": 2.6689,
+      "step": 241
+    },
+    {
+      "epoch": 0.8551236749116607,
+      "grad_norm": 5.210415363311768,
+      "learning_rate": 1.1882318057580489e-05,
+      "loss": 2.5006,
+      "step": 242
+    },
+    {
+      "epoch": 0.8586572438162544,
+      "grad_norm": 4.810140132904053,
+      "learning_rate": 1.1332466114513512e-05,
+      "loss": 2.193,
+      "step": 243
+    },
+    {
+      "epoch": 0.8621908127208481,
+      "grad_norm": 5.142510414123535,
+      "learning_rate": 1.0794009671164484e-05,
+      "loss": 2.3454,
+      "step": 244
+    },
+    {
+      "epoch": 0.8657243816254417,
+      "grad_norm": 5.115345478057861,
+      "learning_rate": 1.0267107413118742e-05,
+      "loss": 2.5682,
+      "step": 245
+    },
+    {
+      "epoch": 0.8692579505300353,
+      "grad_norm": 5.960536479949951,
+      "learning_rate": 9.751914620890206e-06,
+      "loss": 2.5434,
+      "step": 246
+    },
+    {
+      "epoch": 0.872791519434629,
+      "grad_norm": 4.932643890380859,
+      "learning_rate": 9.248583124159438e-06,
+      "loss": 2.4929,
+      "step": 247
+    },
+    {
+      "epoch": 0.8763250883392226,
+      "grad_norm": 5.753042221069336,
+      "learning_rate": 8.757261257028777e-06,
+      "loss": 2.5304,
+      "step": 248
+    },
+    {
+      "epoch": 0.8798586572438163,
+      "grad_norm": 6.017014026641846,
+      "learning_rate": 8.278093814307637e-06,
+      "loss": 2.5224,
+      "step": 249
+    },
+    {
+      "epoch": 0.8833922261484098,
+      "grad_norm": 6.0025434494018555,
+      "learning_rate": 7.81122200884072e-06,
+      "loss": 2.7302,
+      "step": 250
+    },
+    {
+      "epoch": 0.8869257950530035,
+      "grad_norm": 5.745126247406006,
+      "learning_rate": 7.356783429892023e-06,
+      "loss": 2.328,
+      "step": 251
+    },
+    {
+      "epoch": 0.8904593639575972,
+      "grad_norm": 6.124932289123535,
+      "learning_rate": 6.9149120025965905e-06,
+      "loss": 2.3842,
+      "step": 252
+    },
+    {
+      "epoch": 0.8939929328621908,
+      "grad_norm": 5.819494724273682,
+      "learning_rate": 6.4857379484922375e-06,
+      "loss": 2.5136,
+      "step": 253
+    },
+    {
+      "epoch": 0.8975265017667845,
+      "grad_norm": 4.685893535614014,
+      "learning_rate": 6.069387747142591e-06,
+      "loss": 2.2869,
+      "step": 254
+    },
+    {
+      "epoch": 0.901060070671378,
+      "grad_norm": 7.096498966217041,
+      "learning_rate": 5.665984098862992e-06,
+      "loss": 2.3135,
+      "step": 255
+    },
+    {
+      "epoch": 0.9045936395759717,
+      "grad_norm": 4.7295732498168945,
+      "learning_rate": 5.275645888560232e-06,
+      "loss": 2.1201,
+      "step": 256
+    },
+    {
+      "epoch": 0.9081272084805654,
+      "grad_norm": 4.792962551116943,
+      "learning_rate": 4.898488150696467e-06,
+      "loss": 2.4277,
+      "step": 257
+    },
+    {
+      "epoch": 0.911660777385159,
+      "grad_norm": 5.9998345375061035,
+      "learning_rate": 4.534622035388214e-06,
+      "loss": 2.589,
+      "step": 258
+    },
+    {
+      "epoch": 0.9151943462897526,
+      "grad_norm": 5.028961181640625,
+      "learning_rate": 4.184154775649768e-06,
+      "loss": 2.3586,
+      "step": 259
+    },
+    {
+      "epoch": 0.9187279151943463,
+      "grad_norm": 4.820951461791992,
+      "learning_rate": 3.8471896557912e-06,
+      "loss": 2.2727,
+      "step": 260
+    },
+    {
+      "epoch": 0.9222614840989399,
+      "grad_norm": 5.404629707336426,
+      "learning_rate": 3.523825980979989e-06,
+      "loss": 2.3514,
+      "step": 261
+    },
+    {
+      "epoch": 0.9257950530035336,
+      "grad_norm": 4.80362606048584,
+      "learning_rate": 3.2141590479753236e-06,
+      "loss": 2.2188,
+      "step": 262
+    },
+    {
+      "epoch": 0.9293286219081273,
+      "grad_norm": 5.06864070892334,
+      "learning_rate": 2.918280117043709e-06,
+      "loss": 2.1602,
+      "step": 263
+    },
+    {
+      "epoch": 0.9328621908127208,
+      "grad_norm": 5.121250629425049,
+      "learning_rate": 2.636276385064157e-06,
+      "loss": 2.4903,
+      "step": 264
+    },
+    {
+      "epoch": 0.9363957597173145,
+      "grad_norm": 5.582918167114258,
+      "learning_rate": 2.3682309598308747e-06,
+      "loss": 2.379,
+      "step": 265
+    },
+    {
+      "epoch": 0.9399293286219081,
+      "grad_norm": 5.544982433319092,
+      "learning_rate": 2.114222835560986e-06,
+      "loss": 2.8578,
+      "step": 266
+    },
+    {
+      "epoch": 0.9434628975265018,
+      "grad_norm": 4.647549629211426,
+      "learning_rate": 1.8743268696145954e-06,
+      "loss": 2.2086,
+      "step": 267
+    },
+    {
+      "epoch": 0.9469964664310954,
+      "grad_norm": 4.952742576599121,
+      "learning_rate": 1.6486137604339813e-06,
+      "loss": 2.3761,
+      "step": 268
+    },
+    {
+      "epoch": 0.950530035335689,
+      "grad_norm": 5.95998477935791,
+      "learning_rate": 1.4371500267084338e-06,
+      "loss": 2.5603,
+      "step": 269
+    },
+    {
+      "epoch": 0.9540636042402827,
+      "grad_norm": 5.027519226074219,
+      "learning_rate": 1.2399979877708745e-06,
+      "loss": 2.3833,
+      "step": 270
+    },
+    {
+      "epoch": 0.9575971731448764,
+      "grad_norm": 5.592859745025635,
+      "learning_rate": 1.0572157452321097e-06,
+      "loss": 2.7075,
+      "step": 271
+    },
+    {
+      "epoch": 0.9611307420494699,
+      "grad_norm": 4.911261081695557,
+      "learning_rate": 8.888571658579703e-07,
+      "loss": 2.2938,
+      "step": 272
+    },
+    {
+      "epoch": 0.9646643109540636,
+      "grad_norm": 5.232736587524414,
+      "learning_rate": 7.349718656945504e-07,
+      "loss": 2.1393,
+      "step": 273
+    },
+    {
+      "epoch": 0.9681978798586572,
+      "grad_norm": 5.2621331214904785,
+      "learning_rate": 5.956051954461472e-07,
+      "loss": 2.7253,
+      "step": 274
+    },
+    {
+      "epoch": 0.9717314487632509,
+      "grad_norm": 4.635681629180908,
+      "learning_rate": 4.7079822711015296e-07,
+      "loss": 2.2413,
+      "step": 275
+    },
+    {
+      "epoch": 0.9752650176678446,
+      "grad_norm": 5.374542236328125,
+      "learning_rate": 3.605877418729975e-07,
+      "loss": 2.6089,
+      "step": 276
+    },
+    {
+      "epoch": 0.9787985865724381,
+      "grad_norm": 4.751713275909424,
+      "learning_rate": 2.6500621927054715e-07,
+      "loss": 2.3142,
+      "step": 277
+    },
+    {
+      "epoch": 0.9823321554770318,
+      "grad_norm": 5.122497081756592,
+      "learning_rate": 1.840818276162226e-07,
+      "loss": 2.3476,
+      "step": 278
+    },
+    {
+      "epoch": 0.9858657243816255,
+      "grad_norm": 6.561892509460449,
+      "learning_rate": 1.1783841569968367e-07,
+      "loss": 2.6703,
+      "step": 279
+    },
+    {
+      "epoch": 0.9893992932862191,
+      "grad_norm": 4.851873874664307,
+      "learning_rate": 6.629550575847354e-08,
+      "loss": 2.3439,
+      "step": 280
+    },
+    {
+      "epoch": 0.9929328621908127,
+      "grad_norm": 5.513582229614258,
+      "learning_rate": 2.946828772473764e-08,
+      "loss": 2.6915,
+      "step": 281
+    },
+    {
+      "epoch": 0.9964664310954063,
+      "grad_norm": 5.608765602111816,
+      "learning_rate": 7.36761474865455e-09,
+      "loss": 2.5523,
+      "step": 282
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 5.803535461425781,
+      "learning_rate": 0.0,
+      "loss": 2.6483,
+      "step": 283
     }
   ],
   "logging_steps": 1,
@@ -1694,12 +2002,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.7503790819180544e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null
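trainer_state.json is plain JSON, so the run summarized in this commit (epoch 1.0, global_step 283, final loss 2.6483, learning rate decayed to 0.0) can be inspected directly. A minimal sketch using only keys visible in the diff, plus the log array's name (log_history, as transformers' Trainer writes it):

    import json

    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["epoch"], state["global_step"], state["total_flos"])

    # Each log entry carries epoch, grad_norm, learning_rate, loss, step.
    for entry in state["log_history"][-5:]:
        print(f'step {entry["step"]:>3}  lr {entry["learning_rate"]:.3e}  loss {entry["loss"]:.4f}')

Since "should_training_stop" flipped to true and the schedule has reached zero, this is the end-of-run checkpoint; a transformers Trainer can pick the run back up from this directory with trainer.train(resume_from_checkpoint="last-checkpoint"), which is what optimizer.pt, scheduler.pt, and rng_state.pth exist for.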