Training in progress, step 6000, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +283 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ed7f5192373055df50388d1e8a342b0008cc7f264c290f2f40d0816847f2899
 size 1520630616

 version https://git-lfs.github.com/spec/v1
+oid sha256:9310a4b888df283774971e4e671540bfed2da01aea080fa39eda067305eeba86
 size 1520630616

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b8be61aa4b411ba072b5dd099697cc18dd1215103eeea9cd79dbfb70d181d7a
 size 3041448587

 version https://git-lfs.github.com/spec/v1
+oid sha256:f1f256b63f8887aa92c9795198c14b259ff29bd76f4e601214dd8ad4add4ccd6
 size 3041448587

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82f11385365889b74991a13277667854d4ee120983e8addb357d466767c0b9ff
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:2669ee2d37691d1bc42e7a0090a126e105acbd5de1cf305e31cb6b68e55636b7
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ac42a4d50be277865df4f8c22478009406dfd138fc6ebe8a41f41d644b86db8
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:a30b126d1da8ae8870320a9f300ee7d428169650eb20c3a488c09fc00bef14d8
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0060874316939892,
   "eval_steps": 500,
-  "global_step": 4000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -568,6 +568,286 @@
       "learning_rate": 0.00029976166518534735,
       "loss": 2.4739,
       "step": 4000
     }
   ],
   "logging_steps": 50,
@@ -587,7 +867,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.1391181977674056e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0279453551912567,
   "eval_steps": 500,
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00029976166518534735,
       "loss": 2.4739,
       "step": 4000
+    },
+    {
+      "epoch": 1.0066338797814207,
+      "grad_norm": 0.5546875,
+      "learning_rate": 0.00029974653116842764,
+      "loss": 2.4487,
+      "step": 4050
+    },
+    {
+      "epoch": 1.0071803278688525,
+      "grad_norm": 0.60546875,
+      "learning_rate": 0.0002997309317358347,
+      "loss": 2.4674,
+      "step": 4100
+    },
+    {
+      "epoch": 1.0077267759562842,
+      "grad_norm": 0.67578125,
+      "learning_rate": 0.0002997148669360519,
+      "loss": 2.4814,
+      "step": 4150
+    },
+    {
+      "epoch": 1.0082732240437158,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00029969833681900914,
+      "loss": 2.448,
+      "step": 4200
+    },
+    {
+      "epoch": 1.0088196721311475,
+      "grad_norm": 0.68359375,
+      "learning_rate": 0.0002996813414360822,
+      "loss": 2.4299,
+      "step": 4250
+    },
+    {
+      "epoch": 1.0093661202185793,
+      "grad_norm": 0.57421875,
+      "learning_rate": 0.00029966388084009334,
+      "loss": 2.4271,
+      "step": 4300
+    },
+    {
+      "epoch": 1.0099125683060108,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.00029964595508531034,
+      "loss": 2.4848,
+      "step": 4350
+    },
+    {
+      "epoch": 1.0104590163934426,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.00029962756422744695,
+      "loss": 2.414,
+      "step": 4400
+    },
+    {
+      "epoch": 1.0110054644808744,
+      "grad_norm": 0.51171875,
+      "learning_rate": 0.00029960870832366224,
+      "loss": 2.3993,
+      "step": 4450
+    },
+    {
+      "epoch": 1.0115519125683061,
+      "grad_norm": 0.54296875,
+      "learning_rate": 0.000299589387432561,
+      "loss": 2.4171,
+      "step": 4500
+    },
+    {
+      "epoch": 1.0120983606557377,
+      "grad_norm": 0.490234375,
+      "learning_rate": 0.00029956960161419283,
+      "loss": 2.4038,
+      "step": 4550
+    },
+    {
+      "epoch": 1.0126448087431694,
+      "grad_norm": 0.498046875,
+      "learning_rate": 0.0002995493509300526,
+      "loss": 2.4128,
+      "step": 4600
+    },
+    {
+      "epoch": 1.0131912568306012,
+      "grad_norm": 0.51171875,
+      "learning_rate": 0.0002995286354430799,
+      "loss": 2.3721,
+      "step": 4650
+    },
+    {
+      "epoch": 1.0137377049180327,
+      "grad_norm": 0.51171875,
+      "learning_rate": 0.0002995074552176589,
+      "loss": 2.3734,
+      "step": 4700
+    },
+    {
+      "epoch": 1.0142841530054645,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.00029948581031961826,
+      "loss": 2.3805,
+      "step": 4750
+    },
+    {
+      "epoch": 1.0148306010928962,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002994637008162308,
+      "loss": 2.3819,
+      "step": 4800
+    },
+    {
+      "epoch": 1.0153770491803278,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00029944112677621345,
+      "loss": 2.3839,
+      "step": 4850
+    },
+    {
+      "epoch": 1.0159234972677595,
+      "grad_norm": 0.4921875,
+      "learning_rate": 0.00029941808826972673,
+      "loss": 2.336,
+      "step": 4900
+    },
+    {
+      "epoch": 1.0164699453551913,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.0002993945853683749,
+      "loss": 2.3126,
+      "step": 4950
+    },
+    {
+      "epoch": 1.0170163934426228,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00029937061814520546,
+      "loss": 2.3271,
+      "step": 5000
+    },
+    {
+      "epoch": 1.0175628415300546,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.00029934618667470925,
+      "loss": 2.3275,
+      "step": 5050
+    },
+    {
+      "epoch": 1.0181092896174864,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.0002993212910328197,
+      "loss": 2.2837,
+      "step": 5100
+    },
+    {
+      "epoch": 1.0186557377049181,
+      "grad_norm": 0.56640625,
+      "learning_rate": 0.00029929593129691305,
+      "loss": 2.2964,
+      "step": 5150
+    },
+    {
+      "epoch": 1.0192021857923497,
+      "grad_norm": 0.50390625,
+      "learning_rate": 0.000299270107545808,
+      "loss": 2.3155,
+      "step": 5200
+    },
+    {
+      "epoch": 1.0197486338797814,
+      "grad_norm": 0.55078125,
+      "learning_rate": 0.00029924381985976534,
+      "loss": 2.2722,
+      "step": 5250
+    },
+    {
+      "epoch": 1.0202950819672132,
+      "grad_norm": 0.49609375,
+      "learning_rate": 0.00029921706832048784,
+      "loss": 2.3175,
+      "step": 5300
+    },
+    {
+      "epoch": 1.0208415300546447,
+      "grad_norm": 0.48828125,
+      "learning_rate": 0.00029918985301111985,
+      "loss": 2.2834,
+      "step": 5350
+    },
+    {
+      "epoch": 1.0213879781420765,
+      "grad_norm": 0.734375,
+      "learning_rate": 0.00029916217401624716,
+      "loss": 2.2522,
+      "step": 5400
+    },
+    {
+      "epoch": 1.0219344262295083,
+      "grad_norm": 0.46484375,
+      "learning_rate": 0.00029913403142189677,
+      "loss": 2.2872,
+      "step": 5450
+    },
+    {
+      "epoch": 1.0224808743169398,
+      "grad_norm": 0.51953125,
+      "learning_rate": 0.00029910542531553656,
+      "loss": 2.2793,
+      "step": 5500
+    },
+    {
+      "epoch": 1.0230273224043716,
+      "grad_norm": 0.474609375,
+      "learning_rate": 0.00029907635578607487,
+      "loss": 2.218,
+      "step": 5550
+    },
+    {
+      "epoch": 1.0235737704918033,
+      "grad_norm": 0.49609375,
+      "learning_rate": 0.00029904682292386053,
+      "loss": 2.2309,
+      "step": 5600
+    },
+    {
+      "epoch": 1.024120218579235,
+      "grad_norm": 0.52734375,
+      "learning_rate": 0.0002990168268206823,
+      "loss": 2.2285,
+      "step": 5650
+    },
+    {
+      "epoch": 1.0246666666666666,
+      "grad_norm": 0.48828125,
+      "learning_rate": 0.00029898636756976884,
+      "loss": 2.2338,
+      "step": 5700
+    },
+    {
+      "epoch": 1.0252131147540984,
+      "grad_norm": 0.462890625,
+      "learning_rate": 0.0002989554452657881,
+      "loss": 2.2048,
+      "step": 5750
+    },
+    {
+      "epoch": 1.0257595628415301,
+      "grad_norm": 0.62109375,
+      "learning_rate": 0.0002989240600048475,
+      "loss": 2.2716,
+      "step": 5800
+    },
+    {
+      "epoch": 1.0263060109289617,
+      "grad_norm": 0.6015625,
+      "learning_rate": 0.00029889221188449295,
+      "loss": 2.2618,
+      "step": 5850
+    },
+    {
+      "epoch": 1.0268524590163934,
+      "grad_norm": 0.47265625,
+      "learning_rate": 0.0002988599010037092,
+      "loss": 2.2181,
+      "step": 5900
+    },
+    {
+      "epoch": 1.0273989071038252,
+      "grad_norm": 0.5234375,
+      "learning_rate": 0.0002988271274629192,
+      "loss": 2.2005,
+      "step": 5950
+    },
+    {
+      "epoch": 1.0279453551912567,
+      "grad_norm": 0.515625,
+      "learning_rate": 0.00029879389136398403,
+      "loss": 2.1958,
+      "step": 6000
     }
   ],
   "logging_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 3.2086020985643336e+18,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null