Training in progress, step 247200, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/tokenizer_config.json +1 -1
last-checkpoint/trainer_state.json +234 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
-    "q_proj",
-    "k_proj",
     "up_proj",
-    "gate_proj",
     "v_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
     "down_proj",
     "up_proj",
+    "q_proj",
     "v_proj",
+    "o_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ad2c04e4c9d9778549e502f8f4d5e5c7678fc1dcb6dbaa7898e81a74d789ffe6
 size 1342238560

 version https://git-lfs.github.com/spec/v1
+oid sha256:26cdd869c2b4ef51426ac8e4543dd00178c3d7969b70ed9815e4cdd56536de38
 size 1342238560

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7abfc3edc0f50712b10a6abd65117145ee7a61c6581f5ef5f334f78cac8b278
 size 683268498

 version https://git-lfs.github.com/spec/v1
+oid sha256:fed46b67619b196bd11398b24691e1dc344bc786624293d833d81496772323ba
 size 683268498

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fdf2e2cd9b38a414a8e4dfabaffbbc3519d4a60f21cea7d14c955d0395100be6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b46cf230fb1dcfff937d34e6fba4e48cdf99fb6180c75deb7bd7cfc11bd65f9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a5c2f6216cdb6e05487f89478e270dd1586ad361fb78787891ff688d5d1f5b80
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6758d545d7403ec126f36e0141cd2cb64797d86a13dce36859bd0cdff2824ef9
 size 1064

last-checkpoint/tokenizer_config.json CHANGED Viewed

@@ -2064,7 +2064,7 @@
   "pad_token": "<|eot_id|>",
   "padding_side": "left",
   "stride": 0,
-  "tokenizer_class": "PreTrainedTokenizerFast",
   "truncation_side": "right",
   "truncation_strategy": "longest_first"
 }

   "pad_token": "<|eot_id|>",
   "padding_side": "left",
   "stride": 0,
+  "tokenizer_class": "PreTrainedTokenizer",
   "truncation_side": "right",
   "truncation_strategy": "longest_first"
 }

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.16733305467673631,
   "eval_steps": 500,
-  "global_step": 240600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8428,6 +8428,237 @@
       "learning_rate": 1.9759580630399218e-05,
       "loss": 1.6874,
       "step": 240600
     }
   ],
   "logging_steps": 200,
@@ -8447,7 +8678,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.2024253058097725e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1719232382214847,
   "eval_steps": 500,
+  "global_step": 247200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9759580630399218e-05,
       "loss": 1.6874,
       "step": 240600
+    },
+    {
+      "epoch": 0.1674721511477893,
+      "grad_norm": 5.881978511810303,
+      "learning_rate": 1.9759183073216768e-05,
+      "loss": 1.6754,
+      "step": 240800
+    },
+    {
+      "epoch": 0.16761124761884227,
+      "grad_norm": 4.917115688323975,
+      "learning_rate": 1.9758785193358672e-05,
+      "loss": 1.6594,
+      "step": 241000
+    },
+    {
+      "epoch": 0.16775034408989525,
+      "grad_norm": 7.00810432434082,
+      "learning_rate": 1.9758386990843928e-05,
+      "loss": 1.6253,
+      "step": 241200
+    },
+    {
+      "epoch": 0.16788944056094826,
+      "grad_norm": 3.8624985218048096,
+      "learning_rate": 1.9757988465691542e-05,
+      "loss": 1.6543,
+      "step": 241400
+    },
+    {
+      "epoch": 0.16802853703200124,
+      "grad_norm": 6.821996212005615,
+      "learning_rate": 1.9757589617920542e-05,
+      "loss": 1.674,
+      "step": 241600
+    },
+    {
+      "epoch": 0.16816763350305422,
+      "grad_norm": 7.101013660430908,
+      "learning_rate": 1.9757190447549967e-05,
+      "loss": 1.6478,
+      "step": 241800
+    },
+    {
+      "epoch": 0.1683067299741072,
+      "grad_norm": 3.3569910526275635,
+      "learning_rate": 1.9756790954598874e-05,
+      "loss": 1.6584,
+      "step": 242000
+    },
+    {
+      "epoch": 0.16844582644516018,
+      "grad_norm": 3.9595654010772705,
+      "learning_rate": 1.9756391139086332e-05,
+      "loss": 1.6638,
+      "step": 242200
+    },
+    {
+      "epoch": 0.16858492291621316,
+      "grad_norm": 7.490050315856934,
+      "learning_rate": 1.9755991001031433e-05,
+      "loss": 1.6073,
+      "step": 242400
+    },
+    {
+      "epoch": 0.16872401938726614,
+      "grad_norm": 6.08933162689209,
+      "learning_rate": 1.9755590540453275e-05,
+      "loss": 1.6431,
+      "step": 242600
+    },
+    {
+      "epoch": 0.16886311585831912,
+      "grad_norm": 5.067729473114014,
+      "learning_rate": 1.9755189757370973e-05,
+      "loss": 1.6102,
+      "step": 242800
+    },
+    {
+      "epoch": 0.1690022123293721,
+      "grad_norm": 5.07871150970459,
+      "learning_rate": 1.9754788651803664e-05,
+      "loss": 1.6425,
+      "step": 243000
+    },
+    {
+      "epoch": 0.16914130880042508,
+      "grad_norm": 4.212064743041992,
+      "learning_rate": 1.97543872237705e-05,
+      "loss": 1.6382,
+      "step": 243200
+    },
+    {
+      "epoch": 0.16928040527147806,
+      "grad_norm": 5.310381889343262,
+      "learning_rate": 1.9753985473290637e-05,
+      "loss": 1.649,
+      "step": 243400
+    },
+    {
+      "epoch": 0.16941950174253104,
+      "grad_norm": 5.563880443572998,
+      "learning_rate": 1.9753583400383262e-05,
+      "loss": 1.68,
+      "step": 243600
+    },
+    {
+      "epoch": 0.16955859821358402,
+      "grad_norm": 3.346017837524414,
+      "learning_rate": 1.975318100506756e-05,
+      "loss": 1.6314,
+      "step": 243800
+    },
+    {
+      "epoch": 0.169697694684637,
+      "grad_norm": 3.5837533473968506,
+      "learning_rate": 1.9752778287362746e-05,
+      "loss": 1.6668,
+      "step": 244000
+    },
+    {
+      "epoch": 0.16983679115568998,
+      "grad_norm": 8.021364212036133,
+      "learning_rate": 1.9752375247288046e-05,
+      "loss": 1.6824,
+      "step": 244200
+    },
+    {
+      "epoch": 0.16997588762674296,
+      "grad_norm": 6.544102191925049,
+      "learning_rate": 1.97519718848627e-05,
+      "loss": 1.6961,
+      "step": 244400
+    },
+    {
+      "epoch": 0.17011498409779593,
+      "grad_norm": 6.292764663696289,
+      "learning_rate": 1.9751568200105962e-05,
+      "loss": 1.6397,
+      "step": 244600
+    },
+    {
+      "epoch": 0.17025408056884891,
+      "grad_norm": 7.250925064086914,
+      "learning_rate": 1.9751164193037104e-05,
+      "loss": 1.7036,
+      "step": 244800
+    },
+    {
+      "epoch": 0.17039317703990192,
+      "grad_norm": 4.996527194976807,
+      "learning_rate": 1.975075986367542e-05,
+      "loss": 1.609,
+      "step": 245000
+    },
+    {
+      "epoch": 0.1705322735109549,
+      "grad_norm": 5.131301403045654,
+      "learning_rate": 1.97503552120402e-05,
+      "loss": 1.7069,
+      "step": 245200
+    },
+    {
+      "epoch": 0.17067136998200788,
+      "grad_norm": 8.829029083251953,
+      "learning_rate": 1.9749950238150776e-05,
+      "loss": 1.6709,
+      "step": 245400
+    },
+    {
+      "epoch": 0.17081046645306086,
+      "grad_norm": 5.728978633880615,
+      "learning_rate": 1.9749544942026467e-05,
+      "loss": 1.672,
+      "step": 245600
+    },
+    {
+      "epoch": 0.17094956292411384,
+      "grad_norm": 5.395960330963135,
+      "learning_rate": 1.9749139323686628e-05,
+      "loss": 1.6404,
+      "step": 245800
+    },
+    {
+      "epoch": 0.17108865939516682,
+      "grad_norm": 3.34220027923584,
+      "learning_rate": 1.9748733383150624e-05,
+      "loss": 1.6915,
+      "step": 246000
+    },
+    {
+      "epoch": 0.1712277558662198,
+      "grad_norm": 2.9125590324401855,
+      "learning_rate": 1.974832712043783e-05,
+      "loss": 1.6057,
+      "step": 246200
+    },
+    {
+      "epoch": 0.17136685233727278,
+      "grad_norm": 5.961441516876221,
+      "learning_rate": 1.974792053556764e-05,
+      "loss": 1.6639,
+      "step": 246400
+    },
+    {
+      "epoch": 0.17150594880832576,
+      "grad_norm": 3.4587485790252686,
+      "learning_rate": 1.9747513628559473e-05,
+      "loss": 1.667,
+      "step": 246600
+    },
+    {
+      "epoch": 0.17164504527937874,
+      "grad_norm": 3.578892946243286,
+      "learning_rate": 1.974710639943274e-05,
+      "loss": 1.6889,
+      "step": 246800
+    },
+    {
+      "epoch": 0.17178414175043172,
+      "grad_norm": 4.567336082458496,
+      "learning_rate": 1.9746698848206897e-05,
+      "loss": 1.6884,
+      "step": 247000
+    },
+    {
+      "epoch": 0.1719232382214847,
+      "grad_norm": 4.0480523109436035,
+      "learning_rate": 1.974629097490139e-05,
+      "loss": 1.6463,
+      "step": 247200
     }
   ],
   "logging_steps": 200,
       "attributes": {}
     }
   },
+  "total_flos": 3.2903444675753165e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31159c9e3ece420d10b679508751f56bfb33866580a857e3a293714f0a805ecb
 size 6840

 version https://git-lfs.github.com/spec/v1
+oid sha256:d05fe2fc1f3ea3381f9d45772e347485be6cf70ca63fca95b95759ca5f4677ea
 size 6840