Training in progress, step 25600, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_config.json +3 -3
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +80 -3

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "k_proj",
     "up_proj",
-    "v_proj",
     "q_proj",
     "gate_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "down_proj",
     "up_proj",
     "q_proj",
     "gate_proj",
+    "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c7f93eabe6ac3c54d66eaa201fd02227487a26f7778737a9f254ff462e973e4
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:b0d94edbec8149dbe77b5714eec5b67014b9ae262c0869f1aa19043d8ecf1190
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:73914a91d2c85155b01b949bf4bca1b47684ea4ff1764db4a597aed9516fd5ab
 size 85736914

 version https://git-lfs.github.com/spec/v1
+oid sha256:a14c1f59df5a9c6088d17c4ae86f9dd5081681dd4f24842cc845731cd5c6bd83
 size 85736914

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f8c4f9076fe893e7a53debd026211a9b9066658d86f31864434230c495759f3
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:78d8aea77898e9b91f9eb081b5eb89090ec0b6f85c7c2b88fe32a844a809dee9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8114cada636c19d5007acc44352f3b0449c4b5ebb7cb27bcb702507137d58166
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e12782ff1de906d4f9075c6686b1b4fec69203509c9a4e2f4c19898a0d5f4d8e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0696068625227282,
   "eval_steps": 2000,
-  "global_step": 23400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -826,6 +826,83 @@
       "learning_rate": 1.999045551990737e-05,
       "loss": 1.536,
       "step": 23400
     }
   ],
   "logging_steps": 200,
@@ -833,7 +910,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
-  "total_flos": 4.745472228553851e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07615109746076248,
   "eval_steps": 2000,
+  "global_step": 25600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.999045551990737e-05,
       "loss": 1.536,
       "step": 23400
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.7328152656555176,
+      "learning_rate": 1.9990292364225084e-05,
+      "loss": 1.5166,
+      "step": 23600
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.3295519351959229,
+      "learning_rate": 1.9990126996188935e-05,
+      "loss": 1.5488,
+      "step": 23800
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 2.1913652420043945,
+      "learning_rate": 1.998996023220988e-05,
+      "loss": 1.5219,
+      "step": 24000
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.8065701723098755,
+      "learning_rate": 1.998979207231122e-05,
+      "loss": 1.5181,
+      "step": 24200
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.8158023357391357,
+      "learning_rate": 1.998962336776768e-05,
+      "loss": 1.5176,
+      "step": 24400
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.4990816116333008,
+      "learning_rate": 1.9989452423079802e-05,
+      "loss": 1.4998,
+      "step": 24600
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.655572533607483,
+      "learning_rate": 1.9989280082543273e-05,
+      "loss": 1.5426,
+      "step": 24800
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.9679639339447021,
+      "learning_rate": 1.9989106346182187e-05,
+      "loss": 1.5603,
+      "step": 25000
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.2155619859695435,
+      "learning_rate": 1.9988931214020803e-05,
+      "loss": 1.5368,
+      "step": 25200
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.3557419776916504,
+      "learning_rate": 1.9988754686083607e-05,
+      "loss": 1.531,
+      "step": 25400
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.433875322341919,
+      "learning_rate": 1.998857676239526e-05,
+      "loss": 1.5502,
+      "step": 25600
     }
   ],
   "logging_steps": 200,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
+  "total_flos": 5.89264804322771e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null