Training in progress, step 22400, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +87 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
     "q_proj",
-    "gate_proj",
-    "down_proj",
     "up_proj",
-    "k_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
     "up_proj",
+    "v_proj",
+    "down_proj",
+    "o_proj",
+    "gate_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fb5d4a8e80692092cbb6ea69a8cd6902d7306436bd2ef4db3d268beb6d254345
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:20e56293ea86924617db1d72bddea10585e4be98a582ddca170c081dd3d642c9
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5ed3c0ef76d1963f5e466fac078e7bdfa634f48c1b91aaad0c42e6ae39ac1315
 size 85736914

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a7c5ec127b3617a18b3d72d14dceb0ba197088d23e289ef92a6837ce8b5e15f
 size 85736914

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:adc80d060b3614ce93cf171cfa62b9a17755ba93a67b6a802cd87b68b0a907bb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4762419da8cab351088827e4869a7225442c8f1f7484efb21afc6c2799818eb
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:55ba296f12eae4fc0511f08bee2c4fc9bf546a7ddf44e3e343ade2d2cae2ed71
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:47e872382e84dc07d00209e557644b4bf1503898fe7aa404af9c426170e40ad4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.02974652244561034,
   "eval_steps": 2000,
-  "global_step": 20000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -707,6 +707,90 @@
       "learning_rate": 1.99982565504712e-05,
       "loss": 1.5135,
       "step": 20000
     }
   ],
   "logging_steps": 200,
@@ -714,7 +798,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
-  "total_flos": 3.6983529369430426e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.033316105139083584,
   "eval_steps": 2000,
+  "global_step": 22400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.99982565504712e-05,
       "loss": 1.5135,
       "step": 20000
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.2242326736450195,
+      "learning_rate": 1.9998221652689703e-05,
+      "loss": 1.5325,
+      "step": 20200
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.6685123443603516,
+      "learning_rate": 1.9998186409125715e-05,
+      "loss": 1.5765,
+      "step": 20400
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.8478649854660034,
+      "learning_rate": 1.999815064006636e-05,
+      "loss": 1.5597,
+      "step": 20600
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.6228203773498535,
+      "learning_rate": 1.999811452174307e-05,
+      "loss": 1.5312,
+      "step": 20800
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.2979694604873657,
+      "learning_rate": 1.9998078054157092e-05,
+      "loss": 1.5863,
+      "step": 21000
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.4286555051803589,
+      "learning_rate": 1.999804123730971e-05,
+      "loss": 1.5265,
+      "step": 21200
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.2393202781677246,
+      "learning_rate": 1.999800407120221e-05,
+      "loss": 1.5599,
+      "step": 21400
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 8.066116333007812,
+      "learning_rate": 1.9997966555835886e-05,
+      "loss": 1.5345,
+      "step": 21600
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.866185426712036,
+      "learning_rate": 1.9997928691212052e-05,
+      "loss": 1.5141,
+      "step": 21800
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.5764899253845215,
+      "learning_rate": 1.9997890477332027e-05,
+      "loss": 1.5189,
+      "step": 22000
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 3.597501039505005,
+      "learning_rate": 1.9997851914197147e-05,
+      "loss": 1.5368,
+      "step": 22200
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 2.5888760089874268,
+      "learning_rate": 1.9997813001808763e-05,
+      "loss": 1.5603,
+      "step": 22400
     }
   ],
   "logging_steps": 200,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 5,
   "save_steps": 200,
+  "total_flos": 4.327171237194056e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7da6c64555cac2ee9fc43605b134636b4610ed6cd244e07ab7cb4ce3c058548c
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:9a7800ffb29813f89bbd542587a923598ba056a5186a554421f6b09a6d22b374
 size 4920