Training in progress, epoch 0

Browse files

Files changed (4) hide show

adapter_config.json +4 -2
adapter_model.safetensors +2 -2
trainer_state.json +317 -0
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -20,10 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "v_proj",
     "q_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
+    "fc1",
     "q_proj",
+    "k_proj",
+    "fc2",
+    "dense"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:58f75d622aecf59b2ee126d74a55847290971d6e250e272002d16645c33846de
-size 15754264

 version https://git-lfs.github.com/spec/v1
+oid sha256:261e0270c9dbf399b11e2fb87774b201c48297b789370c78af65f541e42ee3a4
+size 47236352

trainer_state.json ADDED Viewed

	@@ -0,0 +1,317 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.820359281437126,
+  "eval_steps": 500,
+  "global_step": 410,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.23952095808383234,
+      "grad_norm": 2.16796875,
+      "learning_rate": 0.00019970658011837404,
+      "loss": 0.5442,
+      "step": 10
+    },
+    {
+      "epoch": 0.47904191616766467,
+      "grad_norm": 2.02734375,
+      "learning_rate": 0.00019882804237803488,
+      "loss": 0.5445,
+      "step": 20
+    },
+    {
+      "epoch": 0.718562874251497,
+      "grad_norm": 1.2587890625,
+      "learning_rate": 0.00019736954238777792,
+      "loss": 0.5499,
+      "step": 30
+    },
+    {
+      "epoch": 0.9580838323353293,
+      "grad_norm": 1.3486328125,
+      "learning_rate": 0.00019533963920549306,
+      "loss": 0.5429,
+      "step": 40
+    },
+    {
+      "epoch": 1.1976047904191618,
+      "grad_norm": 2.08984375,
+      "learning_rate": 0.0001927502451102095,
+      "loss": 0.5336,
+      "step": 50
+    },
+    {
+      "epoch": 1.437125748502994,
+      "grad_norm": 1.7734375,
+      "learning_rate": 0.00018961655569610557,
+      "loss": 0.5333,
+      "step": 60
+    },
+    {
+      "epoch": 1.6766467065868262,
+      "grad_norm": 2.63671875,
+      "learning_rate": 0.00018595696069872013,
+      "loss": 0.5401,
+      "step": 70
+    },
+    {
+      "epoch": 1.9161676646706587,
+      "grad_norm": 2.19921875,
+      "learning_rate": 0.00018179293607667178,
+      "loss": 0.5414,
+      "step": 80
+    },
+    {
+      "epoch": 2.155688622754491,
+      "grad_norm": 1.908203125,
+      "learning_rate": 0.0001771489179821943,
+      "loss": 0.5713,
+      "step": 90
+    },
+    {
+      "epoch": 2.3952095808383236,
+      "grad_norm": 4.09765625,
+      "learning_rate": 0.0001720521593600787,
+      "loss": 0.538,
+      "step": 100
+    },
+    {
+      "epoch": 2.6347305389221556,
+      "grad_norm": 2.0390625,
+      "learning_rate": 0.00016653257001655652,
+      "loss": 0.5199,
+      "step": 110
+    },
+    {
+      "epoch": 2.874251497005988,
+      "grad_norm": 1.8154296875,
+      "learning_rate": 0.0001606225410966638,
+      "loss": 0.5204,
+      "step": 120
+    },
+    {
+      "epoch": 3.1137724550898205,
+      "grad_norm": 1.9189453125,
+      "learning_rate": 0.00015435675500012212,
+      "loss": 0.5396,
+      "step": 130
+    },
+    {
+      "epoch": 3.3532934131736525,
+      "grad_norm": 1.8896484375,
+      "learning_rate": 0.0001477719818512263,
+      "loss": 0.5294,
+      "step": 140
+    },
+    {
+      "epoch": 3.592814371257485,
+      "grad_norm": 2.302734375,
+      "learning_rate": 0.00014090686371713402,
+      "loss": 0.5394,
+      "step": 150
+    },
+    {
+      "epoch": 3.8323353293413174,
+      "grad_norm": 2.16796875,
+      "learning_rate": 0.00013380168784085027,
+      "loss": 0.5279,
+      "step": 160
+    },
+    {
+      "epoch": 4.07185628742515,
+      "grad_norm": 2.11328125,
+      "learning_rate": 0.0001264981502196662,
+      "loss": 0.5182,
+      "step": 170
+    },
+    {
+      "epoch": 4.311377245508982,
+      "grad_norm": 1.9189453125,
+      "learning_rate": 0.00011903911091646684,
+      "loss": 0.5304,
+      "step": 180
+    },
+    {
+      "epoch": 4.550898203592815,
+      "grad_norm": 2.48046875,
+      "learning_rate": 0.00011146834253984006,
+      "loss": 0.5386,
+      "step": 190
+    },
+    {
+      "epoch": 4.790419161676647,
+      "grad_norm": 2.62890625,
+      "learning_rate": 0.00010383027336900355,
+      "loss": 0.5276,
+      "step": 200
+    },
+    {
+      "epoch": 5.029940119760479,
+      "grad_norm": 3.021484375,
+      "learning_rate": 9.616972663099647e-05,
+      "loss": 0.5229,
+      "step": 210
+    },
+    {
+      "epoch": 5.269461077844311,
+      "grad_norm": 3.259765625,
+      "learning_rate": 8.853165746015997e-05,
+      "loss": 0.5243,
+      "step": 220
+    },
+    {
+      "epoch": 5.508982035928144,
+      "grad_norm": 2.876953125,
+      "learning_rate": 8.096088908353315e-05,
+      "loss": 0.5206,
+      "step": 230
+    },
+    {
+      "epoch": 5.748502994011976,
+      "grad_norm": 3.349609375,
+      "learning_rate": 7.350184978033386e-05,
+      "loss": 0.5374,
+      "step": 240
+    },
+    {
+      "epoch": 5.9880239520958085,
+      "grad_norm": 3.791015625,
+      "learning_rate": 6.619831215914974e-05,
+      "loss": 0.5258,
+      "step": 250
+    },
+    {
+      "epoch": 6.227544910179641,
+      "grad_norm": 3.240234375,
+      "learning_rate": 5.909313628286601e-05,
+      "loss": 0.5166,
+      "step": 260
+    },
+    {
+      "epoch": 6.467065868263473,
+      "grad_norm": 3.248046875,
+      "learning_rate": 5.222801814877369e-05,
+      "loss": 0.5329,
+      "step": 270
+    },
+    {
+      "epoch": 6.706586826347305,
+      "grad_norm": 2.83984375,
+      "learning_rate": 4.56432449998779e-05,
+      "loss": 0.5117,
+      "step": 280
+    },
+    {
+      "epoch": 6.946107784431137,
+      "grad_norm": 4.03515625,
+      "learning_rate": 3.937745890333623e-05,
+      "loss": 0.5269,
+      "step": 290
+    },
+    {
+      "epoch": 7.18562874251497,
+      "grad_norm": 4.08203125,
+      "learning_rate": 3.346742998344348e-05,
+      "loss": 0.5299,
+      "step": 300
+    },
+    {
+      "epoch": 7.425149700598802,
+      "grad_norm": 3.03125,
+      "learning_rate": 2.794784063992131e-05,
+      "loss": 0.5217,
+      "step": 310
+    },
+    {
+      "epoch": 7.664670658682635,
+      "grad_norm": 3.00390625,
+      "learning_rate": 2.2851082017805703e-05,
+      "loss": 0.5241,
+      "step": 320
+    },
+    {
+      "epoch": 7.904191616766467,
+      "grad_norm": 3.27734375,
+      "learning_rate": 1.8207063923328237e-05,
+      "loss": 0.5066,
+      "step": 330
+    },
+    {
+      "epoch": 8.1437125748503,
+      "grad_norm": 2.927734375,
+      "learning_rate": 1.4043039301279903e-05,
+      "loss": 0.5111,
+      "step": 340
+    },
+    {
+      "epoch": 8.383233532934131,
+      "grad_norm": 3.3984375,
+      "learning_rate": 1.0383444303894452e-05,
+      "loss": 0.5137,
+      "step": 350
+    },
+    {
+      "epoch": 8.622754491017965,
+      "grad_norm": 3.4375,
+      "learning_rate": 7.249754889790539e-06,
+      "loss": 0.5279,
+      "step": 360
+    },
+    {
+      "epoch": 8.862275449101796,
+      "grad_norm": 3.19140625,
+      "learning_rate": 4.660360794506946e-06,
+      "loss": 0.5125,
+      "step": 370
+    },
+    {
+      "epoch": 9.10179640718563,
+      "grad_norm": 4.23046875,
+      "learning_rate": 2.6304576122221035e-06,
+      "loss": 0.5141,
+      "step": 380
+    },
+    {
+      "epoch": 9.341317365269461,
+      "grad_norm": 2.779296875,
+      "learning_rate": 1.1719576219651585e-06,
+      "loss": 0.5172,
+      "step": 390
+    },
+    {
+      "epoch": 9.580838323353294,
+      "grad_norm": 3.619140625,
+      "learning_rate": 2.934198816259559e-07,
+      "loss": 0.5389,
+      "step": 400
+    },
+    {
+      "epoch": 9.820359281437126,
+      "grad_norm": 4.30078125,
+      "learning_rate": 0.0,
+      "loss": 0.5024,
+      "step": 410
+    },
+    {
+      "epoch": 9.820359281437126,
+      "step": 410,
+      "total_flos": 2.67670788243456e+16,
+      "train_loss": 0.5285330202521348,
+      "train_runtime": 355.5253,
+      "train_samples_per_second": 4.697,
+      "train_steps_per_second": 1.153
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 410,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 2.67670788243456e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9efc63e0db152fbc14de7f7d79d93ad4b05a1ce37ab0ca1ae9d360f5e9303158
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:c06099d4eb235513a1439a552237439d2e6e7a121a8c6162b02c3f4b29f578e8
 size 4984