Training in progress, epoch 1

Browse files

Files changed (4) hide show

adapter_config.json +6 -3
adapter_model.safetensors +2 -2
trainer_state.json +317 -0
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -20,10 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "q_proj",
     "o_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "gate_proj",
+    "v_proj",
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02d98634348ac35ca4646d3d30be57be7c7ef7800350abc4bc36484d49c91354
-size 7391832

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b4e76984a3e1a558b51b12e9f33b601eed99f29ddeb03c9f0861577550188a9
+size 39256704

trainer_state.json ADDED Viewed

	@@ -0,0 +1,317 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 410,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.24390243902439024,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00019970658011837404,
+      "loss": 2.0086,
+      "step": 10
+    },
+    {
+      "epoch": 0.4878048780487805,
+      "grad_norm": 0.59033203125,
+      "learning_rate": 0.00019882804237803488,
+      "loss": 1.1443,
+      "step": 20
+    },
+    {
+      "epoch": 0.7317073170731707,
+      "grad_norm": 0.736328125,
+      "learning_rate": 0.00019736954238777792,
+      "loss": 0.985,
+      "step": 30
+    },
+    {
+      "epoch": 0.975609756097561,
+      "grad_norm": 0.50048828125,
+      "learning_rate": 0.00019533963920549306,
+      "loss": 0.918,
+      "step": 40
+    },
+    {
+      "epoch": 1.2195121951219512,
+      "grad_norm": 0.36083984375,
+      "learning_rate": 0.0001927502451102095,
+      "loss": 0.7771,
+      "step": 50
+    },
+    {
+      "epoch": 1.4634146341463414,
+      "grad_norm": 0.376953125,
+      "learning_rate": 0.00018961655569610557,
+      "loss": 0.8079,
+      "step": 60
+    },
+    {
+      "epoch": 1.7073170731707317,
+      "grad_norm": 0.314208984375,
+      "learning_rate": 0.00018595696069872013,
+      "loss": 0.7491,
+      "step": 70
+    },
+    {
+      "epoch": 1.951219512195122,
+      "grad_norm": 0.3203125,
+      "learning_rate": 0.00018179293607667178,
+      "loss": 0.7349,
+      "step": 80
+    },
+    {
+      "epoch": 2.1951219512195124,
+      "grad_norm": 0.331787109375,
+      "learning_rate": 0.0001771489179821943,
+      "loss": 0.7468,
+      "step": 90
+    },
+    {
+      "epoch": 2.4390243902439024,
+      "grad_norm": 0.374755859375,
+      "learning_rate": 0.0001720521593600787,
+      "loss": 0.7582,
+      "step": 100
+    },
+    {
+      "epoch": 2.682926829268293,
+      "grad_norm": 0.3662109375,
+      "learning_rate": 0.00016653257001655652,
+      "loss": 0.715,
+      "step": 110
+    },
+    {
+      "epoch": 2.926829268292683,
+      "grad_norm": 0.413818359375,
+      "learning_rate": 0.0001606225410966638,
+      "loss": 0.7404,
+      "step": 120
+    },
+    {
+      "epoch": 3.1707317073170733,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.00015435675500012212,
+      "loss": 0.6844,
+      "step": 130
+    },
+    {
+      "epoch": 3.4146341463414633,
+      "grad_norm": 0.48095703125,
+      "learning_rate": 0.0001477719818512263,
+      "loss": 0.7568,
+      "step": 140
+    },
+    {
+      "epoch": 3.658536585365854,
+      "grad_norm": 0.369384765625,
+      "learning_rate": 0.00014090686371713402,
+      "loss": 0.6647,
+      "step": 150
+    },
+    {
+      "epoch": 3.902439024390244,
+      "grad_norm": 0.422607421875,
+      "learning_rate": 0.00013380168784085027,
+      "loss": 0.698,
+      "step": 160
+    },
+    {
+      "epoch": 4.146341463414634,
+      "grad_norm": 0.38427734375,
+      "learning_rate": 0.0001264981502196662,
+      "loss": 0.6861,
+      "step": 170
+    },
+    {
+      "epoch": 4.390243902439025,
+      "grad_norm": 0.38525390625,
+      "learning_rate": 0.00011903911091646684,
+      "loss": 0.6855,
+      "step": 180
+    },
+    {
+      "epoch": 4.634146341463414,
+      "grad_norm": 0.384033203125,
+      "learning_rate": 0.00011146834253984006,
+      "loss": 0.6797,
+      "step": 190
+    },
+    {
+      "epoch": 4.878048780487805,
+      "grad_norm": 0.365966796875,
+      "learning_rate": 0.00010383027336900355,
+      "loss": 0.6865,
+      "step": 200
+    },
+    {
+      "epoch": 5.121951219512195,
+      "grad_norm": 0.39794921875,
+      "learning_rate": 9.616972663099647e-05,
+      "loss": 0.6859,
+      "step": 210
+    },
+    {
+      "epoch": 5.365853658536586,
+      "grad_norm": 0.3837890625,
+      "learning_rate": 8.853165746015997e-05,
+      "loss": 0.6636,
+      "step": 220
+    },
+    {
+      "epoch": 5.609756097560975,
+      "grad_norm": 0.445068359375,
+      "learning_rate": 8.096088908353315e-05,
+      "loss": 0.6883,
+      "step": 230
+    },
+    {
+      "epoch": 5.853658536585366,
+      "grad_norm": 0.4130859375,
+      "learning_rate": 7.350184978033386e-05,
+      "loss": 0.6432,
+      "step": 240
+    },
+    {
+      "epoch": 6.097560975609756,
+      "grad_norm": 0.380859375,
+      "learning_rate": 6.619831215914974e-05,
+      "loss": 0.6597,
+      "step": 250
+    },
+    {
+      "epoch": 6.341463414634147,
+      "grad_norm": 0.407958984375,
+      "learning_rate": 5.909313628286601e-05,
+      "loss": 0.6574,
+      "step": 260
+    },
+    {
+      "epoch": 6.585365853658536,
+      "grad_norm": 0.3974609375,
+      "learning_rate": 5.222801814877369e-05,
+      "loss": 0.6499,
+      "step": 270
+    },
+    {
+      "epoch": 6.829268292682927,
+      "grad_norm": 0.40625,
+      "learning_rate": 4.56432449998779e-05,
+      "loss": 0.6436,
+      "step": 280
+    },
+    {
+      "epoch": 7.073170731707317,
+      "grad_norm": 0.3994140625,
+      "learning_rate": 3.937745890333623e-05,
+      "loss": 0.6644,
+      "step": 290
+    },
+    {
+      "epoch": 7.317073170731708,
+      "grad_norm": 0.40673828125,
+      "learning_rate": 3.346742998344348e-05,
+      "loss": 0.656,
+      "step": 300
+    },
+    {
+      "epoch": 7.560975609756097,
+      "grad_norm": 0.408447265625,
+      "learning_rate": 2.794784063992131e-05,
+      "loss": 0.6244,
+      "step": 310
+    },
+    {
+      "epoch": 7.804878048780488,
+      "grad_norm": 0.41015625,
+      "learning_rate": 2.2851082017805703e-05,
+      "loss": 0.6594,
+      "step": 320
+    },
+    {
+      "epoch": 8.048780487804878,
+      "grad_norm": 0.437744140625,
+      "learning_rate": 1.8207063923328237e-05,
+      "loss": 0.6423,
+      "step": 330
+    },
+    {
+      "epoch": 8.292682926829269,
+      "grad_norm": 0.43212890625,
+      "learning_rate": 1.4043039301279903e-05,
+      "loss": 0.635,
+      "step": 340
+    },
+    {
+      "epoch": 8.536585365853659,
+      "grad_norm": 0.4013671875,
+      "learning_rate": 1.0383444303894452e-05,
+      "loss": 0.6408,
+      "step": 350
+    },
+    {
+      "epoch": 8.78048780487805,
+      "grad_norm": 0.40869140625,
+      "learning_rate": 7.249754889790539e-06,
+      "loss": 0.6438,
+      "step": 360
+    },
+    {
+      "epoch": 9.024390243902438,
+      "grad_norm": 0.3857421875,
+      "learning_rate": 4.660360794506946e-06,
+      "loss": 0.6282,
+      "step": 370
+    },
+    {
+      "epoch": 9.268292682926829,
+      "grad_norm": 0.400634765625,
+      "learning_rate": 2.6304576122221035e-06,
+      "loss": 0.6359,
+      "step": 380
+    },
+    {
+      "epoch": 9.512195121951219,
+      "grad_norm": 0.3935546875,
+      "learning_rate": 1.1719576219651585e-06,
+      "loss": 0.6467,
+      "step": 390
+    },
+    {
+      "epoch": 9.75609756097561,
+      "grad_norm": 0.4140625,
+      "learning_rate": 2.934198816259559e-07,
+      "loss": 0.6479,
+      "step": 400
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.400390625,
+      "learning_rate": 0.0,
+      "loss": 0.6203,
+      "step": 410
+    },
+    {
+      "epoch": 10.0,
+      "step": 410,
+      "total_flos": 2.000692923334656e+16,
+      "train_loss": 0.7381390141277778,
+      "train_runtime": 366.1191,
+      "train_samples_per_second": 4.479,
+      "train_steps_per_second": 1.12
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 410,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 2.000692923334656e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1c53e2098ab48f443df2689d4e6dbdb3d25526eeb63e57661f0a0ff2d5df4b1
 size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:4efee21f229df2d1d33be74e3b8c2ff4958f5d456ec8d17ee1d29d524be5c3ee
 size 5048