Training in progress, epoch 0

Browse files

Files changed (4) hide show

adapter_config.json +6 -3
adapter_model.safetensors +2 -2
trainer_state.json +261 -0
training_args.bin +1 -1

adapter_config.json CHANGED Viewed

@@ -20,10 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "q_proj",
     "o_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "o_proj",
+    "gate_proj",
+    "v_proj",
+    "down_proj",
+    "up_proj",
+    "q_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0267e1dc72a535e6d4ed4c05d820f45d51a4919544efca46b60e7c5d8660e327
-size 7391832

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc70a86e448fb3d30ed5fb637cb6d53a7df7d84b9be49ac0ba6abacc944c3b60
+size 39256704

trainer_state.json ADDED Viewed

	@@ -0,0 +1,261 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 9.777777777777779,
+  "eval_steps": 500,
+  "global_step": 330,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.2962962962962963,
+      "grad_norm": 0.83837890625,
+      "learning_rate": 0.00019954719225730847,
+      "loss": 1.6268,
+      "step": 10
+    },
+    {
+      "epoch": 0.5925925925925926,
+      "grad_norm": 0.63134765625,
+      "learning_rate": 0.00019819286972627066,
+      "loss": 1.0878,
+      "step": 20
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.63623046875,
+      "learning_rate": 0.00019594929736144976,
+      "loss": 0.9758,
+      "step": 30
+    },
+    {
+      "epoch": 1.1851851851851851,
+      "grad_norm": 0.54345703125,
+      "learning_rate": 0.00019283679330160726,
+      "loss": 0.8484,
+      "step": 40
+    },
+    {
+      "epoch": 1.4814814814814814,
+      "grad_norm": 0.355712890625,
+      "learning_rate": 0.00018888354486549237,
+      "loss": 0.7642,
+      "step": 50
+    },
+    {
+      "epoch": 1.7777777777777777,
+      "grad_norm": 0.90869140625,
+      "learning_rate": 0.00018412535328311814,
+      "loss": 0.7435,
+      "step": 60
+    },
+    {
+      "epoch": 2.074074074074074,
+      "grad_norm": 0.35498046875,
+      "learning_rate": 0.00017860530947427875,
+      "loss": 0.7328,
+      "step": 70
+    },
+    {
+      "epoch": 2.3703703703703702,
+      "grad_norm": 0.37548828125,
+      "learning_rate": 0.00017237340381050703,
+      "loss": 0.7266,
+      "step": 80
+    },
+    {
+      "epoch": 2.6666666666666665,
+      "grad_norm": 0.319091796875,
+      "learning_rate": 0.00016548607339452853,
+      "loss": 0.7161,
+      "step": 90
+    },
+    {
+      "epoch": 2.962962962962963,
+      "grad_norm": 0.446044921875,
+      "learning_rate": 0.00015800569095711982,
+      "loss": 0.6811,
+      "step": 100
+    },
+    {
+      "epoch": 3.259259259259259,
+      "grad_norm": 0.3486328125,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 0.6885,
+      "step": 110
+    },
+    {
+      "epoch": 3.5555555555555554,
+      "grad_norm": 0.312255859375,
+      "learning_rate": 0.00014154150130018866,
+      "loss": 0.6989,
+      "step": 120
+    },
+    {
+      "epoch": 3.851851851851852,
+      "grad_norm": 0.402099609375,
+      "learning_rate": 0.00013270679633174218,
+      "loss": 0.6812,
+      "step": 130
+    },
+    {
+      "epoch": 4.148148148148148,
+      "grad_norm": 0.3740234375,
+      "learning_rate": 0.00012357589355094275,
+      "loss": 0.6903,
+      "step": 140
+    },
+    {
+      "epoch": 4.444444444444445,
+      "grad_norm": 0.400390625,
+      "learning_rate": 0.00011423148382732853,
+      "loss": 0.6746,
+      "step": 150
+    },
+    {
+      "epoch": 4.7407407407407405,
+      "grad_norm": 0.3525390625,
+      "learning_rate": 0.00010475819158237425,
+      "loss": 0.6626,
+      "step": 160
+    },
+    {
+      "epoch": 5.037037037037037,
+      "grad_norm": 0.328857421875,
+      "learning_rate": 9.524180841762577e-05,
+      "loss": 0.6716,
+      "step": 170
+    },
+    {
+      "epoch": 5.333333333333333,
+      "grad_norm": 0.336669921875,
+      "learning_rate": 8.57685161726715e-05,
+      "loss": 0.6314,
+      "step": 180
+    },
+    {
+      "epoch": 5.62962962962963,
+      "grad_norm": 0.336669921875,
+      "learning_rate": 7.642410644905726e-05,
+      "loss": 0.649,
+      "step": 190
+    },
+    {
+      "epoch": 5.925925925925926,
+      "grad_norm": 1.4248046875,
+      "learning_rate": 6.729320366825784e-05,
+      "loss": 0.6906,
+      "step": 200
+    },
+    {
+      "epoch": 6.222222222222222,
+      "grad_norm": 0.349365234375,
+      "learning_rate": 5.845849869981137e-05,
+      "loss": 0.644,
+      "step": 210
+    },
+    {
+      "epoch": 6.518518518518518,
+      "grad_norm": 0.36474609375,
+      "learning_rate": 5.000000000000002e-05,
+      "loss": 0.6315,
+      "step": 220
+    },
+    {
+      "epoch": 6.814814814814815,
+      "grad_norm": 0.377197265625,
+      "learning_rate": 4.19943090428802e-05,
+      "loss": 0.6505,
+      "step": 230
+    },
+    {
+      "epoch": 7.111111111111111,
+      "grad_norm": 0.349853515625,
+      "learning_rate": 3.45139266054715e-05,
+      "loss": 0.6518,
+      "step": 240
+    },
+    {
+      "epoch": 7.407407407407407,
+      "grad_norm": 0.40966796875,
+      "learning_rate": 2.7626596189492983e-05,
+      "loss": 0.6243,
+      "step": 250
+    },
+    {
+      "epoch": 7.703703703703704,
+      "grad_norm": 0.359619140625,
+      "learning_rate": 2.139469052572127e-05,
+      "loss": 0.6406,
+      "step": 260
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.385498046875,
+      "learning_rate": 1.587464671688187e-05,
+      "loss": 0.655,
+      "step": 270
+    },
+    {
+      "epoch": 8.296296296296296,
+      "grad_norm": 0.386474609375,
+      "learning_rate": 1.1116455134507664e-05,
+      "loss": 0.6513,
+      "step": 280
+    },
+    {
+      "epoch": 8.592592592592592,
+      "grad_norm": 0.384765625,
+      "learning_rate": 7.163206698392744e-06,
+      "loss": 0.6311,
+      "step": 290
+    },
+    {
+      "epoch": 8.88888888888889,
+      "grad_norm": 0.341064453125,
+      "learning_rate": 4.050702638550275e-06,
+      "loss": 0.6223,
+      "step": 300
+    },
+    {
+      "epoch": 9.185185185185185,
+      "grad_norm": 0.36572265625,
+      "learning_rate": 1.8071302737293295e-06,
+      "loss": 0.6275,
+      "step": 310
+    },
+    {
+      "epoch": 9.481481481481481,
+      "grad_norm": 0.380859375,
+      "learning_rate": 4.5280774269154115e-07,
+      "loss": 0.6198,
+      "step": 320
+    },
+    {
+      "epoch": 9.777777777777779,
+      "grad_norm": 0.39892578125,
+      "learning_rate": 0.0,
+      "loss": 0.6337,
+      "step": 330
+    },
+    {
+      "epoch": 9.777777777777779,
+      "step": 330,
+      "total_flos": 1.610313816342528e+16,
+      "train_loss": 0.7250095107338645,
+      "train_runtime": 297.4234,
+      "train_samples_per_second": 4.539,
+      "train_steps_per_second": 1.11
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 330,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 500,
+  "total_flos": 1.610313816342528e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7eacda1a7e1c653f90acea1259fc8a20025e0f2b7b10b26de3392a64de37ec7f
 size 4984

 version https://git-lfs.github.com/spec/v1
+oid sha256:69f9e9dbb1fdd559ae4a1758eca13113983624533b910e20a5d7a199ea7c52bf
 size 4984