Adapter

Files changed (7) hide show

README.md CHANGED Viewed

@@ -44,6 +44,10 @@ The following hyperparameters were used during training:
 - training_steps: 10
 - mixed_precision_training: Native AMP
 ### Framework versions
 - PEFT 0.8.2

 - training_steps: 10
 - mixed_precision_training: Native AMP
+### Training results
 ### Framework versions
 - PEFT 0.8.2

adapter_config.json CHANGED Viewed

@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
     "k_proj",
     "v_proj",
-    "gate_proj",
-    "o_proj",
     "q_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
     "k_proj",
     "v_proj",
+    "down_proj",
     "q_proj",
+    "o_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:130309b46a9db0bda2443a36d5ecaa79e8f390aadee720706f17cf5cbba5cf4b
 size 159967880

 version https://git-lfs.github.com/spec/v1
+oid sha256:8475ca784de92c0eb4f168b0933c30be4344b58a26d81c01c3ff31c0adf6d5f8
 size 159967880

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 0.0,
-    "total_flos": 333801262080000.0,
-    "train_loss": 1.2641676783561706,
-    "train_runtime": 52.6099,
-    "train_samples_per_second": 0.76,
-    "train_steps_per_second": 0.19
 }

 {
     "epoch": 0.0,
+    "total_flos": 324056878080000.0,
+    "train_loss": 1.359846395254135,
+    "train_runtime": 36.0797,
+    "train_samples_per_second": 1.109,
+    "train_steps_per_second": 0.277
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "epoch": 0.0,
-    "total_flos": 333801262080000.0,
-    "train_loss": 1.2641676783561706,
-    "train_runtime": 52.6099,
-    "train_samples_per_second": 0.76,
-    "train_steps_per_second": 0.19
 }

 {
     "epoch": 0.0,
+    "total_flos": 324056878080000.0,
+    "train_loss": 1.359846395254135,
+    "train_runtime": 36.0797,
+    "train_samples_per_second": 1.109,
+    "train_steps_per_second": 0.277
 }

trainer_state.json CHANGED Viewed

@@ -23,59 +23,59 @@
     {
       "epoch": 0.0,
       "learning_rate": 0.000175,
-      "loss": 1.4764,
       "step": 3
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.00015000000000000001,
-      "loss": 0.9966,
       "step": 4
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.000125,
-      "loss": 1.339,
       "step": 5
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.0001,
-      "loss": 1.0031,
       "step": 6
     },
     {
       "epoch": 0.0,
       "learning_rate": 7.500000000000001e-05,
-      "loss": 0.9943,
       "step": 7
     },
     {
       "epoch": 0.0,
       "learning_rate": 5e-05,
-      "loss": 1.1203,
       "step": 8
     },
     {
       "epoch": 0.0,
       "learning_rate": 2.5e-05,
-      "loss": 0.9072,
       "step": 9
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.0,
-      "loss": 0.9334,
       "step": 10
     },
     {
       "epoch": 0.0,
       "step": 10,
-      "total_flos": 333801262080000.0,
-      "train_loss": 1.2641676783561706,
-      "train_runtime": 52.6099,
-      "train_samples_per_second": 0.76,
-      "train_steps_per_second": 0.19
     }
   ],
   "logging_steps": 1,
@@ -83,7 +83,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
-  "total_flos": 333801262080000.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

     {
       "epoch": 0.0,
       "learning_rate": 0.000175,
+      "loss": 1.6356,
       "step": 3
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.00015000000000000001,
+      "loss": 1.3577,
       "step": 4
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.000125,
+      "loss": 1.3596,
       "step": 5
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.0001,
+      "loss": 1.0625,
       "step": 6
     },
     {
       "epoch": 0.0,
       "learning_rate": 7.500000000000001e-05,
+      "loss": 1.0769,
       "step": 7
     },
     {
       "epoch": 0.0,
       "learning_rate": 5e-05,
+      "loss": 1.2269,
       "step": 8
     },
     {
       "epoch": 0.0,
       "learning_rate": 2.5e-05,
+      "loss": 1.0216,
       "step": 9
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.0,
+      "loss": 0.9863,
       "step": 10
     },
     {
       "epoch": 0.0,
       "step": 10,
+      "total_flos": 324056878080000.0,
+      "train_loss": 1.359846395254135,
+      "train_runtime": 36.0797,
+      "train_samples_per_second": 1.109,
+      "train_steps_per_second": 0.277
     }
   ],
   "logging_steps": 1,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
+  "total_flos": 324056878080000.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c032efc104fa0fece03810eb29ba53e2dbcaccc5a8663387f92be442f23a24e2
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa88d497c0724abe443769abdd278e76866c1fb542f8770422dffae4852c481f
 size 4664