R2aillc/mixtral-8x7bi-ffiec-finetuned

Browse files

Files changed (4) hide show

README.md +26 -26
adapter_config.json +5 -5
adapter_model.safetensors +1 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.7382
 ## Model description
@@ -52,31 +52,31 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 2.2053        | 1.0   | 18   | 1.6050          |
-| 1.685         | 2.0   | 36   | 1.4131          |
-| 1.4913        | 3.0   | 54   | 1.2708          |
-| 1.3079        | 4.0   | 72   | 1.1353          |
-| 1.1686        | 5.0   | 90   | 1.0360          |
-| 1.1356        | 6.0   | 108  | 0.9803          |
-| 1.053         | 7.0   | 126  | 0.9382          |
-| 0.9968        | 8.0   | 144  | 0.8994          |
-| 0.947         | 9.0   | 162  | 0.8616          |
-| 0.8981        | 10.0  | 180  | 0.8244          |
-| 0.8726        | 11.0  | 198  | 0.7891          |
-| 0.8211        | 12.0  | 216  | 0.7727          |
-| 0.8126        | 13.0  | 234  | 0.7645          |
-| 0.7889        | 14.0  | 252  | 0.7591          |
-| 0.7837        | 15.0  | 270  | 0.7547          |
-| 0.7792        | 16.0  | 288  | 0.7510          |
-| 0.7507        | 17.0  | 306  | 0.7481          |
-| 0.7483        | 18.0  | 324  | 0.7458          |
-| 0.7429        | 19.0  | 342  | 0.7432          |
-| 0.7362        | 20.0  | 360  | 0.7417          |
-| 0.7495        | 21.0  | 378  | 0.7406          |
-| 0.748         | 22.0  | 396  | 0.7397          |
-| 0.7429        | 23.0  | 414  | 0.7388          |
-| 0.7444        | 24.0  | 432  | 0.7384          |
-| 0.7334        | 25.0  | 450  | 0.7382          |
 ### Framework versions

 This model is a fine-tuned version of [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.7671
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 3.0101        | 1.0   | 18   | 2.1741          |
+| 2.1612        | 2.0   | 36   | 1.5962          |
+| 1.6591        | 3.0   | 54   | 1.4202          |
+| 1.4985        | 4.0   | 72   | 1.3035          |
+| 1.3585        | 5.0   | 90   | 1.1977          |
+| 1.294         | 6.0   | 108  | 1.0993          |
+| 1.1823        | 7.0   | 126  | 1.0139          |
+| 1.0983        | 8.0   | 144  | 0.9641          |
+| 1.0371        | 9.0   | 162  | 0.9293          |
+| 0.9868        | 10.0  | 180  | 0.8961          |
+| 0.9535        | 11.0  | 198  | 0.8655          |
+| 0.9259        | 12.0  | 216  | 0.8358          |
+| 0.882         | 13.0  | 234  | 0.8067          |
+| 0.8472        | 14.0  | 252  | 0.7938          |
+| 0.8484        | 15.0  | 270  | 0.7872          |
+| 0.8215        | 16.0  | 288  | 0.7826          |
+| 0.8167        | 17.0  | 306  | 0.7779          |
+| 0.8199        | 18.0  | 324  | 0.7751          |
+| 0.8042        | 19.0  | 342  | 0.7730          |
+| 0.8186        | 20.0  | 360  | 0.7710          |
+| 0.794         | 21.0  | 378  | 0.7698          |
+| 0.7958        | 22.0  | 396  | 0.7685          |
+| 0.7858        | 23.0  | 414  | 0.7677          |
+| 0.7857        | 24.0  | 432  | 0.7671          |
+| 0.7843        | 25.0  | 450  | 0.7671          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -19,14 +19,14 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
     "up_proj",
     "v_proj",
-    "gate_proj",
-    "lm_head",
-    "k_proj",
-    "down_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "down_proj",
     "q_proj",
+    "gate_proj",
+    "o_proj",
     "up_proj",
     "v_proj",
+    "lm_head"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f57f7b1ee3a2c2ce71bf01d20a2870af431718e2188114a64752255ec49cdc8e
 size 751667752

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce233a3572c72f9aa4d5d56052f8efb4341f16756cefe74df9371af6e3456388
 size 751667752

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:abadc159dd59bb3b8b115028c76a800030dd77fd80570bf1337812d9be5a7b70
-size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:511ab54ce8eea56fc125463bf9a1c308882f819f5a367c15e6a14f094b2ae779
+size 4728