ArpitaAeries committed on
Commit 315c4f4
1 Parent(s): 7d70729

ArpitaAeries/mistral-instruct-generation

README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [mistralai/Mixtral-8x7B-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-v0.1) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.1204
+- Loss: 1.1218
 
 ## Model description
 
@@ -50,16 +50,16 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 2.0233 | 0.1389 | 5 | 2.0104 |
-| 1.9361 | 0.2778 | 10 | 1.9170 |
-| 1.8032 | 0.4167 | 15 | 1.7756 |
-| 1.6579 | 0.5556 | 20 | 1.6198 |
-| 1.5229 | 0.6944 | 25 | 1.4942 |
-| 1.404 | 0.8333 | 30 | 1.3784 |
-| 1.2981 | 0.9722 | 35 | 1.2781 |
-| 1.2117 | 1.1111 | 40 | 1.1949 |
-| 1.153 | 1.25 | 45 | 1.1428 |
-| 1.1245 | 1.3889 | 50 | 1.1204 |
+| 2.0253 | 0.1389 | 5 | 2.0089 |
+| 1.94 | 0.2778 | 10 | 1.9158 |
+| 1.8034 | 0.4167 | 15 | 1.7803 |
+| 1.6579 | 0.5556 | 20 | 1.6171 |
+| 1.5197 | 0.6944 | 25 | 1.4960 |
+| 1.4063 | 0.8333 | 30 | 1.3825 |
+| 1.3019 | 0.9722 | 35 | 1.2824 |
+| 1.2145 | 1.1111 | 40 | 1.2010 |
+| 1.1549 | 1.25 | 45 | 1.1446 |
+| 1.1269 | 1.3889 | 50 | 1.1218 |
 
 
 ### Framework versions
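
As a usage note alongside the updated model card: below is a minimal sketch of loading this adapter on top of the Mixtral-8x7B-v0.1 base with the standard transformers and peft APIs. The commit itself contains no inference code, so treat this as an assumption about how the repo is meant to be consumed, not part of the change.

```python
# Hypothetical usage sketch; the repo id comes from the commit header above,
# but none of this code is part of the commit.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained("mistralai/Mixtral-8x7B-v0.1")
model = PeftModel.from_pretrained(base, "ArpitaAeries/mistral-instruct-generation")
tokenizer = AutoTokenizer.from_pretrained("ArpitaAeries/mistral-instruct-generation")
```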
adapter_config.json CHANGED
@@ -20,14 +20,14 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "k_proj",
+    "q_proj",
+    "v_proj",
     "gate_proj",
-    "o_proj",
     "up_proj",
     "lm_head",
-    "k_proj",
-    "v_proj",
     "down_proj",
-    "q_proj"
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60d2399aed7f43aa33333bdb7eb1e74e0ef8f107fc88fbcf9d63e8b841df1e1b
+oid sha256:a771b83c1c5bd08ea119cc6427e6c8135a5f966aafd8be28febf86ae437448dd
 size 751667752
runs/May21_11-59-13_vm/events.out.tfevents.1716297485.vm.56490.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:833d36636a9d928d62b085ac52e24f1d7ebcc6cf081fd12aa7bfc968e7a1cf7a
+size 18789
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
 {
   "version": "1.0",
-  "truncation": {
-    "direction": "Right",
-    "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
-  },
-  "padding": {
-    "strategy": {
-      "Fixed": 512
-    },
-    "direction": "Right",
-    "pad_to_multiple_of": null,
-    "pad_id": 2,
-    "pad_type_id": 0,
-    "pad_token": "</s>"
-  },
+  "truncation": null,
+  "padding": null,
   "added_tokens": [
     {
       "id": 0,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:462f96db6884f7ac020a9a9bf5af678f286991e8740fd94cb5ff49d36dfe5a36
+oid sha256:8b51b3fc93d654563abb6136b1926b7f2268f48b6215bae60a38ce5bb32ced7d
 size 5112