greatakela
/

mistral_instruct_classify30k

Text Generation

Generated from Trainer

text-generation-inference

Inference Endpoints

Model card · Files and versions · Community

greatakela committed on Dec 7, 2023

Commit

bc0401b

·

1 Parent(s): e2d3231

greatakela/mistral_instruct_classify30k_adapters

Files changed (4) hide show

README.md +6 -6
adapter_config.json +6 -3
adapter_model.safetensors +1 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3994
 ## Model description
@@ -47,11 +47,11 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.5749        | 0.04  | 169  | 0.5691          |
-| 0.4581        | 1.04  | 338  | 0.4742          |
-| 0.3675        | 2.04  | 507  | 0.4266          |
-| 0.3239        | 3.04  | 676  | 0.4020          |
-| 0.285         | 4.04  | 845  | 0.3994          |
 ### Framework versions

 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4072
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 0.5959        | 0.04  | 163  | 0.5867          |
+| 0.4753        | 1.04  | 326  | 0.4860          |
+| 0.3975        | 2.04  | 489  | 0.4321          |
+| 0.3355        | 3.04  | 652  | 0.4098          |
+| 0.2969        | 4.04  | 815  | 0.4072          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -8,18 +8,21 @@
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
   "lora_alpha": 16,
   "lora_dropout": 0.1,
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
     "k_proj",
-    "v_proj",
-    "o_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
+  "loftq_config": {},
   "lora_alpha": 16,
   "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
   "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "q_proj",
+    "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7588cb6fd20816705712b5687c49d5981072a9550d62cab7f433969e946b7d2
 size 218138576

 version https://git-lfs.github.com/spec/v1
+oid sha256:b2d8cf5d7bb8cf3cd435ed15237343409b620835cf8a4a1c306f1dbc262466ef
 size 218138576

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d42a4fdd9a23d1a30ae662b9550004532401f7780234d6dce18183ca9c88d82a
-size 4728

 version https://git-lfs.github.com/spec/v1
+oid sha256:923634e75927a4714f08879fa36c1ec3ccac00d1cf3ba87379b8ebf5b64ff26e
+size 4600