greatakela committed
Commit bc0401b
1 Parent(s): e2d3231

greatakela/mistral_instruct_classify30k_adapters

README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.1](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3994
+- Loss: 0.4072
 
 ## Model description
 
@@ -47,11 +47,11 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 0.5749        | 0.04  | 169  | 0.5691          |
-| 0.4581        | 1.04  | 338  | 0.4742          |
-| 0.3675        | 2.04  | 507  | 0.4266          |
-| 0.3239        | 3.04  | 676  | 0.4020          |
-| 0.285         | 4.04  | 845  | 0.3994          |
+| 0.5959        | 0.04  | 163  | 0.5867          |
+| 0.4753        | 1.04  | 326  | 0.4860          |
+| 0.3975        | 2.04  | 489  | 0.4321          |
+| 0.3355        | 3.04  | 652  | 0.4098          |
+| 0.2969        | 4.04  | 815  | 0.4072          |
 
 
 ### Framework versions
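The adapters updated in this commit load on top of the frozen base model with peft. A minimal sketch, assuming the Hub repo id matches this repository and a recent transformers/peft install; the fp16 and device settings are illustrative assumptions, not from the model card:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-Instruct-v0.1"
adapter_id = "greatakela/mistral_instruct_classify30k_adapters"  # this repo

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    torch_dtype=torch.float16,  # illustrative; choose what fits your hardware
    device_map="auto",
)
# Attach the LoRA weights from adapter_model.safetensors; the base weights
# stay frozen and only the small adapter deltas come from this repo.
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()
```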
adapter_config.json CHANGED
@@ -8,18 +8,21 @@
     "init_lora_weights": true,
     "layers_pattern": null,
     "layers_to_transform": null,
+    "loftq_config": {},
     "lora_alpha": 16,
     "lora_dropout": 0.1,
+    "megatron_config": null,
+    "megatron_core": "megatron.core",
     "modules_to_save": null,
     "peft_type": "LORA",
     "r": 64,
     "rank_pattern": {},
     "revision": null,
     "target_modules": [
-        "q_proj",
         "k_proj",
-        "v_proj",
-        "o_proj"
+        "q_proj",
+        "o_proj",
+        "v_proj"
     ],
     "task_type": "CAUSAL_LM"
 }
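The new loftq_config, megatron_config, and megatron_core keys and the reordered target_modules suggest the config was re-saved with a newer peft release, which serializes these extra fields; the LoRA hyperparameters themselves are unchanged. A minimal sketch of building the equivalent config in code, with every value taken from the diff above:

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=64,             # LoRA rank, as in "r": 64
    lora_alpha=16,    # scaling; the update is scaled by lora_alpha / r
    lora_dropout=0.1,
    target_modules=["k_proj", "q_proj", "o_proj", "v_proj"],  # attention projections
    task_type="CAUSAL_LM",
)

base = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")
model = get_peft_model(base, lora_config)
model.print_trainable_parameters()  # only the LoRA matrices train; the base stays frozen
```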
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7588cb6fd20816705712b5687c49d5981072a9550d62cab7f433969e946b7d2
+oid sha256:b2d8cf5d7bb8cf3cd435ed15237343409b620835cf8a4a1c306f1dbc262466ef
 size 218138576
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d42a4fdd9a23d1a30ae662b9550004532401f7780234d6dce18183ca9c88d82a
-size 4728
+oid sha256:923634e75927a4714f08879fa36c1ec3ccac00d1cf3ba87379b8ebf5b64ff26e
+size 4600
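These two files are Git LFS pointers: the repo stores only the spec version, the SHA-256 object id, and the byte size, while the binary itself lives in LFS storage. A small verification sketch (an assumption of mine, not part of this repo) that checks downloaded files against the object ids shown above; the local paths are hypothetical:

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256, as git-lfs does for its object ids."""
    h = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

# Object ids copied from the new pointer files in this commit.
expected = {
    "adapter_model.safetensors": "b2d8cf5d7bb8cf3cd435ed15237343409b620835cf8a4a1c306f1dbc262466ef",
    "training_args.bin": "923634e75927a4714f08879fa36c1ec3ccac00d1cf3ba87379b8ebf5b64ff26e",
}
for name, oid in expected.items():
    p = Path(name)  # assumes the files were downloaded into the working directory
    if p.exists():
        print(name, "OK" if sha256_of(p) == oid else "MISMATCH")
```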