andrewverse committed
Commit e0e66ad
1 parent: 533cb7a

andrewverse/andrew-tweet-comment-ft

README.md CHANGED
@@ -3,7 +3,7 @@ license: apache-2.0
  library_name: peft
  tags:
  - generated_from_trainer
- base_model: TheBloke/Mistral-7B-Instruct-v0.2-GPTQ
+ base_model: mistralai/Mistral-7B-Instruct-v0.2
  model-index:
  - name: andewbot-ft
    results: []
@@ -14,9 +14,9 @@ should probably proofread and complete it, then remove this comment. -->

  # andewbot-ft

- This model is a fine-tuned version of [TheBloke/Mistral-7B-Instruct-v0.2-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ) on an unknown dataset.
+ This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6661
+ - Loss: 0.6775

  ## Model description

@@ -51,22 +51,22 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:-----:|:----:|:---------------:|
- | 4.1587 | 0.95 | 10 | 2.6161 |
- | 1.7609 | 2.0 | 21 | 1.0614 |
- | 0.9542 | 2.95 | 31 | 0.7537 |
- | 0.72 | 4.0 | 42 | 0.6809 |
- | 0.7436 | 4.95 | 52 | 0.6734 |
- | 0.6609 | 6.0 | 63 | 0.6692 |
- | 0.7176 | 6.95 | 73 | 0.6672 |
- | 0.644 | 8.0 | 84 | 0.6661 |
- | 0.7019 | 8.95 | 94 | 0.6660 |
- | 0.613 | 9.52 | 100 | 0.6661 |
+ | 4.1535 | 0.95 | 10 | 2.6726 |
+ | 1.7854 | 2.0 | 21 | 1.1175 |
+ | 0.9754 | 2.95 | 31 | 0.7767 |
+ | 0.7311 | 4.0 | 42 | 0.7044 |
+ | 0.7424 | 4.95 | 52 | 0.6842 |
+ | 0.6566 | 6.0 | 63 | 0.6814 |
+ | 0.7099 | 6.95 | 73 | 0.6785 |
+ | 0.6374 | 8.0 | 84 | 0.6784 |
+ | 0.6966 | 8.95 | 94 | 0.6776 |
+ | 0.6014 | 9.52 | 100 | 0.6775 |


  ### Framework versions

- - PEFT 0.10.0
+ - PEFT 0.8.2
  - Transformers 4.39.1
- - Pytorch 2.1.0+cu118
- - Datasets 2.18.0
+ - Pytorch 2.2.0+cu121
+ - Datasets 2.17.1
  - Tokenizers 0.15.2
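
For reference, a minimal sketch of loading the adapter on the post-commit base model. The base and adapter repo ids are taken from this page; the dtype, device map, and example prompt are illustrative assumptions, not settings recorded in the commit. Unlike the previous GPTQ base, the full-precision base model needs enough GPU memory to load.

```python
# Sketch: load the LoRA adapter on mistralai/Mistral-7B-Instruct-v0.2 (post-commit base).
# torch_dtype, device_map, and the prompt are assumptions for illustration only.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-Instruct-v0.2"      # base_model after this commit
adapter_id = "andrewverse/andrew-tweet-comment-ft"  # adapter repo shown on this page

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.float16, device_map="auto")
model = PeftModel.from_pretrained(base, adapter_id)
model.eval()

prompt = "[INST] Write a short reply to this tweet: great launch today! [/INST]"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```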
adapter_config.json CHANGED
@@ -1,12 +1,11 @@
  {
  "alpha_pattern": {},
  "auto_mapping": null,
- "base_model_name_or_path": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ",
+ "base_model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
- "layer_replication": null,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
@@ -23,6 +22,5 @@
  "q_proj"
  ],
  "task_type": "CAUSAL_LM",
- "use_dora": false,
  "use_rslora": false
  }
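
The dropped keys ("layer_replication", "use_dora") are config fields written by newer PEFT releases, which lines up with the README's Framework versions change from PEFT 0.10.0 to 0.8.2. A quick way to confirm what the published config now resolves to, assuming the adapter lives at andrewverse/andrew-tweet-comment-ft:

```python
# Sketch: inspect the published adapter config and confirm the new base model.
from peft import PeftConfig

cfg = PeftConfig.from_pretrained("andrewverse/andrew-tweet-comment-ft")
print(cfg.base_model_name_or_path)  # expected: mistralai/Mistral-7B-Instruct-v0.2
print(cfg.task_type)                # expected: CAUSAL_LM
```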
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:30c74b27c6fa4ee480a016d713a8ba54c244b9254ef74c67251a441279f2dc05
+ oid sha256:337ad31ace741e9b61262d429e9632e8bcafd0893ce0d2987703d428a03b5a92
  size 8397056
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:645a341de43658aae1b21e944f7af5673c2200d636353b6617630dc796212a29
- size 4856
+ oid sha256:4f64ebf22e5a56891a7604cd47565a595fb5c04071514fe2793059ee7f34560e
+ size 4920
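
Both binary files are Git LFS pointers, so only the oid and size lines change in this commit. If needed, a locally downloaded file can be checked against the sha256 recorded in its pointer; a small sketch (the local path is an assumption):

```python
# Sketch: verify a downloaded LFS file against the sha256 oid in its pointer.
import hashlib

expected = "337ad31ace741e9b61262d429e9632e8bcafd0893ce0d2987703d428a03b5a92"  # new adapter_model.safetensors oid
hasher = hashlib.sha256()
with open("adapter_model.safetensors", "rb") as f:  # assumed local path
    for chunk in iter(lambda: f.read(1 << 20), b""):
        hasher.update(chunk)
print("match:", hasher.hexdigest() == expected)
```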