Kreses committed on
Commit
a7adc73
1 Parent(s): d263f05

End of training

Browse files
Files changed (3) hide show
  1. README.md +15 -15
  2. adapter_config.json +4 -4
  3. adapter_model.safetensors +2 -2
README.md CHANGED
@@ -4,7 +4,7 @@ tags:
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
- base_model: meta-llama/Llama-2-7b-chat-hf
8
  model-index:
9
  - name: output
10
  results: []
@@ -15,9 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # output
17
 
18
- This model is a fine-tuned version of [meta-llama/Llama-2-7b-chat-hf](https://huggingface.co/meta-llama/Llama-2-7b-chat-hf) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.5112
21
 
22
  ## Model description
23
 
@@ -51,18 +51,18 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
- | 1.9596 | 0.29 | 1 | 1.7030 |
55
- | 2.0029 | 0.57 | 2 | 1.6746 |
56
- | 1.9649 | 0.86 | 3 | 1.6343 |
57
- | 1.8472 | 1.14 | 4 | 1.6023 |
58
- | 1.8243 | 1.43 | 5 | 1.5890 |
59
- | 1.8297 | 1.71 | 6 | 1.5809 |
60
- | 1.8483 | 2.0 | 7 | 1.5683 |
61
- | 1.7739 | 2.29 | 8 | 1.5528 |
62
- | 1.8205 | 2.57 | 9 | 1.5378 |
63
- | 1.7415 | 2.86 | 10 | 1.5262 |
64
- | 1.6532 | 3.14 | 11 | 1.5178 |
65
- | 1.7671 | 3.43 | 12 | 1.5112 |
66
 
67
 
68
  ### Framework versions
 
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
+ base_model: meta-llama/Llama-2-70b-chat-hf
8
  model-index:
9
  - name: output
10
  results: []
 
15
 
16
  # output
17
 
18
+ This model is a fine-tuned version of [meta-llama/Llama-2-70b-chat-hf](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) on an unknown dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.3387
21
 
22
  ## Model description
23
 
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss |
53
  |:-------------:|:-----:|:----:|:---------------:|
54
+ | 1.7334 | 0.29 | 1 | 1.5084 |
55
+ | 1.7705 | 0.57 | 2 | 1.4977 |
56
+ | 1.7433 | 0.86 | 3 | 1.4736 |
57
+ | 1.6862 | 1.14 | 4 | 1.4434 |
58
+ | 1.6562 | 1.43 | 5 | 1.4161 |
59
+ | 1.615 | 1.71 | 6 | 1.3948 |
60
+ | 1.6227 | 2.0 | 7 | 1.3813 |
61
+ | 1.5609 | 2.29 | 8 | 1.3706 |
62
+ | 1.619 | 2.57 | 9 | 1.3603 |
63
+ | 1.5298 | 2.86 | 10 | 1.3511 |
64
+ | 1.4428 | 3.14 | 11 | 1.3437 |
65
+ | 1.5641 | 3.43 | 12 | 1.3387 |
66
 
67
 
68
  ### Framework versions
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "meta-llama/Llama-2-7b-chat-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -19,10 +19,10 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "v_proj",
23
- "q_proj",
24
  "k_proj",
25
- "o_proj"
 
 
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_rslora": false
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "meta-llama/Llama-2-70b-chat-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "k_proj",
23
+ "q_proj",
24
+ "o_proj",
25
+ "v_proj"
26
  ],
27
  "task_type": "CAUSAL_LM",
28
  "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae952f03c8703076c365c1f560e6bc44b26bdaeaff581851ad2b643316bb9b06
3
- size 67143296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:130835e790a62f150122503aa714b42ef32ec051654681b66876de6fa52a1a9d
3
+ size 262231096