tsavage68 committed
Commit 58d6d5b
1 Parent(s): 6c0d620

End of training

README.md CHANGED
@@ -6,18 +6,18 @@ tags:
 - sft
 - generated_from_trainer
 model-index:
-- name: UTI_L3_1000steps_1e6rate_SFT
+- name: UTI_M2_1000steps_1e6rate_SFT
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# UTI_L3_1000steps_1e6rate_SFT
+# UTI_M2_1000steps_1e6rate_SFT
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-Instruct-v0.2](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 3.2252
+- Loss: 1.7960
 
 ## Model description
 
@@ -51,51 +51,51 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
-| 2.2806 | 0.3333 | 25 | 2.1563 |
-| 1.8955 | 0.6667 | 50 | 1.8431 |
-| 1.7404 | 1.0 | 75 | 1.7251 |
-| 1.5351 | 1.3333 | 100 | 1.7245 |
-| 1.5176 | 1.6667 | 125 | 1.7039 |
-| 1.4594 | 2.0 | 150 | 1.6754 |
-| 0.9453 | 2.3333 | 175 | 1.8231 |
-| 1.0165 | 2.6667 | 200 | 1.8012 |
-| 0.9754 | 3.0 | 225 | 1.7995 |
-| 0.4991 | 3.3333 | 250 | 2.1098 |
-| 0.4971 | 3.6667 | 275 | 2.1236 |
-| 0.5339 | 4.0 | 300 | 2.1092 |
-| 0.2532 | 4.3333 | 325 | 2.2857 |
-| 0.2919 | 4.6667 | 350 | 2.3444 |
-| 0.3192 | 5.0 | 375 | 2.3734 |
-| 0.1858 | 5.3333 | 400 | 2.5514 |
-| 0.1947 | 5.6667 | 425 | 2.5828 |
-| 0.1984 | 6.0 | 450 | 2.5324 |
-| 0.1429 | 6.3333 | 475 | 2.7141 |
-| 0.1573 | 6.6667 | 500 | 2.6237 |
-| 0.1502 | 7.0 | 525 | 2.6715 |
-| 0.1168 | 7.3333 | 550 | 2.8434 |
-| 0.1306 | 7.6667 | 575 | 2.7996 |
-| 0.1182 | 8.0 | 600 | 2.8128 |
-| 0.1009 | 8.3333 | 625 | 2.9270 |
-| 0.1053 | 8.6667 | 650 | 2.9832 |
-| 0.0983 | 9.0 | 675 | 2.9935 |
-| 0.0887 | 9.3333 | 700 | 3.0662 |
-| 0.0894 | 9.6667 | 725 | 3.0845 |
-| 0.0914 | 10.0 | 750 | 3.0977 |
-| 0.0829 | 10.3333 | 775 | 3.1662 |
-| 0.0775 | 10.6667 | 800 | 3.1832 |
-| 0.0841 | 11.0 | 825 | 3.1821 |
-| 0.0753 | 11.3333 | 850 | 3.2082 |
-| 0.078 | 11.6667 | 875 | 3.2170 |
-| 0.0745 | 12.0 | 900 | 3.2223 |
-| 0.0788 | 12.3333 | 925 | 3.2260 |
-| 0.0743 | 12.6667 | 950 | 3.2258 |
-| 0.0718 | 13.0 | 975 | 3.2253 |
-| 0.0744 | 13.3333 | 1000 | 3.2252 |
+| 2.2167 | 0.3333 | 25 | 1.1865 |
+| 0.9806 | 0.6667 | 50 | 0.9618 |
+| 0.936 | 1.0 | 75 | 0.9371 |
+| 0.8294 | 1.3333 | 100 | 0.9512 |
+| 0.8273 | 1.6667 | 125 | 0.9369 |
+| 0.7851 | 2.0 | 150 | 0.9036 |
+| 0.5263 | 2.3333 | 175 | 0.9990 |
+| 0.5512 | 2.6667 | 200 | 0.9589 |
+| 0.5272 | 3.0 | 225 | 0.9576 |
+| 0.2888 | 3.3333 | 250 | 1.1371 |
+| 0.2968 | 3.6667 | 275 | 1.1164 |
+| 0.3381 | 4.0 | 300 | 1.1144 |
+| 0.1802 | 4.3333 | 325 | 1.1697 |
+| 0.2025 | 4.6667 | 350 | 1.1946 |
+| 0.2273 | 5.0 | 375 | 1.2614 |
+| 0.1417 | 5.3333 | 400 | 1.3260 |
+| 0.1524 | 5.6667 | 425 | 1.3343 |
+| 0.136 | 6.0 | 450 | 1.3735 |
+| 0.117 | 6.3333 | 475 | 1.3843 |
+| 0.1284 | 6.6667 | 500 | 1.3742 |
+| 0.1172 | 7.0 | 525 | 1.4114 |
+| 0.0905 | 7.3333 | 550 | 1.5000 |
+| 0.1027 | 7.6667 | 575 | 1.5142 |
+| 0.097 | 8.0 | 600 | 1.4912 |
+| 0.0837 | 8.3333 | 625 | 1.5974 |
+| 0.0832 | 8.6667 | 650 | 1.6185 |
+| 0.0781 | 9.0 | 675 | 1.6203 |
+| 0.0698 | 9.3333 | 700 | 1.6833 |
+| 0.0722 | 9.6667 | 725 | 1.6960 |
+| 0.0681 | 10.0 | 750 | 1.7139 |
+| 0.0635 | 10.3333 | 775 | 1.7732 |
+| 0.0654 | 10.6667 | 800 | 1.7704 |
+| 0.0663 | 11.0 | 825 | 1.7647 |
+| 0.0604 | 11.3333 | 850 | 1.7840 |
+| 0.0628 | 11.6667 | 875 | 1.7916 |
+| 0.0627 | 12.0 | 900 | 1.7947 |
+| 0.061 | 12.3333 | 925 | 1.7962 |
+| 0.062 | 12.6667 | 950 | 1.7967 |
+| 0.0607 | 13.0 | 975 | 1.7960 |
+| 0.0605 | 13.3333 | 1000 | 1.7960 |
 
 
 ### Framework versions
 
-- Transformers 4.41.1
+- Transformers 4.41.2
 - Pytorch 2.0.0+cu117
-- Datasets 2.19.1
+- Datasets 2.19.2
 - Tokenizers 0.19.1
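For context, the updated card above describes an SFT fine-tune of Mistral-7B-Instruct-v0.2 with a final evaluation loss of 1.7960. Below is a minimal loading sketch; the hub id `tsavage68/UTI_M2_1000steps_1e6rate_SFT` is an assumption inferred from the new model name and is not stated in the diff.

```python
# Minimal sketch for loading the updated checkpoint with transformers.
# The repo id below is assumed from the new model name; adjust as needed.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "tsavage68/UTI_M2_1000steps_1e6rate_SFT"  # assumed hub id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    torch_dtype=torch.float16,  # matches "torch_dtype": "float16" in config.json
    device_map="auto",          # requires the accelerate package
)

# Mistral-Instruct chat formatting via the tokenizer's chat template.
messages = [{"role": "user", "content": "Summarize the SFT training setup."}]
input_ids = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)
output = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```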
config.json CHANGED
@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.1",
+  "transformers_version": "4.41.2",
   "use_cache": false,
   "vocab_size": 32000
 }
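One detail worth noting in `config.json` above: `use_cache` is `false`, which is common for training runs; for inference it is usually turned back on. A small sketch, assuming a local clone of this repo:

```python
# Sketch: inspect the shipped config and re-enable the KV cache for inference.
# Assumes the working directory is a local clone containing config.json.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained(".")
print(cfg.transformers_version, cfg.torch_dtype, cfg.use_cache)  # values from the diff above
cfg.use_cache = True  # typically re-enabled before calling generate()
```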
final_checkpoint/config.json CHANGED
@@ -20,7 +20,7 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.1",
+  "transformers_version": "4.41.2",
   "use_cache": false,
   "vocab_size": 32000
 }
final_checkpoint/generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.41.1"
+  "transformers_version": "4.41.2"
 }
final_checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9625f6b54ab2367d0e80d43f0ca5e316021f803a8e7e5ed705dc695fe1aa01a1
+oid sha256:e62ed3b72dc7a5fe13bb50e75c99aec82d8b4ddb930d9255a0ce990dd4758067
 size 4943162240
final_checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d44ed67cdf73e444bc9f5d4b18b455b4b3636f15d3d62b32f3dbe82319e1a5d3
+oid sha256:11e0ebfbd84055ea6321452edeaff4191e7bb470e8b072270a6e62b04b0ba59f
 size 4999819232
final_checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94ee961d7520f7c2352131c968cf09d00ea85967e72d953c89b7ffe642e3555a
+oid sha256:1455bdf4d2a45e55cf179acdb42249f4bd48df2af6c8c449bea31d9bc1edd45f
 size 4540516256
generation_config.json CHANGED
@@ -2,5 +2,5 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.41.1"
+  "transformers_version": "4.41.2"
 }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9625f6b54ab2367d0e80d43f0ca5e316021f803a8e7e5ed705dc695fe1aa01a1
+oid sha256:e62ed3b72dc7a5fe13bb50e75c99aec82d8b4ddb930d9255a0ce990dd4758067
 size 4943162240
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d44ed67cdf73e444bc9f5d4b18b455b4b3636f15d3d62b32f3dbe82319e1a5d3
+oid sha256:11e0ebfbd84055ea6321452edeaff4191e7bb470e8b072270a6e62b04b0ba59f
 size 4999819232
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94ee961d7520f7c2352131c968cf09d00ea85967e72d953c89b7ffe642e3555a
+oid sha256:1455bdf4d2a45e55cf179acdb42249f4bd48df2af6c8c449bea31d9bc1edd45f
 size 4540516256
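The `*.safetensors` entries above (and their `final_checkpoint/` copies) are Git LFS pointer files: only the `oid sha256:` changes, while the shard sizes stay identical and the actual weights live in LFS storage. A small sketch of reading one shard locally, assuming the LFS objects have been fetched (for example with `git lfs pull`):

```python
# Sketch: after the LFS objects are pulled, each shard is a regular safetensors file.
from safetensors.torch import load_file

shard = load_file("model-00001-of-00003.safetensors")  # local path in a full clone
print(f"{len(shard)} tensors in shard 1")
```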
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c45bcd614172a6f05421c77a790ee06c5359aaf0e059be06e17c405d8d27562a
+oid sha256:4087ee1fad72c7539d1be4b6a2a09277949417a580d66b558e069226cf106161
 size 4603
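`training_args.bin` is the serialized training configuration saved by the `Trainer`. Once the LFS object is pulled, it can typically be inspected as sketched below; the specific values printed are assumptions inferred from the model name, not confirmed by this diff.

```python
# Sketch: inspect the pickled training arguments behind training_args.bin.
# Requires the real LFS object (git lfs pull), not just the pointer file.
import torch

args = torch.load("training_args.bin")  # recent torch versions may need weights_only=False
print(type(args).__name__)   # TrainingArguments (or an SFT-specific subclass)
print(args.learning_rate)    # presumably the 1e-6 rate implied by the model name
print(args.max_steps)        # presumably the 1000 steps implied by the model name
```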