sujithatz committed on
Commit
c3e7e4c
1 Parent(s): f5c4cea

sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files
README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.7215
22
 
23
  ## Model description
24
 
@@ -38,11 +38,11 @@ More information needed
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 0.0002
41
- - train_batch_size: 8
42
- - eval_batch_size: 8
43
  - seed: 3407
44
  - gradient_accumulation_steps: 4
45
- - total_train_batch_size: 32
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
  - lr_scheduler_warmup_steps: 5
@@ -52,30 +52,30 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-------:|:----:|:---------------:|
55
- | 1.4909 | 1.1765 | 5 | 1.3154 |
56
- | 0.9704 | 2.3529 | 10 | 0.9087 |
57
- | 0.6673 | 3.5294 | 15 | 0.6343 |
58
- | 0.4418 | 4.7059 | 20 | 0.5075 |
59
- | 0.3375 | 5.8824 | 25 | 0.4491 |
60
- | 0.3033 | 7.0588 | 30 | 0.4069 |
61
- | 0.244 | 8.2353 | 35 | 0.3828 |
62
- | 0.2285 | 9.4118 | 40 | 0.3759 |
63
- | 0.1519 | 10.5882 | 45 | 0.3896 |
64
- | 0.1334 | 11.7647 | 50 | 0.4114 |
65
- | 0.099 | 12.9412 | 55 | 0.4291 |
66
- | 0.0823 | 14.1176 | 60 | 0.4610 |
67
- | 0.06 | 15.2941 | 65 | 0.4894 |
68
- | 0.0548 | 16.4706 | 70 | 0.5345 |
69
- | 0.0437 | 17.6471 | 75 | 0.5747 |
70
- | 0.0409 | 18.8235 | 80 | 0.6059 |
71
- | 0.0386 | 20.0 | 85 | 0.6349 |
72
- | 0.0272 | 21.1765 | 90 | 0.6590 |
73
- | 0.0262 | 22.3529 | 95 | 0.6933 |
74
- | 0.0303 | 23.5294 | 100 | 0.6960 |
75
- | 0.0249 | 24.7059 | 105 | 0.7021 |
76
- | 0.0291 | 25.8824 | 110 | 0.7173 |
77
- | 0.0255 | 27.0588 | 115 | 0.7195 |
78
- | 0.0208 | 28.2353 | 120 | 0.7215 |
79
 
80
 
81
  ### Framework versions
 
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.3840
22
 
23
  ## Model description
24
 
 
38
 
39
  The following hyperparameters were used during training:
40
  - learning_rate: 0.0002
41
+ - train_batch_size: 4
42
+ - eval_batch_size: 4
43
  - seed: 3407
44
  - gradient_accumulation_steps: 4
45
+ - total_train_batch_size: 16
46
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
  - lr_scheduler_type: linear
48
  - lr_scheduler_warmup_steps: 5
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-------:|:----:|:---------------:|
55
+ | 1.5071 | 0.5882 | 5 | 1.4674 |
56
+ | 1.1659 | 1.1765 | 10 | 1.0849 |
57
+ | 0.894 | 1.7647 | 15 | 0.8655 |
58
+ | 0.7243 | 2.3529 | 20 | 0.6989 |
59
+ | 0.5752 | 2.9412 | 25 | 0.5856 |
60
+ | 0.5724 | 3.5294 | 30 | 0.5257 |
61
+ | 0.4834 | 4.1176 | 35 | 0.4875 |
62
+ | 0.3861 | 4.7059 | 40 | 0.4588 |
63
+ | 0.35 | 5.2941 | 45 | 0.4368 |
64
+ | 0.3126 | 5.8824 | 50 | 0.4251 |
65
+ | 0.367 | 6.4706 | 55 | 0.4080 |
66
+ | 0.2792 | 7.0588 | 60 | 0.3955 |
67
+ | 0.3952 | 7.6471 | 65 | 0.3914 |
68
+ | 0.2854 | 8.2353 | 70 | 0.3784 |
69
+ | 0.3224 | 8.8235 | 75 | 0.3867 |
70
+ | 0.3187 | 9.4118 | 80 | 0.3765 |
71
+ | 0.1675 | 10.0 | 85 | 0.3799 |
72
+ | 0.1888 | 10.5882 | 90 | 0.3858 |
73
+ | 0.2021 | 11.1765 | 95 | 0.3759 |
74
+ | 0.1518 | 11.7647 | 100 | 0.3868 |
75
+ | 0.2075 | 12.3529 | 105 | 0.3915 |
76
+ | 0.1497 | 12.9412 | 110 | 0.3814 |
77
+ | 0.1797 | 13.5294 | 115 | 0.3821 |
78
+ | 0.1606 | 14.1176 | 120 | 0.3840 |
79
 
80
 
81
  ### Framework versions
adapter_config.json CHANGED
@@ -20,10 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
 
23
  "o_proj",
24
  "down_proj",
25
- "gate_up_proj",
26
- "qkv_proj"
 
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "gate_proj",
24
+ "up_proj",
25
  "o_proj",
26
  "down_proj",
27
+ "k_proj",
28
+ "q_proj",
29
+ "v_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d0d087ec02232d98e64e6f6b528eebfa7ca7a0bf61f2f00fe1c0991fe80fee6
3
- size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5db02aecfe4e8679acdaf54528b4994462fd68c005d87e3419662df44736163f
3
+ size 35668592
tokenizer.json CHANGED
@@ -155,12 +155,6 @@
155
  "id": "A",
156
  "type_id": 0
157
  }
158
- },
159
- {
160
- "SpecialToken": {
161
- "id": "<|endoftext|>",
162
- "type_id": 0
163
- }
164
  }
165
  ],
166
  "pair": [
@@ -170,36 +164,14 @@
170
  "type_id": 0
171
  }
172
  },
173
- {
174
- "SpecialToken": {
175
- "id": "<|endoftext|>",
176
- "type_id": 0
177
- }
178
- },
179
  {
180
  "Sequence": {
181
  "id": "B",
182
  "type_id": 1
183
  }
184
- },
185
- {
186
- "SpecialToken": {
187
- "id": "<|endoftext|>",
188
- "type_id": 1
189
- }
190
  }
191
  ],
192
- "special_tokens": {
193
- "<|endoftext|>": {
194
- "id": "<|endoftext|>",
195
- "ids": [
196
- 32000
197
- ],
198
- "tokens": [
199
- "<|endoftext|>"
200
- ]
201
- }
202
- }
203
  },
204
  "decoder": {
205
  "type": "Sequence",
 
155
  "id": "A",
156
  "type_id": 0
157
  }
 
 
 
 
 
 
158
  }
159
  ],
160
  "pair": [
 
164
  "type_id": 0
165
  }
166
  },
 
 
 
 
 
 
167
  {
168
  "Sequence": {
169
  "id": "B",
170
  "type_id": 1
171
  }
 
 
 
 
 
 
172
  }
173
  ],
174
+ "special_tokens": {}
 
 
 
 
 
 
 
 
 
 
175
  },
176
  "decoder": {
177
  "type": "Sequence",
tokenizer_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "add_bos_token": false,
3
- "add_eos_token": true,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
@@ -121,9 +121,9 @@
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,
124
- "model_max_length": 131072,
125
  "pad_token": "<unk>",
126
- "padding_side": "left",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",
 
1
  {
2
  "add_bos_token": false,
3
+ "add_eos_token": false,
4
  "add_prefix_space": null,
5
  "added_tokens_decoder": {
6
  "0": {
 
121
  "clean_up_tokenization_spaces": false,
122
  "eos_token": "<|endoftext|>",
123
  "legacy": false,
124
+ "model_max_length": 300,
125
  "pad_token": "<unk>",
126
+ "padding_side": "right",
127
  "sp_model_kwargs": {},
128
  "tokenizer_class": "LlamaTokenizer",
129
  "unk_token": "<unk>",
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae64cfbc9fafa79992f8f1dbc59d731406c1a3c9322aa24ad9ba448b90f16c6e
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e6d562609b6199ed2dc3c6d856a06eb939e0a4a026ced7b500b84e4f340b13
3
  size 5432