ludziej committed on
Commit
3436e55
1 Parent(s): 014b724

End of training

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 0.8854
17
 
18
  ## Model description
19
 
@@ -44,16 +44,16 @@ The following hyperparameters were used during training:
44
 
45
  | Training Loss | Epoch | Step | Validation Loss |
46
  |:-------------:|:-----:|:----:|:---------------:|
47
- | 3.0459 | 1.0 | 5 | 2.4452 |
48
- | 2.2781 | 2.0 | 10 | 2.0536 |
49
- | 1.883 | 3.0 | 15 | 1.6599 |
50
- | 1.5519 | 4.0 | 20 | 1.4205 |
51
- | 1.3589 | 5.0 | 25 | 1.2785 |
52
- | 1.2145 | 6.0 | 30 | 1.1344 |
53
- | 1.0845 | 7.0 | 35 | 1.0183 |
54
- | 0.9945 | 8.0 | 40 | 0.9482 |
55
- | 0.9314 | 9.0 | 45 | 0.8990 |
56
- | 0.8972 | 10.0 | 50 | 0.8854 |
57
 
58
 
59
  ### Framework versions
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 0.8243
17
 
18
  ## Model description
19
 
 
44
 
45
  | Training Loss | Epoch | Step | Validation Loss |
46
  |:-------------:|:-----:|:----:|:---------------:|
47
+ | 2.8893 | 1.0 | 6 | 2.1271 |
48
+ | 1.9669 | 2.0 | 12 | 1.8233 |
49
+ | 1.6822 | 3.0 | 18 | 1.5134 |
50
+ | 1.3991 | 4.0 | 24 | 1.2639 |
51
+ | 1.1731 | 5.0 | 30 | 1.1030 |
52
+ | 1.0664 | 6.0 | 36 | 0.9758 |
53
+ | 0.9729 | 7.0 | 42 | 0.9245 |
54
+ | 0.9171 | 8.0 | 48 | 0.8696 |
55
+ | 0.8722 | 9.0 | 54 | 0.8384 |
56
+ | 0.8487 | 10.0 | 60 | 0.8243 |
57
 
58
 
59
  ### Framework versions
config.json CHANGED
@@ -78,9 +78,9 @@
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
80
  "use_cache": true,
81
- "vocab_size": 51
82
  },
83
- "decoder_start_token_id": 49,
84
  "encoder": {
85
  "_name_or_path": "",
86
  "add_cross_attention": false,
@@ -157,12 +157,12 @@
157
  "typical_p": 1.0,
158
  "use_bfloat16": false,
159
  "use_cache": true,
160
- "vocab_size": 51
161
  },
162
- "eos_token_id": 50,
163
  "is_encoder_decoder": true,
164
  "model_type": "encoder-decoder",
165
- "pad_token_id": 48,
166
  "torch_dtype": "float32",
167
  "transformers_version": "4.37.2"
168
  }
 
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
80
  "use_cache": true,
81
+ "vocab_size": 56
82
  },
83
+ "decoder_start_token_id": 1,
84
  "encoder": {
85
  "_name_or_path": "",
86
  "add_cross_attention": false,
 
157
  "typical_p": 1.0,
158
  "use_bfloat16": false,
159
  "use_cache": true,
160
+ "vocab_size": 56
161
  },
162
+ "eos_token_id": 6,
163
  "is_encoder_decoder": true,
164
  "model_type": "encoder-decoder",
165
+ "pad_token_id": 3,
166
  "torch_dtype": "float32",
167
  "transformers_version": "4.37.2"
168
  }
generation_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "_from_model_config": true,
3
- "decoder_start_token_id": 49,
4
- "eos_token_id": 50,
5
- "pad_token_id": 48,
6
  "transformers_version": "4.37.2"
7
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "decoder_start_token_id": 1,
4
+ "eos_token_id": 6,
5
+ "pad_token_id": 3,
6
  "transformers_version": "4.37.2"
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d68eedd7c2ce4a569ddec467812d03352c8a7b222c55e6dde6b7c022a7ea4859
3
- size 31203500
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c786ce574c7b54427c775c1c401bc60b3a2a1bf4da9d34ebd224728b1d9fdb1
3
+ size 31213760
runs/Feb28_13-26-30_3897ec21fae5/events.out.tfevents.1709126790.3897ec21fae5.17803.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8918b9c98527482fb0ae44af3cc57d56c66e466f5fc8108c4c2b57a3b6988cb3
3
+ size 4184
runs/Feb28_13-28-13_3897ec21fae5/events.out.tfevents.1709126894.3897ec21fae5.18243.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f49d302e9b6efdd89a1ff829ad435c123743f3b816dcc97ded912b68c1e009e
3
+ size 12864
tokenizer.json CHANGED
@@ -4,8 +4,8 @@
4
  "padding": null,
5
  "added_tokens": [
6
  {
7
- "id": 48,
8
- "content": "[PAD]",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
@@ -13,7 +13,7 @@
13
  "special": true
14
  },
15
  {
16
- "id": 49,
17
  "content": "[CLS]",
18
  "single_word": false,
19
  "lstrip": false,
@@ -22,7 +22,43 @@
22
  "special": true
23
  },
24
  {
25
- "id": 50,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  "content": "[EOS]",
27
  "single_word": false,
28
  "lstrip": false,
@@ -40,86 +76,94 @@
40
  "model": {
41
  "type": "BPE",
42
  "dropout": null,
43
- "unk_token": null,
44
  "continuing_subword_prefix": null,
45
  "end_of_word_suffix": null,
46
  "fuse_unk": false,
47
  "byte_fallback": false,
48
  "vocab": {
49
- "+": 0,
50
- "-": 1,
51
- "0": 2,
52
- "1": 3,
53
- "2": 4,
54
- "3": 5,
55
- "4": 6,
56
- "5": 7,
57
- "6": 8,
58
- "7": 9,
59
- "8": 10,
60
- "9": 11,
61
- "=": 12,
62
- "10": 13,
63
- "99": 14,
64
- "11": 15,
65
- "98": 16,
66
- "12": 17,
67
- "97": 18,
68
- "96": 19,
69
- "13": 20,
70
- "14": 21,
71
- "95": 22,
72
- "15": 23,
73
- "94": 24,
74
- "16": 25,
75
- "93": 26,
76
- "17": 27,
77
- "92": 28,
78
- "18": 29,
79
- "91": 30,
80
- "19": 31,
81
- "90": 32,
82
- "20": 33,
83
- "89": 34,
84
- "88": 35,
85
- "21": 36,
86
- "22": 37,
87
- "87": 38,
88
- "23": 39,
89
- "86": 40,
90
- "24": 41,
91
- "85": 42,
92
- "84": 43,
93
- "25": 44,
94
- "26": 45,
95
- "83": 46,
96
- "27": 47
 
 
 
 
 
 
 
 
97
  },
98
  "merges": [
99
- "1 0",
100
  "9 9",
101
- "1 1",
102
  "9 8",
 
103
  "1 2",
104
  "9 7",
105
- "9 6",
106
  "1 3",
107
- "1 4",
108
  "9 5",
109
- "1 5",
110
  "9 4",
 
111
  "1 6",
112
  "9 3",
113
  "1 7",
114
  "9 2",
115
- "1 8",
116
  "9 1",
 
117
  "1 9",
118
  "9 0",
119
  "2 0",
120
  "8 9",
121
- "8 8",
122
  "2 1",
 
123
  "2 2",
124
  "8 7",
125
  "2 3",
@@ -130,7 +174,8 @@
130
  "2 5",
131
  "2 6",
132
  "8 3",
133
- "2 7"
 
134
  ]
135
  }
136
  }
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
7
+ "id": 0,
8
+ "content": "[UNK]",
9
  "single_word": false,
10
  "lstrip": false,
11
  "rstrip": false,
 
13
  "special": true
14
  },
15
  {
16
+ "id": 1,
17
  "content": "[CLS]",
18
  "single_word": false,
19
  "lstrip": false,
 
22
  "special": true
23
  },
24
  {
25
+ "id": 2,
26
+ "content": "[SEP]",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ },
33
+ {
34
+ "id": 3,
35
+ "content": "[PAD]",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 4,
44
+ "content": "[MASK]",
45
+ "single_word": false,
46
+ "lstrip": false,
47
+ "rstrip": false,
48
+ "normalized": false,
49
+ "special": true
50
+ },
51
+ {
52
+ "id": 5,
53
+ "content": "[BOS]",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 6,
62
  "content": "[EOS]",
63
  "single_word": false,
64
  "lstrip": false,
 
76
  "model": {
77
  "type": "BPE",
78
  "dropout": null,
79
+ "unk_token": "[UNK]",
80
  "continuing_subword_prefix": null,
81
  "end_of_word_suffix": null,
82
  "fuse_unk": false,
83
  "byte_fallback": false,
84
  "vocab": {
85
+ "[UNK]": 0,
86
+ "[CLS]": 1,
87
+ "[SEP]": 2,
88
+ "[PAD]": 3,
89
+ "[MASK]": 4,
90
+ "[BOS]": 5,
91
+ "[EOS]": 6,
92
+ "+": 7,
93
+ "-": 8,
94
+ "0": 9,
95
+ "1": 10,
96
+ "2": 11,
97
+ "3": 12,
98
+ "4": 13,
99
+ "5": 14,
100
+ "6": 15,
101
+ "7": 16,
102
+ "8": 17,
103
+ "9": 18,
104
+ "=": 19,
105
+ "99": 20,
106
+ "10": 21,
107
+ "98": 22,
108
+ "11": 23,
109
+ "12": 24,
110
+ "97": 25,
111
+ "13": 26,
112
+ "96": 27,
113
+ "95": 28,
114
+ "14": 29,
115
+ "94": 30,
116
+ "15": 31,
117
+ "16": 32,
118
+ "93": 33,
119
+ "17": 34,
120
+ "92": 35,
121
+ "91": 36,
122
+ "18": 37,
123
+ "19": 38,
124
+ "90": 39,
125
+ "20": 40,
126
+ "89": 41,
127
+ "21": 42,
128
+ "88": 43,
129
+ "22": 44,
130
+ "87": 45,
131
+ "23": 46,
132
+ "86": 47,
133
+ "24": 48,
134
+ "85": 49,
135
+ "84": 50,
136
+ "25": 51,
137
+ "26": 52,
138
+ "83": 53,
139
+ "27": 54,
140
+ "82": 55
141
  },
142
  "merges": [
 
143
  "9 9",
144
+ "1 0",
145
  "9 8",
146
+ "1 1",
147
  "1 2",
148
  "9 7",
 
149
  "1 3",
150
+ "9 6",
151
  "9 5",
152
+ "1 4",
153
  "9 4",
154
+ "1 5",
155
  "1 6",
156
  "9 3",
157
  "1 7",
158
  "9 2",
 
159
  "9 1",
160
+ "1 8",
161
  "1 9",
162
  "9 0",
163
  "2 0",
164
  "8 9",
 
165
  "2 1",
166
+ "8 8",
167
  "2 2",
168
  "8 7",
169
  "2 3",
 
174
  "2 5",
175
  "2 6",
176
  "8 3",
177
+ "2 7",
178
+ "8 2"
179
  ]
180
  }
181
  }
tokenizer_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "added_tokens_decoder": {
3
- "48": {
4
- "content": "[PAD]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
- "49": {
12
  "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": false,
@@ -16,7 +16,39 @@
16
  "single_word": false,
17
  "special": true
18
  },
19
- "50": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "content": "[EOS]",
21
  "lstrip": false,
22
  "normalized": false,
 
1
  {
2
  "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "1": {
12
  "content": "[CLS]",
13
  "lstrip": false,
14
  "normalized": false,
 
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "5": {
44
+ "content": "[BOS]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "6": {
52
  "content": "[EOS]",
53
  "lstrip": false,
54
  "normalized": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c22510d433bcd6c8ab9f638615a22414f615f7dc618aab32a485345ef2763c73
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6659b0093eeff414bea7b0676fa26b8ced67d1bab20bc510776e16dcea6efb68
3
  size 4920