kinzoku99 committed on
Commit
f8fcad2
1 Parent(s): e38d8c5

End of training

Browse files
README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
- - Loss: 0.9144
17
 
18
  ## Model description
19
 
@@ -44,46 +44,46 @@ The following hyperparameters were used during training:
44
 
45
  | Training Loss | Epoch | Step | Validation Loss |
46
  |:-------------:|:-----:|:----:|:---------------:|
47
- | 3.3969 | 1.0 | 6 | 2.7182 |
48
- | 2.3315 | 2.0 | 12 | 1.9518 |
49
- | 1.7958 | 3.0 | 18 | 1.6778 |
50
- | 1.6131 | 4.0 | 24 | 1.5845 |
51
- | 1.5584 | 5.0 | 30 | 1.5760 |
52
- | 1.548 | 6.0 | 36 | 1.5781 |
53
- | 1.5369 | 7.0 | 42 | 1.5516 |
54
- | 1.5412 | 8.0 | 48 | 1.6085 |
55
- | 1.5311 | 9.0 | 54 | 1.5398 |
56
- | 1.5435 | 10.0 | 60 | 1.5587 |
57
- | 1.5138 | 11.0 | 66 | 1.5452 |
58
- | 1.5192 | 12.0 | 72 | 1.5345 |
59
- | 1.4818 | 13.0 | 78 | 1.6037 |
60
- | 1.4935 | 14.0 | 84 | 1.5391 |
61
- | 1.4396 | 15.0 | 90 | 1.8690 |
62
- | 1.4801 | 16.0 | 96 | 1.4349 |
63
- | 1.4051 | 17.0 | 102 | 1.3765 |
64
- | 1.3563 | 18.0 | 108 | 1.3655 |
65
- | 1.3188 | 19.0 | 114 | 1.3211 |
66
- | 1.3023 | 20.0 | 120 | 1.2695 |
67
- | 1.259 | 21.0 | 126 | 1.2644 |
68
- | 1.2366 | 22.0 | 132 | 1.2287 |
69
- | 1.2252 | 23.0 | 138 | 1.1858 |
70
- | 1.2205 | 24.0 | 144 | 1.1587 |
71
- | 1.1593 | 25.0 | 150 | 1.1855 |
72
- | 1.1639 | 26.0 | 156 | 1.1943 |
73
- | 1.1617 | 27.0 | 162 | 1.1305 |
74
- | 1.1493 | 28.0 | 168 | 1.0901 |
75
- | 1.1031 | 29.0 | 174 | 1.0640 |
76
- | 1.0901 | 30.0 | 180 | 1.0242 |
77
- | 1.0582 | 31.0 | 186 | 1.0066 |
78
- | 1.0423 | 32.0 | 192 | 0.9996 |
79
- | 1.0239 | 33.0 | 198 | 0.9732 |
80
- | 1.0151 | 34.0 | 204 | 0.9871 |
81
- | 1.0452 | 35.0 | 210 | 0.9655 |
82
- | 1.009 | 36.0 | 216 | 0.9411 |
83
- | 0.9819 | 37.0 | 222 | 0.9342 |
84
- | 0.9773 | 38.0 | 228 | 0.9235 |
85
- | 0.9743 | 39.0 | 234 | 0.9175 |
86
- | 0.9722 | 40.0 | 240 | 0.9144 |
87
 
88
 
89
  ### Framework versions
 
13
 
14
  This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
15
  It achieves the following results on the evaluation set:
16
+ - Loss: 0.6087
17
 
18
  ## Model description
19
 
 
44
 
45
  | Training Loss | Epoch | Step | Validation Loss |
46
  |:-------------:|:-----:|:----:|:---------------:|
47
+ | 3.3748 | 1.0 | 6 | 2.7546 |
48
+ | 2.3666 | 2.0 | 12 | 2.0282 |
49
+ | 1.8315 | 3.0 | 18 | 1.6880 |
50
+ | 1.616 | 4.0 | 24 | 1.5382 |
51
+ | 1.503 | 5.0 | 30 | 1.4603 |
52
+ | 1.4344 | 6.0 | 36 | 1.4103 |
53
+ | 1.3878 | 7.0 | 42 | 1.3806 |
54
+ | 1.3418 | 8.0 | 48 | 1.2722 |
55
+ | 1.2509 | 9.0 | 54 | 1.2194 |
56
+ | 1.2581 | 10.0 | 60 | 1.2255 |
57
+ | 1.2361 | 11.0 | 66 | 1.4183 |
58
+ | 1.259 | 12.0 | 72 | 1.1575 |
59
+ | 1.1246 | 13.0 | 78 | 1.1058 |
60
+ | 1.0757 | 14.0 | 84 | 1.0496 |
61
+ | 1.0499 | 15.0 | 90 | 1.0402 |
62
+ | 0.9797 | 16.0 | 96 | 0.9715 |
63
+ | 0.9347 | 17.0 | 102 | 0.9876 |
64
+ | 0.9267 | 18.0 | 108 | 0.9119 |
65
+ | 0.8861 | 19.0 | 114 | 0.9250 |
66
+ | 0.9253 | 20.0 | 120 | 0.8471 |
67
+ | 0.9204 | 21.0 | 126 | 0.8488 |
68
+ | 0.8545 | 22.0 | 132 | 0.8404 |
69
+ | 0.8283 | 23.0 | 138 | 0.7885 |
70
+ | 0.7892 | 24.0 | 144 | 0.8518 |
71
+ | 0.804 | 25.0 | 150 | 0.9104 |
72
+ | 0.8278 | 26.0 | 156 | 0.7776 |
73
+ | 0.7759 | 27.0 | 162 | 0.7776 |
74
+ | 0.7388 | 28.0 | 168 | 0.7396 |
75
+ | 0.7323 | 29.0 | 174 | 0.7238 |
76
+ | 0.727 | 30.0 | 180 | 0.6978 |
77
+ | 0.7104 | 31.0 | 186 | 0.6916 |
78
+ | 0.6964 | 32.0 | 192 | 0.6704 |
79
+ | 0.6797 | 33.0 | 198 | 0.6547 |
80
+ | 0.661 | 34.0 | 204 | 0.6457 |
81
+ | 0.6567 | 35.0 | 210 | 0.6497 |
82
+ | 0.6494 | 36.0 | 216 | 0.6411 |
83
+ | 0.6526 | 37.0 | 222 | 0.6314 |
84
+ | 0.6379 | 38.0 | 228 | 0.6150 |
85
+ | 0.6224 | 39.0 | 234 | 0.6093 |
86
+ | 0.6231 | 40.0 | 240 | 0.6087 |
87
 
88
 
89
  ### Framework versions
config.json CHANGED
@@ -78,7 +78,7 @@
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
80
  "use_cache": true,
81
- "vocab_size": 51
82
  },
83
  "decoder_start_token_id": 2,
84
  "encoder": {
@@ -157,7 +157,7 @@
157
  "typical_p": 1.0,
158
  "use_bfloat16": false,
159
  "use_cache": true,
160
- "vocab_size": 51
161
  },
162
  "eos_token_id": 0,
163
  "is_encoder_decoder": true,
 
78
  "typical_p": 1.0,
79
  "use_bfloat16": false,
80
  "use_cache": true,
81
+ "vocab_size": 52
82
  },
83
  "decoder_start_token_id": 2,
84
  "encoder": {
 
157
  "typical_p": 1.0,
158
  "use_bfloat16": false,
159
  "use_cache": true,
160
+ "vocab_size": 52
161
  },
162
  "eos_token_id": 0,
163
  "is_encoder_decoder": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f024fc0d48f55ef44ccd78e8ff1076f4c625b8711bb014ed7c234337f0c6cad2
3
- size 31203500
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518691f2a4d764491e51310041d6488681fc1f56308e3dc613f0d19776354c8f
3
+ size 31205552
runs/Mar11_14-39-47_MSI/events.out.tfevents.1710164392.MSI.11656.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50401b29756c0e7e939a2e88415da556e1bef2f8dd6a7c52a45a8f87cdba4b8d
3
+ size 28005
tokenizer.json CHANGED
@@ -112,77 +112,79 @@
112
  "7": 13,
113
  "8": 14,
114
  "9": 15,
115
- "10": 16,
116
- "99": 17,
117
  "11": 18,
118
  "98": 19,
119
  "97": 20,
120
  "12": 21,
121
- "96": 22,
122
- "13": 23,
123
  "95": 24,
124
  "14": 25,
125
- "15": 26,
126
- "94": 27,
127
- "16": 28,
128
- "93": 29,
129
  "17": 30,
130
  "92": 31,
131
- "91": 32,
132
- "18": 33,
133
- "90": 34,
134
- "19": 35,
135
  "20": 36,
136
  "89": 37,
137
  "21": 38,
138
  "88": 39,
139
  "87": 40,
140
  "22": 41,
141
- "23": 42,
142
- "86": 43,
143
  "24": 44,
144
  "85": 45,
145
  "25": 46,
146
  "84": 47,
147
- "83": 48,
148
- "26": 49,
149
- "82": 50
 
150
  },
151
  "merges": [
152
- "1 0",
153
  "9 9",
 
154
  "1 1",
155
  "9 8",
156
  "9 7",
157
  "1 2",
158
- "9 6",
159
  "1 3",
 
160
  "9 5",
161
  "1 4",
162
- "1 5",
163
  "9 4",
164
- "1 6",
165
  "9 3",
 
166
  "1 7",
167
  "9 2",
168
- "9 1",
169
  "1 8",
170
- "9 0",
171
  "1 9",
 
172
  "2 0",
173
  "8 9",
174
  "2 1",
175
  "8 8",
176
  "8 7",
177
  "2 2",
178
- "2 3",
179
  "8 6",
 
180
  "2 4",
181
  "8 5",
182
  "2 5",
183
  "8 4",
184
- "8 3",
185
  "2 6",
 
 
186
  "8 2"
187
  ]
188
  }
 
112
  "7": 13,
113
  "8": 14,
114
  "9": 15,
115
+ "99": 16,
116
+ "10": 17,
117
  "11": 18,
118
  "98": 19,
119
  "97": 20,
120
  "12": 21,
121
+ "13": 22,
122
+ "96": 23,
123
  "95": 24,
124
  "14": 25,
125
+ "94": 26,
126
+ "15": 27,
127
+ "93": 28,
128
+ "16": 29,
129
  "17": 30,
130
  "92": 31,
131
+ "18": 32,
132
+ "91": 33,
133
+ "19": 34,
134
+ "90": 35,
135
  "20": 36,
136
  "89": 37,
137
  "21": 38,
138
  "88": 39,
139
  "87": 40,
140
  "22": 41,
141
+ "86": 42,
142
+ "23": 43,
143
  "24": 44,
144
  "85": 45,
145
  "25": 46,
146
  "84": 47,
147
+ "26": 48,
148
+ "83": 49,
149
+ "27": 50,
150
+ "82": 51
151
  },
152
  "merges": [
 
153
  "9 9",
154
+ "1 0",
155
  "1 1",
156
  "9 8",
157
  "9 7",
158
  "1 2",
 
159
  "1 3",
160
+ "9 6",
161
  "9 5",
162
  "1 4",
 
163
  "9 4",
164
+ "1 5",
165
  "9 3",
166
+ "1 6",
167
  "1 7",
168
  "9 2",
 
169
  "1 8",
170
+ "9 1",
171
  "1 9",
172
+ "9 0",
173
  "2 0",
174
  "8 9",
175
  "2 1",
176
  "8 8",
177
  "8 7",
178
  "2 2",
 
179
  "8 6",
180
+ "2 3",
181
  "2 4",
182
  "8 5",
183
  "2 5",
184
  "8 4",
 
185
  "2 6",
186
+ "8 3",
187
+ "2 7",
188
  "8 2"
189
  ]
190
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f75e54efb61940b1887367ab9401dfc1503f0cb31e8ddbd9f6b73ecbe867686
3
  size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffb903b082f12ef3984019900e028e957ed048a41aee7d892b5f53d59456a24c
3
  size 5112