BartekSadlej committed
Commit 6cbd401
1 Parent(s): 6b0ae77

End of training

README.md CHANGED
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [](https://huggingface.co/) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 0.0923
+ - Loss: 0.8846
 
 ## Model description
 
@@ -44,46 +44,46 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
- | 3.0524 | 1.0 | 6 | 2.3364 |
- | 2.0601 | 2.0 | 12 | 1.7532 |
- | 1.5868 | 3.0 | 18 | 1.3439 |
- | 1.2198 | 4.0 | 24 | 1.0697 |
- | 1.0324 | 5.0 | 30 | 0.9496 |
- | 0.8954 | 6.0 | 36 | 0.8209 |
- | 0.7853 | 7.0 | 42 | 0.7529 |
- | 0.7118 | 8.0 | 48 | 0.6709 |
- | 0.6574 | 9.0 | 54 | 0.6452 |
- | 0.6368 | 10.0 | 60 | 0.5870 |
- | 0.5746 | 11.0 | 66 | 0.5092 |
- | 0.5092 | 12.0 | 72 | 0.4923 |
- | 0.4829 | 13.0 | 78 | 0.4596 |
- | 0.4542 | 14.0 | 84 | 0.4221 |
- | 0.4286 | 15.0 | 90 | 0.3805 |
- | 0.3987 | 16.0 | 96 | 0.3707 |
- | 0.3704 | 17.0 | 102 | 0.3424 |
- | 0.3639 | 18.0 | 108 | 0.3204 |
- | 0.3297 | 19.0 | 114 | 0.2918 |
- | 0.3085 | 20.0 | 120 | 0.2683 |
- | 0.2814 | 21.0 | 126 | 0.2524 |
- | 0.2667 | 22.0 | 132 | 0.2489 |
- | 0.2593 | 23.0 | 138 | 0.2238 |
- | 0.2427 | 24.0 | 144 | 0.2168 |
- | 0.2318 | 25.0 | 150 | 0.2048 |
- | 0.226 | 26.0 | 156 | 0.2018 |
- | 0.2085 | 27.0 | 162 | 0.1860 |
- | 0.1976 | 28.0 | 168 | 0.1800 |
- | 0.1928 | 29.0 | 174 | 0.1621 |
- | 0.1814 | 30.0 | 180 | 0.1551 |
- | 0.1758 | 31.0 | 186 | 0.1398 |
- | 0.1598 | 32.0 | 192 | 0.1291 |
- | 0.1541 | 33.0 | 198 | 0.1206 |
- | 0.1411 | 34.0 | 204 | 0.1097 |
- | 0.1371 | 35.0 | 210 | 0.1042 |
- | 0.1299 | 36.0 | 216 | 0.1015 |
- | 0.1273 | 37.0 | 222 | 0.0955 |
- | 0.1214 | 38.0 | 228 | 0.0944 |
- | 0.1161 | 39.0 | 234 | 0.0935 |
- | 0.1188 | 40.0 | 240 | 0.0923 |
+ | 3.7375 | 1.0 | 14 | 2.8446 |
+ | 2.5309 | 2.0 | 28 | 2.3889 |
+ | 2.3406 | 3.0 | 42 | 2.3073 |
+ | 2.2691 | 4.0 | 56 | 2.2098 |
+ | 2.1412 | 5.0 | 70 | 2.0464 |
+ | 1.9372 | 6.0 | 84 | 1.7744 |
+ | 1.6761 | 7.0 | 98 | 1.5399 |
+ | 1.4725 | 8.0 | 112 | 1.3886 |
+ | 1.368 | 9.0 | 126 | 1.3246 |
+ | 1.33 | 10.0 | 140 | 1.3355 |
+ | 1.3119 | 11.0 | 154 | 1.2886 |
+ | 1.2836 | 12.0 | 168 | 1.2712 |
+ | 1.2668 | 13.0 | 182 | 1.2703 |
+ | 1.2526 | 14.0 | 196 | 1.2477 |
+ | 1.2292 | 15.0 | 210 | 1.2339 |
+ | 1.203 | 16.0 | 224 | 1.1997 |
+ | 1.1686 | 17.0 | 238 | 1.1764 |
+ | 1.1308 | 18.0 | 252 | 1.1424 |
+ | 1.0866 | 19.0 | 266 | 1.1034 |
+ | 1.0355 | 20.0 | 280 | 1.0546 |
+ | 1.0031 | 21.0 | 294 | 1.0241 |
+ | 0.9608 | 22.0 | 308 | 0.9925 |
+ | 0.924 | 23.0 | 322 | 0.9673 |
+ | 0.9022 | 24.0 | 336 | 0.9555 |
+ | 0.8733 | 25.0 | 350 | 0.9381 |
+ | 0.8549 | 26.0 | 364 | 0.9394 |
+ | 0.8363 | 27.0 | 378 | 0.9274 |
+ | 0.8129 | 28.0 | 392 | 0.9211 |
+ | 0.7894 | 29.0 | 406 | 0.9149 |
+ | 0.7705 | 30.0 | 420 | 0.9042 |
+ | 0.7509 | 31.0 | 434 | 0.8962 |
+ | 0.7363 | 32.0 | 448 | 0.9003 |
+ | 0.7261 | 33.0 | 462 | 0.8935 |
+ | 0.7135 | 34.0 | 476 | 0.8923 |
+ | 0.6988 | 35.0 | 490 | 0.8961 |
+ | 0.6883 | 36.0 | 504 | 0.8883 |
+ | 0.6768 | 37.0 | 518 | 0.8905 |
+ | 0.6686 | 38.0 | 532 | 0.8885 |
+ | 0.6625 | 39.0 | 546 | 0.8865 |
+ | 0.6566 | 40.0 | 560 | 0.8846 |
 
 
 ### Framework versions
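The updated card reports only the final evaluation loss (0.8846 after 40 epochs) and does not name the base architecture or the task. As an illustration only: the nested `encoder`/`decoder` blocks in `config.json` suggest a `transformers` encoder-decoder checkpoint, so loading it might look like the sketch below. The repository id, the `EncoderDecoderModel` class, and the toy input string are assumptions, not facts from this commit.

```python
# Hypothetical loading sketch -- the repo id and model class are assumptions.
from transformers import AutoTokenizer, EncoderDecoderModel

repo_id = "BartekSadlej/some-model"  # placeholder; the commit does not state the repo id

tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = EncoderDecoderModel.from_pretrained(repo_id)

# Toy input: the visible vocabulary entries are numeric strings plus "=",
# so a digit sequence is used here purely for demonstration.
inputs = tokenizer("12 34 =", return_tensors="pt")
generated = model.generate(**inputs, max_new_tokens=16)
print(tokenizer.decode(generated[0], skip_special_tokens=True))
```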
config.json CHANGED
@@ -78,7 +78,7 @@
 "typical_p": 1.0,
 "use_bfloat16": false,
 "use_cache": true,
- "vocab_size": 53
+ "vocab_size": 203
 },
 "decoder_start_token_id": 2,
 "encoder": {
@@ -157,7 +157,7 @@
 "typical_p": 1.0,
 "use_bfloat16": false,
 "use_cache": true,
- "vocab_size": 53
+ "vocab_size": 203
 },
 "eos_token_id": 0,
 "is_encoder_decoder": true,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:ea68d63e33c6aa4e3963c65007072dd91138022c527fb9911e38f90037f0ea1a
- size 31207604
+ oid sha256:2e78d9969feecc268ed5e04055443cc1b2195b84cef566954c1fadb7470c6734
+ size 31515412
runs/Mar04_10-09-47_f9b5e148b874/events.out.tfevents.1709546988.f9b5e148b874.5786.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b956a0b97785adc2584e4031dc2b05b4b03365958f189a713a98581a0fcedb2
+ size 28107
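The new `events.out.tfevents.*` file is the TensorBoard log written during this run. It can be browsed with `tensorboard --logdir runs/` or read programmatically, as sketched below; the scalar tag names (`eval/loss`, `train/loss`) are the usual `Trainer` defaults and are an assumption here.

```python
# Reading the committed TensorBoard event file; tag names are assumed Trainer defaults.
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

acc = EventAccumulator("runs/Mar04_10-09-47_f9b5e148b874")
acc.Reload()                            # parse the events.out.tfevents.* file(s) in that directory
print(acc.Tags()["scalars"])            # list of logged scalar tags
for event in acc.Scalars("eval/loss"):  # assumed tag; adjust to whatever Tags() reports
    print(event.step, event.value)
```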
tokenizer.json CHANGED
@@ -114,79 +114,379 @@
 "9": 15,
 "=": 16,
 "10": 17,
- "11": 18,
- "12": 19,
- "13": 20,
- "14": 21,
- "15": 22,
- "99": 23,
- "16": 24,
- "98": 25,
- "97": 26,
- "96": 27,
- "17": 28,
- "95": 29,
- "94": 30,
- "93": 31,
- "18": 32,
- "92": 33,
- "91": 34,
- "90": 35,
- "19": 36,
+ "12": 18,
+ "15": 19,
+ "14": 20,
+ "13": 21,
+ "16": 22,
+ "11": 23,
+ "17": 24,
+ "18": 25,
+ "19": 26,
+ "27": 27,
+ "47": 28,
+ "21": 29,
+ "74": 30,
+ "37": 31,
+ "28": 32,
+ "77": 33,
+ "93": 34,
+ "58": 35,
+ "39": 36,
 "20": 37,
- "89": 38,
- "21": 39,
- "88": 40,
- "87": 41,
- "22": 42,
- "23": 43,
- "86": 44,
- "85": 45,
- "24": 46,
- "25": 47,
- "84": 48,
- "83": 49,
- "26": 50,
- "27": 51,
- "82": 52
+ "63": 38,
+ "33": 39,
+ "54": 40,
+ "88": 41,
+ "34": 42,
+ "45": 43,
+ "29": 44,
+ "22": 45,
+ "64": 46,
+ "78": 47,
+ "30": 48,
+ "40": 49,
+ "49": 50,
+ "36": 51,
+ "68": 52,
+ "25": 53,
+ "96": 54,
+ "80": 55,
+ "66": 56,
+ "52": 57,
+ "59": 58,
+ "82": 59,
+ "23": 60,
+ "79": 61,
+ "87": 62,
+ "43": 63,
+ "85": 64,
+ "83": 65,
+ "46": 66,
+ "75": 67,
+ "92": 68,
+ "97": 69,
+ "99": 70,
+ "65": 71,
+ "35": 72,
+ "98": 73,
+ "24": 74,
+ "48": 75,
+ "50": 76,
+ "67": 77,
+ "73": 78,
+ "86": 79,
+ "69": 80,
+ "95": 81,
+ "26": 82,
+ "62": 83,
+ "72": 84,
+ "91": 85,
+ "53": 86,
+ "84": 87,
+ "31": 88,
+ "42": 89,
+ "61": 90,
+ "38": 91,
+ "57": 92,
+ "56": 93,
+ "51": 94,
+ "55": 95,
+ "60": 96,
+ "94": 97,
+ "76": 98,
+ "90": 99,
+ "71": 100,
+ "81": 101,
+ "44": 102,
+ "32": 103,
+ "89": 104,
+ "41": 105,
+ "70": 106,
+ "103": 107,
+ "104": 108,
+ "116": 109,
+ "100": 110,
+ "105": 111,
+ "125": 112,
+ "107": 113,
+ "115": 114,
+ "101": 115,
+ "118": 116,
+ "110": 117,
+ "102": 118,
+ "109": 119,
+ "123": 120,
+ "111": 121,
+ "139": 122,
+ "106": 123,
+ "108": 124,
+ "131": 125,
+ "117": 126,
+ "120": 127,
+ "112": 128,
+ "129": 129,
+ "141": 130,
+ "114": 131,
+ "122": 132,
+ "144": 133,
+ "130": 134,
+ "113": 135,
+ "121": 136,
+ "124": 137,
+ "119": 138,
+ "128": 139,
+ "142": 140,
+ "148": 141,
+ "134": 142,
+ "126": 143,
+ "152": 144,
+ "132": 145,
+ "151": 146,
+ "133": 147,
+ "136": 148,
+ "138": 149,
+ "127": 150,
+ "153": 151,
+ "159": 152,
+ "147": 153,
+ "149": 154,
+ "137": 155,
+ "150": 156,
+ "154": 157,
+ "145": 158,
+ "135": 159,
+ "170": 160,
+ "158": 161,
+ "140": 162,
+ "143": 163,
+ "146": 164,
+ "167": 165,
+ "155": 166,
+ "157": 167,
+ "160": 168,
+ "161": 169,
+ "163": 170,
+ "166": 171,
+ "162": 172,
+ "165": 173,
+ "156": 174,
+ "164": 175,
+ "169": 176,
+ "173": 177,
+ "174": 178,
+ "180": 179,
+ "175": 180,
+ "178": 181,
+ "172": 182,
+ "181": 183,
+ "176": 184,
+ "177": 185,
+ "168": 186,
+ "171": 187,
+ "179": 188,
+ "182": 189,
+ "183": 190,
+ "184": 191,
+ "186": 192,
+ "187": 193,
+ "191": 194,
+ "192": 195,
+ "185": 196,
+ "189": 197,
+ "194": 198,
+ "197": 199,
+ "188": 200,
+ "190": 201,
+ "193": 202
 },
 "merges": [
 "1 0",
- "1 1",
 "1 2",
- "1 3",
- "1 4",
 "1 5",
- "9 9",
+ "1 4",
+ "1 3",
 "1 6",
- "9 8",
- "9 7",
- "9 6",
+ "1 1",
 "1 7",
- "9 5",
- "9 4",
- "9 3",
 "1 8",
- "9 2",
- "9 1",
- "9 0",
 "1 9",
- "2 0",
- "8 9",
+ "2 7",
+ "4 7",
 "2 1",
+ "7 4",
+ "3 7",
+ "2 8",
+ "7 7",
+ "9 3",
+ "5 8",
+ "3 9",
+ "2 0",
+ "6 3",
+ "3 3",
+ "5 4",
 "8 8",
- "8 7",
+ "3 4",
+ "4 5",
+ "2 9",
 "2 2",
+ "6 4",
+ "7 8",
+ "3 0",
+ "4 0",
+ "4 9",
+ "3 6",
+ "6 8",
+ "2 5",
+ "9 6",
+ "8 0",
+ "6 6",
+ "5 2",
+ "5 9",
+ "8 2",
 "2 3",
- "8 6",
+ "7 9",
+ "8 7",
+ "4 3",
 "8 5",
- "2 4",
- "2 5",
- "8 4",
 "8 3",
+ "4 6",
+ "7 5",
+ "9 2",
+ "9 7",
+ "9 9",
+ "6 5",
+ "3 5",
+ "9 8",
+ "2 4",
+ "4 8",
+ "5 0",
+ "6 7",
+ "7 3",
+ "8 6",
+ "6 9",
+ "9 5",
 "2 6",
- "2 7",
- "8 2"
+ "6 2",
+ "7 2",
+ "9 1",
+ "5 3",
+ "8 4",
+ "3 1",
+ "4 2",
+ "6 1",
+ "3 8",
+ "5 7",
+ "5 6",
+ "5 1",
+ "5 5",
+ "6 0",
+ "9 4",
+ "7 6",
+ "9 0",
+ "7 1",
+ "8 1",
+ "4 4",
+ "3 2",
+ "8 9",
+ "4 1",
+ "7 0",
+ "10 3",
+ "10 4",
+ "1 16",
+ "10 0",
+ "10 5",
+ "12 5",
+ "10 7",
+ "1 15",
+ "10 1",
+ "11 8",
+ "1 10",
+ "10 2",
+ "10 9",
+ "12 3",
+ "11 1",
+ "13 9",
+ "10 6",
+ "10 8",
+ "13 1",
+ "11 7",
+ "12 0",
+ "1 12",
+ "12 9",
+ "14 1",
+ "1 14",
+ "12 2",
+ "14 4",
+ "13 0",
+ "1 13",
+ "12 1",
+ "12 4",
+ "11 9",
+ "12 8",
+ "14 2",
+ "14 8",
+ "13 4",
+ "12 6",
+ "15 2",
+ "13 2",
+ "15 1",
+ "13 3",
+ "13 6",
+ "13 8",
+ "12 7",
+ "15 3",
+ "15 9",
+ "14 7",
+ "14 9",
+ "13 7",
+ "15 0",
+ "15 4",
+ "14 5",
+ "13 5",
+ "17 0",
+ "15 8",
+ "14 0",
+ "14 3",
+ "14 6",
+ "16 7",
+ "15 5",
+ "15 7",
+ "16 0",
+ "16 1",
+ "16 3",
+ "16 6",
+ "16 2",
+ "16 5",
+ "15 6",
+ "16 4",
+ "16 9",
+ "17 3",
+ "17 4",
+ "18 0",
+ "17 5",
+ "17 8",
+ "17 2",
+ "18 1",
+ "17 6",
+ "17 7",
+ "16 8",
+ "17 1",
+ "17 9",
+ "18 2",
+ "18 3",
+ "18 4",
+ "18 6",
+ "18 7",
+ "19 1",
+ "19 2",
+ "18 5",
+ "18 9",
+ "19 4",
+ "19 7",
+ "18 8",
+ "19 0",
+ "19 3"
 ]
 }
 }
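The rewritten `tokenizer.json` grows the BPE vocabulary from 53 to 203 entries and extends the merge list to match: each merge `"a b"` fuses two existing tokens into the concatenated token `"ab"`, and merges are applied in the order listed. The snippet below is an illustrative miniature built with the `tokenizers` library from a hand-picked subset of the committed entries; it is not code from this repository, and the whitespace pre-tokenizer is an assumption for the demo.

```python
# Miniature BPE mirroring the structure of the committed tokenizer.json (illustrative only).
from tokenizers import Tokenizer
from tokenizers.models import BPE
from tokenizers.pre_tokenizers import Whitespace

vocab = {"0": 0, "1": 1, "2": 2, "10": 3, "12": 4}  # token -> id, as in the "vocab" map
merges = [("1", "0"), ("1", "2")]                   # "1 0" and "1 2" from the "merges" list

tok = Tokenizer(BPE(vocab=vocab, merges=merges))
tok.pre_tokenizer = Whitespace()                    # assumption: split on whitespace for the demo

print(tok.encode("10 12 2").tokens)                 # ['10', '12', '2']
```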
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4365175f4bd43cb181ceb471649e3c191725217a9b6655f6c931ccfc1f9c9098
+ oid sha256:114be9997458e51b939066b4032e7e8e4583a30b4815054287b6ce77029f9d58
 size 5112