gsaltintas committed on
Commit
a7861e6
·
verified ·
1 Parent(s): f8bfdb6

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +54 -0
  2. merges.txt +1 -0
  3. special_tokens_map.json +5 -0
  4. tokenizer.json +343 -0
  5. tokenizer_config.json +37 -0
  6. vocab.json +261 -0
README.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - dig
5
+
6
+ tags:
7
+ - tokenizer
8
+ - bpe
9
+ - flexitok
10
+ - fineweb2
11
+ ---
12
+
13
+ # Byte-Level BPE Tokenizer: digit (259 tokens)
14
+
15
+ A **Byte-Level BPE** tokenizer trained on the **digit** subset of the multilingual-addition dataset.
16
+
17
+ ## Training Details
18
+
19
+ | Parameter | Value |
20
+ |-----------|-------|
21
+ | Algorithm | Byte-Level BPE |
22
+ | Language | `['digit']` |
23
+ | Target Vocab Size | 360 |
24
+ | Final Vocab Size | 259 |
25
+ | Pre-tokenizer | custom:addition_split_on_hyphen |
26
+ | Number handling | individual |
27
+ | Contraction handling | False |
28
+ | Normalizer | NFC |
29
+ | Special Tokens | `<s>`, `</s>`, `<pad>` (no `<unk>` token is defined) |
30
+ | Training Shards | 2, ['train.chunk.00.jsonl', 'val.chunk.00.jsonl'] |
31
+
32
+ ## Usage
33
+
34
+ ```python
35
+ from transformers import AutoTokenizer
36
+
37
+ tokenizer = AutoTokenizer.from_pretrained("flexitok/maddition_digit_individual_minimal_v2")
38
+ tokens = tokenizer.encode("Hello, world!")
39
+ ```
40
+
41
+ ## Files
42
+
43
+ - `tokenizer.json` — Full HuggingFace tokenizer
44
+ - `vocab.json` — Vocabulary mapping
45
+ - `merges.txt` — BPE merge rules
46
+
47
+ ## Sample Encoding
48
+ | Text | Tokens | Token IDs |
49
+ |------|--------|-----------|
50
+ | `22+9=31\nyirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `2, 2, +, 9, =, 3, 1, \, n, y, i, r, m, i, Ġ, i, k, i, +, d` | `20, 20, 13, 27, 31, 21, 19, 62, 80, 91, 75, 84, 79, 75, 223, 75, 77, 75, 13, 70` |
51
+
52
+ Command used to create this tokenizer:
53
+ ```bash
54
+ ['/home/gsa/tokenizers2/flexitok/tokenizer_training/train_tokenizers.py', 'algorithm=bpe', 'vocab_size=360', 'langs=[digit]', 'data_dir=/scratch/gsa/data/multilingual-addition/', 'output_dir=/scratch/gsa/trained_tokenizers/multilingual_addition', 'pretokenizer=custom:addition_split_on_hyphen', 'number_handling=individual', 'add_numbers=false', 'handle_contractions=false', 'unicode_normalization=nfc', 'use_byte_level_regex=false', 'byte_fallback=false', 'strip_zero_width=false', 'cjk_char_split=false', 'add_cjk_chars=false', 'max_lines=-1', 'test_string=22+9=31\\nyirmi iki+dokuz=otuz bir\\ntwenty two+nine=thirty one', 'hf.publish_to_hf=true', 'hf_repo_prefix=flexitok/', 'hf.hf_repo_id=flexitok/maddition_digit_individual_minimal_v2', 'hf.collections=[flexitok/multilingual_addition_tokenizers_minimal]']
merges.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ #version: 0.2
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "<pad>"
5
+ }
tokenizer.json ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
5
+ "added_tokens": [
6
+ {
7
+ "id": 0,
8
+ "content": "<s>",
9
+ "single_word": false,
10
+ "lstrip": false,
11
+ "rstrip": false,
12
+ "normalized": false,
13
+ "special": true
14
+ },
15
+ {
16
+ "id": 1,
17
+ "content": "</s>",
18
+ "single_word": false,
19
+ "lstrip": false,
20
+ "rstrip": false,
21
+ "normalized": false,
22
+ "special": true
23
+ },
24
+ {
25
+ "id": 2,
26
+ "content": "<pad>",
27
+ "single_word": false,
28
+ "lstrip": false,
29
+ "rstrip": false,
30
+ "normalized": false,
31
+ "special": true
32
+ }
33
+ ],
34
+ "normalizer": {
35
+ "type": "NFC"
36
+ },
37
+ "pre_tokenizer": {
38
+ "type": "Sequence",
39
+ "pretokenizers": [
40
+ {
41
+ "type": "Split",
42
+ "pattern": {
43
+ "Regex": "[+=\\-]|[^\\S\\r\\n]*[\\n\\r]+|[^\\S\\r\\n]+"
44
+ },
45
+ "behavior": "Isolated",
46
+ "invert": false
47
+ },
48
+ {
49
+ "type": "Split",
50
+ "pattern": {
51
+ "Regex": "\\p{N}"
52
+ },
53
+ "behavior": "Isolated",
54
+ "invert": false
55
+ },
56
+ {
57
+ "type": "ByteLevel",
58
+ "add_prefix_space": false,
59
+ "trim_offsets": true,
60
+ "use_regex": false
61
+ }
62
+ ]
63
+ },
64
+ "post_processor": null,
65
+ "decoder": {
66
+ "type": "ByteLevel",
67
+ "add_prefix_space": true,
68
+ "trim_offsets": true,
69
+ "use_regex": true
70
+ },
71
+ "model": {
72
+ "type": "BPE",
73
+ "dropout": null,
74
+ "unk_token": null,
75
+ "continuing_subword_prefix": null,
76
+ "end_of_word_suffix": null,
77
+ "fuse_unk": false,
78
+ "byte_fallback": false,
79
+ "ignore_merges": false,
80
+ "vocab": {
81
+ "<s>": 0,
82
+ "</s>": 1,
83
+ "<pad>": 2,
84
+ "!": 3,
85
+ "\"": 4,
86
+ "#": 5,
87
+ "$": 6,
88
+ "%": 7,
89
+ "&": 8,
90
+ "'": 9,
91
+ "(": 10,
92
+ ")": 11,
93
+ "*": 12,
94
+ "+": 13,
95
+ ",": 14,
96
+ "-": 15,
97
+ ".": 16,
98
+ "/": 17,
99
+ "0": 18,
100
+ "1": 19,
101
+ "2": 20,
102
+ "3": 21,
103
+ "4": 22,
104
+ "5": 23,
105
+ "6": 24,
106
+ "7": 25,
107
+ "8": 26,
108
+ "9": 27,
109
+ ":": 28,
110
+ ";": 29,
111
+ "<": 30,
112
+ "=": 31,
113
+ ">": 32,
114
+ "?": 33,
115
+ "@": 34,
116
+ "A": 35,
117
+ "B": 36,
118
+ "C": 37,
119
+ "D": 38,
120
+ "E": 39,
121
+ "F": 40,
122
+ "G": 41,
123
+ "H": 42,
124
+ "I": 43,
125
+ "J": 44,
126
+ "K": 45,
127
+ "L": 46,
128
+ "M": 47,
129
+ "N": 48,
130
+ "O": 49,
131
+ "P": 50,
132
+ "Q": 51,
133
+ "R": 52,
134
+ "S": 53,
135
+ "T": 54,
136
+ "U": 55,
137
+ "V": 56,
138
+ "W": 57,
139
+ "X": 58,
140
+ "Y": 59,
141
+ "Z": 60,
142
+ "[": 61,
143
+ "\\": 62,
144
+ "]": 63,
145
+ "^": 64,
146
+ "_": 65,
147
+ "`": 66,
148
+ "a": 67,
149
+ "b": 68,
150
+ "c": 69,
151
+ "d": 70,
152
+ "e": 71,
153
+ "f": 72,
154
+ "g": 73,
155
+ "h": 74,
156
+ "i": 75,
157
+ "j": 76,
158
+ "k": 77,
159
+ "l": 78,
160
+ "m": 79,
161
+ "n": 80,
162
+ "o": 81,
163
+ "p": 82,
164
+ "q": 83,
165
+ "r": 84,
166
+ "s": 85,
167
+ "t": 86,
168
+ "u": 87,
169
+ "v": 88,
170
+ "w": 89,
171
+ "x": 90,
172
+ "y": 91,
173
+ "z": 92,
174
+ "{": 93,
175
+ "|": 94,
176
+ "}": 95,
177
+ "~": 96,
178
+ "¡": 97,
179
+ "¢": 98,
180
+ "£": 99,
181
+ "¤": 100,
182
+ "¥": 101,
183
+ "¦": 102,
184
+ "§": 103,
185
+ "¨": 104,
186
+ "©": 105,
187
+ "ª": 106,
188
+ "«": 107,
189
+ "¬": 108,
190
+ "®": 109,
191
+ "¯": 110,
192
+ "°": 111,
193
+ "±": 112,
194
+ "²": 113,
195
+ "³": 114,
196
+ "´": 115,
197
+ "µ": 116,
198
+ "¶": 117,
199
+ "·": 118,
200
+ "¸": 119,
201
+ "¹": 120,
202
+ "º": 121,
203
+ "»": 122,
204
+ "¼": 123,
205
+ "½": 124,
206
+ "¾": 125,
207
+ "¿": 126,
208
+ "À": 127,
209
+ "Á": 128,
210
+ "Â": 129,
211
+ "Ã": 130,
212
+ "Ä": 131,
213
+ "Å": 132,
214
+ "Æ": 133,
215
+ "Ç": 134,
216
+ "È": 135,
217
+ "É": 136,
218
+ "Ê": 137,
219
+ "Ë": 138,
220
+ "Ì": 139,
221
+ "Í": 140,
222
+ "Î": 141,
223
+ "Ï": 142,
224
+ "Ð": 143,
225
+ "Ñ": 144,
226
+ "Ò": 145,
227
+ "Ó": 146,
228
+ "Ô": 147,
229
+ "Õ": 148,
230
+ "Ö": 149,
231
+ "×": 150,
232
+ "Ø": 151,
233
+ "Ù": 152,
234
+ "Ú": 153,
235
+ "Û": 154,
236
+ "Ü": 155,
237
+ "Ý": 156,
238
+ "Þ": 157,
239
+ "ß": 158,
240
+ "à": 159,
241
+ "á": 160,
242
+ "â": 161,
243
+ "ã": 162,
244
+ "ä": 163,
245
+ "å": 164,
246
+ "æ": 165,
247
+ "ç": 166,
248
+ "è": 167,
249
+ "é": 168,
250
+ "ê": 169,
251
+ "ë": 170,
252
+ "ì": 171,
253
+ "í": 172,
254
+ "î": 173,
255
+ "ï": 174,
256
+ "ð": 175,
257
+ "ñ": 176,
258
+ "ò": 177,
259
+ "ó": 178,
260
+ "ô": 179,
261
+ "õ": 180,
262
+ "ö": 181,
263
+ "÷": 182,
264
+ "ø": 183,
265
+ "ù": 184,
266
+ "ú": 185,
267
+ "û": 186,
268
+ "ü": 187,
269
+ "ý": 188,
270
+ "þ": 189,
271
+ "ÿ": 190,
272
+ "Ā": 191,
273
+ "ā": 192,
274
+ "Ă": 193,
275
+ "ă": 194,
276
+ "Ą": 195,
277
+ "ą": 196,
278
+ "Ć": 197,
279
+ "ć": 198,
280
+ "Ĉ": 199,
281
+ "ĉ": 200,
282
+ "Ċ": 201,
283
+ "ċ": 202,
284
+ "Č": 203,
285
+ "č": 204,
286
+ "Ď": 205,
287
+ "ď": 206,
288
+ "Đ": 207,
289
+ "đ": 208,
290
+ "Ē": 209,
291
+ "ē": 210,
292
+ "Ĕ": 211,
293
+ "ĕ": 212,
294
+ "Ė": 213,
295
+ "ė": 214,
296
+ "Ę": 215,
297
+ "ę": 216,
298
+ "Ě": 217,
299
+ "ě": 218,
300
+ "Ĝ": 219,
301
+ "ĝ": 220,
302
+ "Ğ": 221,
303
+ "ğ": 222,
304
+ "Ġ": 223,
305
+ "ġ": 224,
306
+ "Ģ": 225,
307
+ "ģ": 226,
308
+ "Ĥ": 227,
309
+ "ĥ": 228,
310
+ "Ħ": 229,
311
+ "ħ": 230,
312
+ "Ĩ": 231,
313
+ "ĩ": 232,
314
+ "Ī": 233,
315
+ "ī": 234,
316
+ "Ĭ": 235,
317
+ "ĭ": 236,
318
+ "Į": 237,
319
+ "į": 238,
320
+ "İ": 239,
321
+ "ı": 240,
322
+ "IJ": 241,
323
+ "ij": 242,
324
+ "Ĵ": 243,
325
+ "ĵ": 244,
326
+ "Ķ": 245,
327
+ "ķ": 246,
328
+ "ĸ": 247,
329
+ "Ĺ": 248,
330
+ "ĺ": 249,
331
+ "Ļ": 250,
332
+ "ļ": 251,
333
+ "Ľ": 252,
334
+ "ľ": 253,
335
+ "Ŀ": 254,
336
+ "ŀ": 255,
337
+ "Ł": 256,
338
+ "ł": 257,
339
+ "Ń": 258
340
+ },
341
+ "merges": []
342
+ }
343
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "</s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<s>",
29
+ "clean_up_tokenization_spaces": false,
30
+ "eos_token": "</s>",
31
+ "extra_special_tokens": {},
32
+ "model_max_length": 1000000000000000019884624838656,
33
+ "pad_token": "<pad>",
34
+ "tokenizer_class": "PreTrainedTokenizerFast",
35
+ "unk_token": null,
36
+ "number_handling": "individual"
37
+ }
vocab.json ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "ê": 169,
3
+ "W": 57,
4
+ "È": 135,
5
+ "±": 112,
6
+ "¨": 104,
7
+ "Ħ": 229,
8
+ "z": 92,
9
+ "®": 109,
10
+ "đ": 208,
11
+ "6": 24,
12
+ "ć": 198,
13
+ "ą": 196,
14
+ "Ì": 139,
15
+ "£": 99,
16
+ "Ā": 191,
17
+ "Ó": 146,
18
+ "V": 56,
19
+ "3": 21,
20
+ "¬": 108,
21
+ "Z": 60,
22
+ "ò": 177,
23
+ "Ë": 138,
24
+ "Ã": 130,
25
+ "-": 15,
26
+ "c": 69,
27
+ "ç": 166,
28
+ "U": 55,
29
+ "ü": 187,
30
+ "4": 22,
31
+ "9": 27,
32
+ "ª": 106,
33
+ "Õ": 148,
34
+ "¡": 97,
35
+ "d": 70,
36
+ "]": 63,
37
+ "G": 41,
38
+ "á": 160,
39
+ "î": 173,
40
+ "F": 40,
41
+ "Ĩ": 231,
42
+ "O": 49,
43
+ "·": 118,
44
+ "_": 65,
45
+ "ě": 218,
46
+ "É": 136,
47
+ "ā": 192,
48
+ "Č": 203,
49
+ "i": 75,
50
+ "³": 114,
51
+ "IJ": 241,
52
+ "q": 83,
53
+ "ă": 194,
54
+ "J": 44,
55
+ "Æ": 133,
56
+ "[": 61,
57
+ "ï": 174,
58
+ "A": 35,
59
+ "H": 42,
60
+ "ô": 179,
61
+ "&": 8,
62
+ "b": 68,
63
+ "*": 12,
64
+ "ģ": 226,
65
+ "ğ": 222,
66
+ "ë": 170,
67
+ "Ø": 151,
68
+ ">": 32,
69
+ "Î": 141,
70
+ "Ú": 153,
71
+ "u": 87,
72
+ "ð": 175,
73
+ "ĸ": 247,
74
+ "o": 81,
75
+ "ļ": 251,
76
+ "ē": 210,
77
+ "$": 6,
78
+ "õ": 180,
79
+ "%": 7,
80
+ "å": 164,
81
+ "I": 43,
82
+ "¦": 102,
83
+ "|": 94,
84
+ "Þ": 157,
85
+ "à": 159,
86
+ "µ": 116,
87
+ "<pad>": 2,
88
+ "Å": 132,
89
+ "Ð": 143,
90
+ "5": 23,
91
+ "Ď": 205,
92
+ "e": 71,
93
+ "8": 26,
94
+ "ī": 234,
95
+ "ß": 158,
96
+ "t": 86,
97
+ "ì": 171,
98
+ "Ė": 213,
99
+ "w": 89,
100
+ "T": 54,
101
+ "Ŀ": 254,
102
+ "Ĥ": 227,
103
+ "Ļ": 250,
104
+ "v": 88,
105
+ "ö": 181,
106
+ "ĩ": 232,
107
+ "ġ": 224,
108
+ "į": 238,
109
+ "Ľ": 252,
110
+ "Ö": 149,
111
+ "ı": 240,
112
+ "ĭ": 236,
113
+ "L": 46,
114
+ "2": 20,
115
+ "ł": 257,
116
+ "¹": 120,
117
+ "Ķ": 245,
118
+ "ä": 163,
119
+ "R": 52,
120
+ "ę": 216,
121
+ "~": 96,
122
+ "Ł": 256,
123
+ "»": 122,
124
+ "g": 73,
125
+ "#": 5,
126
+ "č": 204,
127
+ "Ü": 155,
128
+ "h": 74,
129
+ "Ċ": 201,
130
+ "ñ": 176,
131
+ "ķ": 246,
132
+ "ď": 206,
133
+ "Ġ": 223,
134
+ "Y": 59,
135
+ "k": 77,
136
+ "K": 45,
137
+ "û": 186,
138
+ "s": 85,
139
+ "'": 9,
140
+ "Í": 140,
141
+ "Ç": 134,
142
+ "½": 124,
143
+ "º": 121,
144
+ ":": 28,
145
+ "ó": 178,
146
+ ")": 11,
147
+ "M": 47,
148
+ "!": 3,
149
+ "Ĝ": 219,
150
+ "l": 78,
151
+ "\\": 62,
152
+ "ĺ": 249,
153
+ "Û": 154,
154
+ "Ě": 217,
155
+ "ù": 184,
156
+ "{": 93,
157
+ "¤": 100,
158
+ "<": 30,
159
+ "í": 172,
160
+ "¢": 98,
161
+ "þ": 189,
162
+ "r": 84,
163
+ "æ": 165,
164
+ "¶": 117,
165
+ "E": 39,
166
+ "`": 66,
167
+ "Ê": 137,
168
+ "B": 36,
169
+ "¿": 126,
170
+ "Ò": 145,
171
+ "Ï": 142,
172
+ "ÿ": 190,
173
+ "°": 111,
174
+ "S": 53,
175
+ "\"": 4,
176
+ "@": 34,
177
+ "Ă": 193,
178
+ ";": 29,
179
+ "Ĕ": 211,
180
+ "ľ": 253,
181
+ "è": 167,
182
+ "0": 18,
183
+ "P": 50,
184
+ "Ę": 215,
185
+ "¸": 119,
186
+ "Ī": 233,
187
+ "C": 37,
188
+ "p": 82,
189
+ "¯": 110,
190
+ ",": 14,
191
+ "Á": 128,
192
+ "Đ": 207,
193
+ "j": 76,
194
+ "À": 127,
195
+ "ĵ": 244,
196
+ "é": 168,
197
+ "ċ": 202,
198
+ "Ģ": 225,
199
+ "Ğ": 221,
200
+ "Ń": 258,
201
+ "f": 72,
202
+ "X": 58,
203
+ "ė": 214,
204
+ "y": 91,
205
+ "×": 150,
206
+ "©": 105,
207
+ "ij": 242,
208
+ "Ē": 209,
209
+ "ĝ": 220,
210
+ "ĕ": 212,
211
+ "ø": 183,
212
+ "n": 80,
213
+ "Ô": 147,
214
+ "/": 17,
215
+ "Ĺ": 248,
216
+ "Ñ": 144,
217
+ "ħ": 230,
218
+ "Ä": 131,
219
+ "?": 33,
220
+ "«": 107,
221
+ "İ": 239,
222
+ "¾": 125,
223
+ "ú": 185,
224
+ "ý": 188,
225
+ "x": 90,
226
+ "Ą": 195,
227
+ "+": 13,
228
+ "§": 103,
229
+ "a": 67,
230
+ "}": 95,
231
+ ".": 16,
232
+ "<s>": 0,
233
+ "²": 113,
234
+ "ŀ": 255,
235
+ "â": 161,
236
+ "Â": 129,
237
+ "Ý": 156,
238
+ "ã": 162,
239
+ "Ĵ": 243,
240
+ "Ù": 152,
241
+ "(": 10,
242
+ "m": 79,
243
+ "¥": 101,
244
+ "ĥ": 228,
245
+ "7": 25,
246
+ "=": 31,
247
+ "´": 115,
248
+ "Q": 51,
249
+ "N": 48,
250
+ "ĉ": 200,
251
+ "1": 19,
252
+ "÷": 182,
253
+ "¼": 123,
254
+ "Ĭ": 235,
255
+ "D": 38,
256
+ "^": 64,
257
+ "Ć": 197,
258
+ "</s>": 1,
259
+ "Į": 237,
260
+ "Ĉ": 199
261
+ }