gsaltintas commited on
Commit
45f8197
·
verified ·
1 Parent(s): 091f7ab

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. README.md +5 -1
  2. special_tokens_map.json +0 -4
  3. tokenizer.json +29 -47
  4. tokenizer_config.json +0 -20
  5. vocab.json +310 -310
README.md CHANGED
@@ -47,4 +47,8 @@ tokens = tokenizer.encode("Hello, world!")
47
  ## Sample Encoding
48
  | Text | Tokens | Token IDs |
49
  |------|--------|-----------|
50
- | `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `91, 75, 84, 79, 75, 223, 75, 77, 75, 3, 70, 81, 77, 87, 92, 4, 81, 86, 87, 92` |
 
 
 
 
 
47
  ## Sample Encoding
48
  | Text | Tokens | Token IDs |
49
  |------|--------|-----------|
50
+ | `yirmi iki+dokuz=otuz bir\ntwenty two+nine=thirty one` | `y, i, r, m, i, Ġ, i, k, i, +, d, o, k, u, z, =, o, t, u, z` | `91, 75, 84, 79, 75, 223, 75, 77, 75, 13, 70, 81, 77, 87, 92, 31, 81, 86, 87, 92` |
51
+
52
+ Command used to create this tokenizer:
53
+ ```bash
54
+ ['/home/gsa/tokenizers2/flexitok/tokenizer_training/train_tokenizers.py', 'algorithm=bpe', 'vocab_size=2000', 'langs=[fas_Arab]', 'data_dir=/scratch/gsa/data/multilingual-addition/', 'output_dir=/scratch/gsa/trained_tokenizers/multilingual_addition', 'pretokenizer=custom:addition', 'number_handling=ltr_3digit', 'add_numbers=false', 'handle_contractions=false', 'unicode_normalization=nfc', 'use_byte_level_regex=false', 'byte_fallback=false', 'strip_zero_width=false', 'cjk_char_split=false', 'add_cjk_chars=false', 'max_lines=-1', 'test_string=yirmi iki+dokuz=otuz bir\\ntwenty two+nine=thirty one', 'hf.publish_to_hf=true', 'hf_repo_prefix=flexitok/', 'hf.hf_repo_id=flexitok/maddition_fas_Arab_2000', 'hf.collections=[flexitok/multilingual_addition_tokenizers]']
special_tokens_map.json CHANGED
@@ -1,8 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "+",
4
- "="
5
- ],
6
  "bos_token": "<s>",
7
  "eos_token": "</s>",
8
  "pad_token": "<pad>"
 
1
  {
 
 
 
 
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
  "pad_token": "<pad>"
tokenizer.json CHANGED
@@ -29,24 +29,6 @@
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
32
- },
33
- {
34
- "id": 3,
35
- "content": "+",
36
- "single_word": false,
37
- "lstrip": false,
38
- "rstrip": false,
39
- "normalized": false,
40
- "special": true
41
- },
42
- {
43
- "id": 4,
44
- "content": "=",
45
- "single_word": false,
46
- "lstrip": false,
47
- "rstrip": false,
48
- "normalized": false,
49
- "special": true
50
  }
51
  ],
52
  "normalizer": {
@@ -99,35 +81,35 @@
99
  "<s>": 0,
100
  "</s>": 1,
101
  "<pad>": 2,
102
- "+": 3,
103
- "=": 4,
104
- "!": 5,
105
- "\"": 6,
106
- "#": 7,
107
- "$": 8,
108
- "%": 9,
109
- "&": 10,
110
- "'": 11,
111
- "(": 12,
112
- ")": 13,
113
- "*": 14,
114
- ",": 15,
115
- "-": 16,
116
- ".": 17,
117
- "/": 18,
118
- "0": 19,
119
- "1": 20,
120
- "2": 21,
121
- "3": 22,
122
- "4": 23,
123
- "5": 24,
124
- "6": 25,
125
- "7": 26,
126
- "8": 27,
127
- "9": 28,
128
- ":": 29,
129
- ";": 30,
130
- "<": 31,
131
  ">": 32,
132
  "?": 33,
133
  "@": 34,
 
29
  "rstrip": false,
30
  "normalized": false,
31
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  }
33
  ],
34
  "normalizer": {
 
81
  "<s>": 0,
82
  "</s>": 1,
83
  "<pad>": 2,
84
+ "!": 3,
85
+ "\"": 4,
86
+ "#": 5,
87
+ "$": 6,
88
+ "%": 7,
89
+ "&": 8,
90
+ "'": 9,
91
+ "(": 10,
92
+ ")": 11,
93
+ "*": 12,
94
+ "+": 13,
95
+ ",": 14,
96
+ "-": 15,
97
+ ".": 16,
98
+ "/": 17,
99
+ "0": 18,
100
+ "1": 19,
101
+ "2": 20,
102
+ "3": 21,
103
+ "4": 22,
104
+ "5": 23,
105
+ "6": 24,
106
+ "7": 25,
107
+ "8": 26,
108
+ "9": 27,
109
+ ":": 28,
110
+ ";": 29,
111
+ "<": 30,
112
+ "=": 31,
113
  ">": 32,
114
  "?": 33,
115
  "@": 34,
tokenizer_config.json CHANGED
@@ -23,28 +23,8 @@
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
- },
27
- "3": {
28
- "content": "+",
29
- "lstrip": false,
30
- "normalized": false,
31
- "rstrip": false,
32
- "single_word": false,
33
- "special": true
34
- },
35
- "4": {
36
- "content": "=",
37
- "lstrip": false,
38
- "normalized": false,
39
- "rstrip": false,
40
- "single_word": false,
41
- "special": true
42
  }
43
  },
44
- "additional_special_tokens": [
45
- "+",
46
- "="
47
- ],
48
  "bos_token": "<s>",
49
  "clean_up_tokenization_spaces": false,
50
  "eos_token": "</s>",
 
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  },
 
 
 
 
28
  "bos_token": "<s>",
29
  "clean_up_tokenization_spaces": false,
30
  "eos_token": "</s>",
vocab.json CHANGED
@@ -1,342 +1,342 @@
1
  {
2
- "دÙĪÛĮست": 310,
3
- "ô": 179,
4
- "ÿ": 190,
5
- "µ": 116,
6
- "5": 24,
 
 
 
 
 
 
 
 
7
  "ą": 196,
8
- "ü": 187,
9
- "ý": 188,
10
- "Ġ": 223,
 
 
 
 
 
 
 
 
11
  "î": 173,
12
- "Ùĩز": 292,
13
- "ó": 178,
14
- "Ě": 217,
15
- "Ĩ": 231,
16
- "Æ": 133,
17
- "Ą": 195,
18
- "سÛĮصد": 309,
19
- "ÙĨصد": 302,
20
- "Į": 237,
21
- "ë": 170,
22
- "!": 5,
23
- "=": 4,
24
  "j": 76,
25
- "i": 75,
26
- "ĝ": 220,
27
- "ÙĨÙĪØ²Ø¯Ùĩ": 334,
28
- "£": 99,
29
- "®": 109,
30
- "u": 87,
31
- "پا": 295,
32
- "½": 124,
33
- "ĭ": 236,
34
- "Ĥ": 227,
 
 
 
 
35
  "]": 63,
36
- "U": 55,
37
- "^": 64,
38
- "شش": 287,
39
- "دÙĪ": 282,
40
- "8": 27,
41
- "ı": 240,
42
- "Ó": 146,
43
- "[": 61,
44
- "1": 20,
45
- "Ľ": 252,
46
- "ł": 257,
47
  "å": 164,
48
- "é": 168,
49
- "Ú©": 317,
50
- "ĵ": 244,
 
 
51
  "¦": 102,
52
- "Ù¾ÙĨجاÙĩ": 314,
53
- "Ă": 193,
54
- "f": 72,
 
 
 
 
 
 
 
 
 
55
  "Ø´": 266,
56
- "Ĕ": 211,
57
- "Ė": 213,
58
- "Ļ": 250,
59
- "ö": 181,
60
- "²Ø¯Ùĩ": 320,
61
- "N": 48,
62
- "ÙĨÙĪØ¯": 315,
63
- "V": 56,
64
- "ÙĨ": 267,
65
- "v": 88,
66
- "č": 204,
67
- "º": 121,
68
- "I": 43,
69
- "Û": 154,
70
- "ğ": 222,
71
- "Ē": 209,
 
 
 
 
 
 
72
  "ÙĩÙģØª": 277,
73
- "_": 65,
74
- "Ùĩار": 281,
75
- "Õ": 148,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  "Ü": 155,
77
- "ú": 185,
78
- "د": 261,
79
- "Â": 129,
80
- "ÙĪ": 259,
81
- "Y": 59,
82
- "Ùĩشتاد": 308,
83
- "ÚĨ": 274,
84
- "\"": 6,
85
- "ľ": 253,
86
- "پاÙĨصد": 316,
87
- "ß": 158,
88
- "¬Ø¯Ùĩ": 324,
89
- "o": 81,
90
- ">": 32,
91
- "شاÙĨزدÙĩ": 328,
92
- "سÛĮ": 280,
93
- "h": 74,
94
- "T": 54,
95
- "Ń": 258,
96
- "ð": 175,
97
- "ï": 174,
98
- "¹": 120,
99
- "p": 82,
100
  "c": 69,
101
- "ù": 184,
102
- "ر": 336,
103
- "W": 57,
104
- "ĸ": 247,
105
  "ī": 234,
106
- "ÛĮست": 286,
107
- "&": 10,
108
- "ÚĨÙĩاردÙĩ": 333,
109
- "ŀ": 255,
110
- "ċ": 202,
111
- "¥": 101,
112
- "Ċ": 201,
113
- "ę": 216,
 
 
 
 
 
 
 
 
 
 
 
 
114
  "Ğ": 221,
115
- "°": 111,
116
- "C": 37,
117
- "ÙĨØ": 279,
118
- "ÙĩÙģØªØµØ¯": 305,
119
  "ļ": 251,
120
- "`": 66,
121
- "b": 68,
122
- "į": 238,
123
- "ĥ": 228,
124
- "3": 22,
 
 
 
 
 
 
125
  "S": 53,
126
- "شصت": 301,
127
- "y": 91,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  "ø": 183,
129
- "·": 118,
130
- "đ": 208,
131
- "ª": 106,
132
- "Ī": 233,
133
- "¯": 110,
134
- "ÛĮس": 284,
135
- "ä": 163,
136
- "Ķ": 245,
137
- "P": 50,
138
- "ÛĮÚ©": 318,
139
- "2": 21,
140
- "#": 7,
141
- "Ĵ": 243,
142
- "À": 127,
143
- "ď": 206,
144
- "D": 38,
145
- ".": 17,
146
- "Ď": 205,
147
- "ò": 177,
148
- "È": 135,
149
- "Đ": 207,
150
- "Ĭ": 235,
151
- "ÛĮ": 268,
152
- "è": 167,
153
  "Ù": 152,
154
- "Ùģ": 337,
155
- "§": 103,
156
- "ġ": 224,
157
- "ĉ": 200,
158
- "¾": 125,
 
 
159
  "ă": 194,
160
- "ij": 242,
161
- "ÙĩÙĦ": 303,
162
- "R": 52,
163
- "Ç": 134,
164
- "«": 107,
165
- "زدÙĩ": 323,
166
- "$": 8,
167
- "ز": 291,
168
- "ت": 263,
169
- "B": 36,
170
- "ار": 272,
171
- ";": 30,
172
- "Ħ": 229,
173
- "Ì": 139,
174
- "~": 96,
175
- "É": 136,
176
- "{": 93,
177
- "ê": 169,
178
- "اÙĩ": 300,
179
- "L": 46,
180
- "©": 105,
181
- "\\": 62,
182
- "Q": 51,
183
- "s": 85,
184
- "ÚĨÙĩÙĦ": 304,
185
- "Ùĩشت": 278,
186
- "Ù¾ÙĨج": 290,
187
- "بÛĮست": 297,
188
  "³": 114,
189
- "G": 41,
190
- "ÙĩÙģØªØ§Ø¯": 306,
191
- "ÙĩÙ": 270,
192
  "a": 67,
193
- "X": 58,
194
- ¯Ùĩ": 298,
195
- "Ā": 191,
196
- "J": 44,
197
- "E": 39,
198
- "ÚĨÙĩارصد": 311,
199
- "Z": 60,
200
- "r": 84,
201
- "ç": 166,
202
- "اÙĨزدÙĩ": 327,
203
- "M": 47,
204
- "ÙĨزدÙĩ": 322,
205
- "<": 31,
206
- "ÙĩجدÙĩ": 326,
207
- "ě": 218,
208
- "}": 95,
209
- "¤": 100,
210
- "Þ": 157,
211
- "ÙĨÙĩ": 288,
212
- "¼": 123,
213
- "Ò": 145,
214
- ")": 13,
215
- "صÙ쨱": 339,
216
- "Ùĩشتصد": 307,
217
- "O": 49,
218
- "ÙĨÙĩصد": 313,
219
- "z": 92,
220
- "H": 42,
221
- "±": 112,
222
- "صت": 299,
223
- "Ï": 142,
224
- "Ê": 137,
225
- "Ä": 131,
226
- "پاÙĨزدÙĩ": 335,
227
- "d": 70,
228
- "¨ÛĮست": 296,
229
- "Ŀ": 254,
230
- "ÙĨÙĪ": 294,
231
- "/": 18,
232
- ":": 29,
233
- "<pad>": 2,
234
- "Å": 132,
235
  "ازدÙĩ": 321,
236
- "+": 3,
237
- "ì": 171,
238
- "Ð": 143,
239
- "à": 159,
240
- "x": 90,
241
- ³": 269,
242
- "Ę": 215,
243
- "û": 186,
244
- "q": 83,
245
- "k": 77,
246
- "Č": 203,
247
- "<s>": 0,
248
- "Ñ": 144,
249
- "ÛĮازدÙĩ": 329,
250
- "جدÙĩ": 325,
251
- "¢": 98,
252
- "¡": 97,
253
- "ص": 262,
254
- "ã": 162,
255
- "Ùĩ": 260,
256
  "سÙĩ": 319,
257
- "%": 9,
258
- "Ö": 149,
259
- "'": 11,
260
- "*": 14,
261
- "Ë": 138,
262
- "ć": 198,
263
- "Í": 140,
264
  "F": 40,
265
- "â": 161,
266
- "9": 28,
267
- "Ý": 156,
268
- "ÙĩØ´": 276,
269
- "õ": 180,
270
- "ē": 210,
271
- "¿": 126,
272
  "þ": 189,
273
- "m": 79,
274
- "w": 89,
275
- "ĕ": 212,
276
- "-": 16,
277
- "(": 12,
278
- "á": 160,
279
- "ā": 192,
280
- "ĺ": 249,
281
- "Ù¾": 273,
282
  "سÛĮزدÙĩ": 331,
283
- "Ú": 153,
284
- "×": 150,
285
- "n": 80,
286
- "Ã": 130,
287
- "ñ": 176,
 
 
 
 
 
 
288
  "ششصد": 312,
289
- "»": 122,
290
- "l": 78,
291
- "ė": 214,
292
- "Ø": 151,
293
- "¨": 104,
294
- "ħ": 230,
295
- "¶": 117,
296
- ",": 15,
297
- "ÚĨÙĩار": 285,
298
- "IJ": 241,
299
- "ĩ": 232,
300
- "Ĺ": 248,
301
- "0": 19,
302
- "Ć": 197,
303
- "صد": 265,
304
- "دÙĪØ§Ø²Ø¯Ùĩ": 332,
305
- "اد": 283,
306
- "í": 172,
307
- "ķ": 246,
308
- "÷": 182,
309
  "¸": 119,
310
- "6": 25,
311
- "İ": 239,
312
- "¬": 108,
313
- "?": 33,
314
- "æ": 165,
315
- "²": 113,
316
- "ģ": 226,
317
- "</s>": 1,
318
- "Ùĩزار": 293,
319
- "Ĉ": 199,
320
- "Ô": 147,
321
- "4": 23,
322
- "g": 73,
323
- "@": 34,
324
- "Ł": 256,
325
- "e": 71,
326
- "Ĝ": 219,
327
- "|": 94,
328
- "ÙĩÙģ": 275,
329
  "A": 35,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
  "Î": 141,
331
- "K": 45,
332
- "ا": 264,
333
- "Ù¾ÙĨØ": 289,
 
 
334
  "Á": 128,
335
- "7": 26,
336
- "t": 86,
337
- "اØ": 271,
 
 
 
 
 
 
 
 
 
 
 
 
338
  "ÙĩÙ쨝Ùĩ": 330,
339
- "´": 115,
340
- "Ģ": 225,
341
- "صÙģ": 338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
342
  }
 
1
  {
2
+ "z": 92,
3
+ "Ī": 233,
4
+ "Ĺ": 248,
5
+ "Ý": 156,
6
+ "¯": 110,
7
+ "ÛĮازدÙĩ": 329,
8
+ "C": 37,
9
+ "Ľ": 252,
10
+ "شصت": 301,
11
+ "ÙĩØ´": 276,
12
+ "À": 127,
13
+ "ä": 163,
14
+ "Í": 140,
15
  "ą": 196,
16
+ "3": 21,
17
+ "D": 38,
18
+ "ē": 210,
19
+ "\\": 62,
20
+ "y": 91,
21
+ "¬": 108,
22
+ "ÚĨ": 274,
23
+ "ª": 106,
24
+ "±": 112,
25
+ "ÛĮست": 286,
26
+ "Þ": 157,
27
  "î": 173,
28
+ "ć": 198,
29
+ "µ": 116,
30
+ ":": 28,
 
 
 
 
 
 
 
 
 
31
  "j": 76,
32
+ "h": 74,
33
+ "صÙ쨱": 339,
34
+ §": 264,
35
+ "8": 26,
36
+ "v": 88,
37
+ "اد": 283,
38
+ "Ê": 137,
39
+ "Ģ": 225,
40
+ "": 117,
41
+ "</s>": 1,
42
+ "Č": 203,
43
+ "Ë": 138,
44
+ "ó": 178,
45
+ "P": 50,
46
  "]": 63,
47
+ "Ä": 131,
48
+ "4": 22,
49
+ "O": 49,
50
+ ²Ø¯Ùĩ": 323,
51
+ "Ù¾ÙĨج": 290,
52
+ "جدÙĩ": 325,
53
+ "@": 34,
54
+ "*": 12,
 
 
 
55
  "å": 164,
56
+ "ý": 188,
57
+ "ص": 262,
58
+ "Å": 132,
59
+ "Ùĩ": 260,
60
+ "Z": 60,
61
  "¦": 102,
62
+ "ñ": 176,
63
+ "d": 70,
64
+ "Ĵ": 243,
65
+ "¢": 98,
66
+ "R": 52,
67
+ "Ā": 191,
68
+ "%": 7,
69
+ "l": 78,
70
+ "È": 135,
71
+ "ð": 175,
72
+ "IJ": 241,
73
+ "¿": 126,
74
  "Ø´": 266,
75
+ "U": 55,
76
+ "Q": 51,
77
+ "Ô": 147,
78
+ "Ø": 151,
79
+ "5": 23,
80
+ "¤": 100,
81
+ "&": 8,
82
+ "Ď": 205,
83
+ "à": 159,
84
+ "x": 90,
85
+ "Ì": 139,
86
+ "ĸ": 247,
87
+ "[": 61,
88
+ "ij": 242,
89
+ "سÛĮ": 280,
90
+ "Į": 237,
91
+ ".": 16,
92
+ "ÙĨÙĩصد": 313,
93
+ "ĝ": 220,
94
+ "ċ": 202,
95
+ "¨": 104,
96
+ "ı": 240,
97
  "ÙĩÙģØª": 277,
98
+ "ã": 162,
99
+ "ë": 170,
100
+ "Ñ": 144,
101
+ "0": 18,
102
+ "Ħ": 229,
103
+ "ě": 218,
104
+ "Ùĩشت": 278,
105
+ "پاÙĨزدÙĩ": 335,
106
+ "í": 172,
107
+ "Ú": 153,
108
+ "Ù¾": 273,
109
+ "÷": 182,
110
+ "?": 33,
111
+ "ā": 192,
112
+ "J": 44,
113
+ "è": 167,
114
+ "ę": 216,
115
+ "ò": 177,
116
+ "ÙĨÙĩ": 288,
117
+ "ĺ": 249,
118
+ "r": 84,
119
+ "Ċ": 201,
120
+ "بÛĮست": 297,
121
+ "æ": 165,
122
+ "X": 58,
123
  "Ü": 155,
124
+ "e": 71,
125
+ "Ã": 130,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  "c": 69,
127
+ "ł": 257,
128
+ ",": 14,
129
+ "ğ": 222,
130
+ "ÙĩÙĦ": 303,
131
  "ī": 234,
132
+ "سÛĮصد": 309,
133
+ ";": 29,
134
+ "Ă": 193,
135
+ "ار": 272,
136
+ "Û": 154,
137
+ "ĩ": 232,
138
+ "ÚĨÙĩÙĦ": 304,
139
+ "دÙĩ": 298,
140
+ "ķ": 246,
141
+ "Ð": 143,
142
+ "اÙĨزدÙĩ": 327,
143
+ "ĵ": 244,
144
+ "Ć": 197,
145
+ "Ĭ": 235,
146
+ "د": 261,
147
+ "Ė": 213,
148
+ "â": 161,
149
+ "á": 160,
150
+ "Ùģ": 337,
151
+ "đ": 208,
152
  "Ğ": 221,
153
+ "K": 45,
154
+ "پاÙĨصد": 316,
155
+ "2": 20,
 
156
  "ļ": 251,
157
+ "7": 25,
158
+ "č": 204,
159
+ "G": 41,
160
+ ")": 11,
161
+ "ÙĩÙģØªØ§Ø¯": 306,
162
+ "İ": 239,
163
+ "1": 19,
164
+ "دÙĪÛĮست": 310,
165
+ "ï": 174,
166
+ "ß": 158,
167
+ "é": 168,
168
  "S": 53,
169
+ "/": 17,
170
+ "Ï": 142,
171
+ "Ùĩز": 292,
172
+ "+": 13,
173
+ "k": 77,
174
+ "ü": 187,
175
+ "6": 24,
176
+ "E": 39,
177
+ "ÙĨØ": 279,
178
+ "Ö": 149,
179
+ "ÙĨÙĪ": 294,
180
+ "»": 122,
181
+ "½": 124,
182
+ "b": 68,
183
+ "Ě": 217,
184
+ "ÙĩجدÙĩ": 326,
185
  "ø": 183,
186
+ "^": 64,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  "Ù": 152,
188
+ "×": 150,
189
+ "صد": 265,
190
+ "9": 27,
191
+ "Ò": 145,
192
+ "M": 47,
193
+ "į": 238,
194
+ "(": 10,
195
  "ă": 194,
196
+ "-": 15,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  "³": 114,
198
+ "دÙĪØ§Ø²Ø¯Ùĩ": 332,
199
+ "º": 121,
200
+ "õ": 180,
201
  "a": 67,
202
+ "ÙĩÙģ": 275,
203
+ "Ùĩار": 281,
204
+ "ÙĨصد": 302,
205
+ "ÙĪ": 259,
206
+ "p": 82,
207
+ "ÙĩÙ": 270,
208
+ "s": 85,
209
+ "ÚĨÙĩار": 285,
210
+ "ģ": 226,
211
+ "n": 80,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  "ازدÙĩ": 321,
213
+ "¨ÛĮست": 296,
214
+ "Ĕ": 211,
215
+ "ú": 185,
216
+ "ď": 206,
217
+ "دÙĪ": 282,
218
+ "ÙĩÙģØªØµØ¯": 305,
219
+ "Ķ": 245,
220
+ "Ē": 209,
221
+ "ė": 214,
222
+ "Ùĩشتصد": 307,
223
+ "ŀ": 255,
224
+ "پا": 295,
225
+ "~": 96,
226
+ "Ġ": 223,
227
+ "V": 56,
 
 
 
 
 
228
  "سÙĩ": 319,
229
+ "Ú©": 317,
230
+ "I": 43,
231
+ "ç": 166,
232
+ "{": 93,
233
+ "´": 115,
234
+ "²": 113,
235
+ "ĭ": 236,
236
  "F": 40,
 
 
 
 
 
 
 
237
  "þ": 189,
 
 
 
 
 
 
 
 
 
238
  "سÛĮزدÙĩ": 331,
239
+ "B": 36,
240
+ ">": 32,
241
+ "w": 89,
242
+ "ÙĨÙĪØ¯": 315,
243
+ "Ù¾ÙĨجاÙĩ": 314,
244
+ "Ĝ": 219,
245
+ "L": 46,
246
+ "ÿ": 190,
247
+ "Õ": 148,
248
+ "Ç": 134,
249
+ "$": 6,
250
  "ششصد": 312,
251
+ "ĕ": 212,
252
+ "Ùĩشتاد": 308,
253
+ "اØ": 271,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
254
  "¸": 119,
255
+ "¼": 123,
256
+ "ĥ": 228,
257
+ "Ù¾ÙĨØ": 289,
258
+ "=": 31,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
  "A": 35,
260
+ "ħ": 230,
261
+ "ê": 169,
262
+ "ì": 171,
263
+ "|": 94,
264
+ "ÚĨÙĩارصد": 311,
265
+ "²Ø¯Ùĩ": 320,
266
+ "}": 95,
267
+ "t": 86,
268
+ "!": 3,
269
+ "ġ": 224,
270
+ "ÛĮÚ©": 318,
271
+ "¥": 101,
272
+ "شش": 287,
273
+ "س": 269,
274
+ "¬Ø¯Ùĩ": 324,
275
  "Î": 141,
276
+ "شاÙĨزدÙĩ": 328,
277
+ "û": 186,
278
+ "ù": 184,
279
+ "«": 107,
280
+ "Đ": 207,
281
  "Á": 128,
282
+ "ÙĨÙĪØ²Ø¯Ùĩ": 334,
283
+ "Ļ": 250,
284
+ "ľ": 253,
285
+ "i": 75,
286
+ "H": 42,
287
+ "q": 83,
288
+ "\"": 4,
289
+ "'": 9,
290
+ "Ùĩزار": 293,
291
+ "ت": 263,
292
+ "<": 30,
293
+ "£": 99,
294
+ "T": 54,
295
+ "Æ": 133,
296
+ "g": 73,
297
  "ÙĩÙ쨝Ùĩ": 330,
298
+ "<pad>": 2,
299
+ "Ó": 146,
300
+ "Y": 59,
301
+ "·": 118,
302
+ "Â": 129,
303
+ "§": 103,
304
+ "¹": 120,
305
+ "_": 65,
306
+ "©": 105,
307
+ "Ń": 258,
308
+ "o": 81,
309
+ "ĉ": 200,
310
+ "Ĩ": 231,
311
+ "N": 48,
312
+ "Ĥ": 227,
313
+ "Ĉ": 199,
314
+ "m": 79,
315
+ "ÛĮس": 284,
316
+ "ÚĨÙĩاردÙĩ": 333,
317
+ "`": 66,
318
+ "f": 72,
319
+ "Ł": 256,
320
+ "صت": 299,
321
+ "ÙĨ": 267,
322
+ "ÛĮ": 268,
323
+ "Ŀ": 254,
324
+ "ÙĨزدÙĩ": 322,
325
+ "¾": 125,
326
+ "ز": 291,
327
+ "ô": 179,
328
+ "#": 5,
329
+ "u": 87,
330
+ "É": 136,
331
+ "¡": 97,
332
+ "W": 57,
333
+ "Ą": 195,
334
+ "®": 109,
335
+ "°": 111,
336
+ "ر": 336,
337
+ "<s>": 0,
338
+ "Ę": 215,
339
+ "صÙģ": 338,
340
+ "ö": 181,
341
+ "اÙĩ": 300
342
  }