lw2333 commited on
Commit
9e94bf9
1 Parent(s): c663312

Training in progress, step 1000

Browse files
added_tokens.json ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|af|>": 570,
3
+ "<|am|>": 577,
4
+ "<|ar|>": 515,
5
+ "<|as|>": 593,
6
+ "<|az|>": 547,
7
+ "<|ba|>": 598,
8
+ "<|be|>": 573,
9
+ "<|bg|>": 535,
10
+ "<|bn|>": 545,
11
+ "<|bo|>": 590,
12
+ "<|br|>": 552,
13
+ "<|bs|>": 558,
14
+ "<|ca|>": 513,
15
+ "<|cs|>": 526,
16
+ "<|cy|>": 540,
17
+ "<|da|>": 528,
18
+ "<|de|>": 504,
19
+ "<|el|>": 524,
20
+ "<|endoftext|>": 500,
21
+ "<|en|>": 502,
22
+ "<|es|>": 505,
23
+ "<|et|>": 550,
24
+ "<|eu|>": 553,
25
+ "<|fa|>": 543,
26
+ "<|fi|>": 520,
27
+ "<|fo|>": 581,
28
+ "<|fr|>": 508,
29
+ "<|gl|>": 562,
30
+ "<|gu|>": 576,
31
+ "<|haw|>": 595,
32
+ "<|ha|>": 597,
33
+ "<|he|>": 522,
34
+ "<|hi|>": 519,
35
+ "<|hr|>": 534,
36
+ "<|ht|>": 582,
37
+ "<|hu|>": 529,
38
+ "<|hy|>": 555,
39
+ "<|id|>": 518,
40
+ "<|is|>": 554,
41
+ "<|it|>": 517,
42
+ "<|ja|>": 509,
43
+ "<|jw|>": 599,
44
+ "<|ka|>": 572,
45
+ "<|kk|>": 559,
46
+ "<|km|>": 566,
47
+ "<|kn|>": 549,
48
+ "<|ko|>": 507,
49
+ "<|la|>": 537,
50
+ "<|lb|>": 588,
51
+ "<|ln|>": 596,
52
+ "<|lo|>": 579,
53
+ "<|lt|>": 536,
54
+ "<|lv|>": 544,
55
+ "<|mg|>": 592,
56
+ "<|mi|>": 538,
57
+ "<|mk|>": 551,
58
+ "<|ml|>": 539,
59
+ "<|mn|>": 557,
60
+ "<|mr|>": 563,
61
+ "<|ms|>": 525,
62
+ "<|mt|>": 586,
63
+ "<|my|>": 589,
64
+ "<|ne|>": 556,
65
+ "<|nl|>": 514,
66
+ "<|nn|>": 585,
67
+ "<|nocaptions|>": 605,
68
+ "<|notimestamps|>": 606,
69
+ "<|no|>": 531,
70
+ "<|oc|>": 571,
71
+ "<|pa|>": 564,
72
+ "<|pl|>": 512,
73
+ "<|ps|>": 583,
74
+ "<|pt|>": 510,
75
+ "<|ro|>": 527,
76
+ "<|ru|>": 506,
77
+ "<|sa|>": 587,
78
+ "<|sd|>": 575,
79
+ "<|si|>": 565,
80
+ "<|sk|>": 541,
81
+ "<|sl|>": 548,
82
+ "<|sn|>": 567,
83
+ "<|so|>": 569,
84
+ "<|sq|>": 560,
85
+ "<|sr|>": 546,
86
+ "<|startoflm|>": 603,
87
+ "<|startofprev|>": 604,
88
+ "<|startoftranscript|>": 501,
89
+ "<|su|>": 600,
90
+ "<|sv|>": 516,
91
+ "<|sw|>": 561,
92
+ "<|ta|>": 530,
93
+ "<|te|>": 542,
94
+ "<|tg|>": 574,
95
+ "<|th|>": 532,
96
+ "<|tk|>": 584,
97
+ "<|tl|>": 591,
98
+ "<|transcribe|>": 602,
99
+ "<|translate|>": 601,
100
+ "<|tr|>": 511,
101
+ "<|tt|>": 594,
102
+ "<|uk|>": 523,
103
+ "<|ur|>": 533,
104
+ "<|uz|>": 580,
105
+ "<|vi|>": 521,
106
+ "<|yi|>": 578,
107
+ "<|yo|>": 568,
108
+ "<|zh|>": 503
109
+ }
config.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "openai/whisper-small",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": [
11
+ 220,
12
+ 50257
13
+ ],
14
+ "bos_token_id": 50257,
15
+ "classifier_proj_size": 256,
16
+ "d_model": 768,
17
+ "decoder_attention_heads": 12,
18
+ "decoder_ffn_dim": 3072,
19
+ "decoder_layerdrop": 0.0,
20
+ "decoder_layers": 12,
21
+ "decoder_start_token_id": 50258,
22
+ "dropout": 0.0,
23
+ "encoder_attention_heads": 12,
24
+ "encoder_ffn_dim": 3072,
25
+ "encoder_layerdrop": 0.0,
26
+ "encoder_layers": 12,
27
+ "eos_token_id": 50257,
28
+ "forced_decoder_ids": null,
29
+ "init_std": 0.02,
30
+ "is_encoder_decoder": true,
31
+ "mask_feature_length": 10,
32
+ "mask_feature_min_masks": 0,
33
+ "mask_feature_prob": 0.0,
34
+ "mask_time_length": 10,
35
+ "mask_time_min_masks": 2,
36
+ "mask_time_prob": 0.05,
37
+ "max_length": 100,
38
+ "max_source_positions": 1500,
39
+ "max_target_positions": 448,
40
+ "median_filter_width": 7,
41
+ "model_type": "whisper",
42
+ "num_hidden_layers": 12,
43
+ "num_mel_bins": 80,
44
+ "pad_token_id": 50257,
45
+ "scale_embedding": false,
46
+ "suppress_tokens": [],
47
+ "torch_dtype": "float32",
48
+ "transformers_version": "4.34.0",
49
+ "use_cache": false,
50
+ "use_weighted_layer_sum": false,
51
+ "vocab_size": 51865
52
+ }
merges.txt ADDED
@@ -0,0 +1,447 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #version: 0.2
2
+ č Ċ
3
+ Ġ čĊ
4
+ 5 3
5
+ 1 1
6
+ 4 53
7
+ 53 3
8
+ 4 2
9
+ Ç ¾
10
+ 2 4
11
+ 2 1
12
+ É Ķ
13
+ Å ĭ
14
+ t s
15
+ É ¬
16
+ Ġ Ǿ
17
+ a i
18
+ Ġ l
19
+ Ġ k
20
+ ÉĶ u
21
+ h u
22
+ É Ĵ
23
+ Ê Ķ
24
+ 1 3
25
+ a u
26
+ u a
27
+ Ġ n
28
+ Ġ ɬ
29
+ ts h
30
+ É ¤
31
+ i a
32
+ i Åĭ
33
+ É ¯
34
+ ɤ ɯ
35
+ k h
36
+ Ġ m
37
+ Ġ t
38
+ Ġ ts
39
+ p u
40
+ 5 5
41
+ a Åĭ
42
+ u i
43
+ i e
44
+ Ġ p
45
+ i u
46
+ o u
47
+ É Ľ
48
+ o i
49
+ Ã ¸
50
+ ie u
51
+ t h
52
+ Ġl i
53
+ hu a
54
+ Ì ĥ
55
+ k e
56
+ k u
57
+ Ġ h
58
+ 4 1
59
+ Ġl au
60
+ 4 42
61
+ p h
62
+ ĠǾ i
63
+ ĠǾ u
64
+ e ÊĶ
65
+ ÉĶ Åĭ
66
+ e Åĭ
67
+ i ÊĶ
68
+ p e
69
+ Ġk h
70
+ ÉĶ ÊĶ
71
+ 4 4
72
+ 5 44
73
+ ɬ ia
74
+ Ġm a
75
+ Ġ Åĭ
76
+ Ġn iÅĭ
77
+ hu ai
78
+ k y
79
+ p ai
80
+ hu e
81
+ hu ɤɯ
82
+ hu oi
83
+ h ÉĶu
84
+ i au
85
+ Ġ tsh
86
+ ĠǾ au
87
+ t y
88
+ t ua
89
+ Ã £
90
+ h y
91
+ k ua
92
+ t ÉĶu
93
+ l i
94
+ Ǿ e
95
+ hu i
96
+ Ġp h
97
+ ĠǾ ua
98
+ Ġn i
99
+ Ġɬ e
100
+ kh e
101
+ t e
102
+ ĠǾ ÉĴ
103
+ Ġk u
104
+ Ǿ ia
105
+ Ġn au
106
+ ã Ģ
107
+ Ġ ãĢ
108
+ ĠǾ a
109
+ Ġk ai
110
+ ĠãĢ Ĥ
111
+ i n
112
+ Ġl e
113
+ Ġl ui
114
+ Ġk ÉĶ
115
+ Ġɬ y
116
+ 2 3
117
+ t i
118
+ Ǿ ÉĶu
119
+ Ġl ai
120
+ Ġk ÉĴ
121
+ ÉĽ u
122
+ ĠǾ y
123
+ kh ÉĶu
124
+ h e
125
+ k ÉĴ
126
+ Ġk a
127
+ Ġts y
128
+ o Åĭ
129
+ ɬ e
130
+ ɬ i
131
+ ɬ y
132
+ ɬ ua
133
+ ɬ ø
134
+ ÉĴ ÊĶ
135
+ Ġɬ i
136
+ Ġts i
137
+ pu e
138
+ pu ɤɯ
139
+ h ai
140
+ k ai
141
+ p ÉĶu
142
+ ts ia
143
+ Ġl ÉĽ
144
+ Ġt e
145
+ pu ai
146
+ pu oi
147
+ ph ÉĶu
148
+ a ÊĶ
149
+ p i
150
+ u ai
151
+ ts y
152
+ h i
153
+ k i
154
+ p ÉĴ
155
+ ï ¼
156
+ Ġ ï¼
157
+ Ǿ ua
158
+ ts ui
159
+ ĠǾ aÅĭ
160
+ Ġk ua
161
+ tsh y
162
+ Ġt ai
163
+ m a
164
+ u e
165
+ Î ²
166
+ Ġ β
167
+ Ǿ ÉĶ
168
+ Ǿ ÉĴ
169
+ ɬ o
170
+ ɤ Åĭ
171
+ kh ÉĴ
172
+ Ġp o
173
+ Ġli ÊĶ
174
+ h ÉĴ
175
+ k ÉĶ
176
+ k ÉĶu
177
+ k ieu
178
+ ts o
179
+ ĠǾ iu
180
+ ĠǾ ÉĽ
181
+ Ġl eÊĶ
182
+ Ġɬ ia
183
+ tsh i
184
+ tsh ai
185
+ kh y
186
+ ie Åĭ
187
+ k o
188
+ l e
189
+ l ÉĶu
190
+ p iÅĭ
191
+ Ä ©
192
+ Ǿ i
193
+ Ǿ au
194
+ Ǿ iu
195
+ ɬ u
196
+ ĠǾ ÉĶu
197
+ kh i
198
+ Ġh a
199
+ Ġh ÉĴ
200
+ h o
201
+ l o
202
+ l ui
203
+ p a
204
+ p o
205
+ p ÉĶ
206
+ Ǿ aÅĭ
207
+ ts ÉĶu
208
+ ĠǾ e
209
+ tsh iu
210
+ Ġm ai
211
+ Ġts ia
212
+ ÉĽ Åĭ
213
+ h ÉĶ
214
+ p ua
215
+ t hua
216
+ u ɤɯ
217
+ u oi
218
+ ts e
219
+ ts i
220
+ ĠǾ iÅĭ
221
+ Ġk o
222
+ Ġk au
223
+ Ġm i
224
+ th ÉĶu
225
+ Ġï¼ Ł
226
+ h ou
227
+ n i
228
+ t a
229
+ Å ©
230
+ ɬ ieu
231
+ ĠǾ ÉĶÅĭ
232
+ Ġk y
233
+ Ġk ÉĶu
234
+ Ġn ui
235
+ Ġɬ ÉĶu
236
+ Ġɬ iÊĶ
237
+ Ġm ua
238
+ Ġt eÅĭ
239
+ th i
240
+ th ai
241
+ l y
242
+ t ou
243
+ t ø
244
+ Åĭ ÉĶu
245
+ ts ieu
246
+ ɬ ai
247
+ Ġl aÅĭ
248
+ Ġl iu
249
+ tsh ia
250
+ ɤ ÊĶ
251
+ kh ÉĶ
252
+ Ġp ai
253
+ ku e
254
+ ku ɤɯ
255
+ ku oi
256
+ Ġh ÉĶ
257
+ Ġkh ai
258
+ ĠǾÉĴ Åĭ
259
+ h a
260
+ k hui
261
+ k aÊĶ
262
+ m iÅĭ
263
+ p hua
264
+ t o
265
+ t ai
266
+ t ui
267
+ t hui
268
+ Ǿ ai
269
+ Ǿ ou
270
+ ts ai
271
+ ts hui
272
+ ɬ iau
273
+ ĠǾ ia
274
+ Ġl o
275
+ Ġk i
276
+ Ġk eÊĶ
277
+ Ġn ai
278
+ Ġɬ a
279
+ tsh ø
280
+ Ġm ÉĴ
281
+ Ġt he
282
+ th iu
283
+ ku ai
284
+ ph ai
285
+ ph aÅĭ
286
+ ĠǾu ai
287
+ Ġkh i
288
+ ĠÅĭ ua
289
+ Ġtsh y
290
+ Ġph i
291
+ k iÅĭ
292
+ t ÉĴ
293
+ t iu
294
+ ɬ aÅĭ
295
+ ĠǾ ÉĶ
296
+ ĠǾ ou
297
+ Ġl y
298
+ Ġn e
299
+ Ġn u
300
+ Ġɬ ieu
301
+ tsh a
302
+ Ġts e
303
+ Ġts ui
304
+ Ġp ɤɯ
305
+ ĠǾu e
306
+ ĠǾu oi
307
+ ĠÅĭ iÅĭ
308
+ Ġku ɤɯ
309
+ ĠlÉĽ ÊĶ
310
+ k ø
311
+ k in
312
+ l ÉĴ
313
+ l iÊĶ
314
+ m ÉĴ
315
+ n ÉĶu
316
+ p ou
317
+ p huai
318
+ p hue
319
+ p huɤɯ
320
+ p huoi
321
+ t ÉĶ
322
+ Å ĵ
323
+ Ġ pu
324
+ Ǿ y
325
+ ɬ ÉĶu
326
+ ĠǾ o
327
+ Ġl ÉĴ
328
+ Ġl ia
329
+ Ġk hu
330
+ Ġk aÅĭ
331
+ Ġk ÉĶÅĭ
332
+ Ġn ÉĴ
333
+ Ġn aÅĭ
334
+ Ġn ø
335
+ Ġn ieu
336
+ Ġɬ ÉĶ
337
+ tsh ÉĶ
338
+ tsh ÉĴ
339
+ tsh ÉĴÊĶ
340
+ kh ou
341
+ Ġm iÅĭ
342
+ Ġt i
343
+ Ġt ÉĴ
344
+ Ġt au
345
+ Ġts ÉĶ
346
+ Ġts eÊĶ
347
+ Ġp ÉĶÊĶ
348
+ ÉĽ n
349
+ th y
350
+ th iÅĭ
351
+ ph ou
352
+ ĠÅĭ ÉĶu
353
+ hy ÉĴ
354
+ h ia
355
+ i Å©
356
+ k ɤɯ
357
+ k iau
358
+ l ø
359
+ p ɤɯ
360
+ Ġ hu
361
+ Ǿ o
362
+ Ǿ u
363
+ Ǿ iÅĭ
364
+ ts ɤɯ
365
+ ɬ iu
366
+ ɬ ÉĶÊĶ
367
+ ĠǾ eÊĶ
368
+ Ġl eÅĭ
369
+ Ġk ia
370
+ Ġn y
371
+ Ġɬ u
372
+ Ġɬ ÉĴ
373
+ tsh au
374
+ tsh eÅĭ
375
+ kh ieu
376
+ Ġm ÉĶ
377
+ Ġm ia
378
+ Ġm uai
379
+ Ġt y
380
+ Ġts ieu
381
+ ie ÊĶ
382
+ ø Åĭ
383
+ Ġh iÅ©
384
+ ĠǾi ÊĶ
385
+ ĠǾi au
386
+ ĠǾu ã
387
+ hy a
388
+ hy ÉĶ
389
+ Ġku ai
390
+ ĠǾy Åĭ
391
+ ki ÉĽu
392
+ Ġï¼ Į
393
+ ĠÇ¾ÉĽ ÊĶ
394
+ Ġko ÊĶ
395
+ a m
396
+ a n
397
+ a kieu
398
+ k ou
399
+ k hue
400
+ k huɤɯ
401
+ l ÉĶ
402
+ l ua
403
+ l ɤɯ
404
+ l iu
405
+ l ou
406
+ m ÉĶ
407
+ m ui
408
+ m in
409
+ n iÅĭ
410
+ o n
411
+ p au
412
+ p ÉĽ
413
+ p eÊĶ
414
+ t iÅĭ
415
+ t eÅĭ
416
+ Ġ hua
417
+ ts ÉĶ
418
+ ts ÉĴ
419
+ ts ou
420
+ ts iau
421
+ ɬ iÊĶ
422
+ Ġl ua
423
+ Ġk iÅĭ
424
+ Ġk hua
425
+ Ġn ÉĶ
426
+ Ġn ua
427
+ Ġn ÉĶÅĭ
428
+ Ġɬ o
429
+ Ġɬ ui
430
+ Ġɬ ÉĶÊĶ
431
+ tsh aÅĭ
432
+ kh a
433
+ Ġm e
434
+ Ġm ue
435
+ Ġt h
436
+ Ġt ÉĶ
437
+ Ġt ua
438
+ Ġt ui
439
+ Ġts ÉĴ
440
+ Ġts iÅĭ
441
+ Ġts iÊĶ
442
+ Ġts ÉĴÊĶ
443
+ Ġp iÊĶ
444
+ ø n
445
+ ø ÊĶ
446
+ th au
447
+ th aÅĭ
preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb6257aac8ce30ff3007460bb406fc6fa0f80341bee9ca48397eaa895bb38716
3
+ size 967103174
special_tokens_map.json ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "<|startoftranscript|>",
5
+ "<|en|>",
6
+ "<|zh|>",
7
+ "<|de|>",
8
+ "<|es|>",
9
+ "<|ru|>",
10
+ "<|ko|>",
11
+ "<|fr|>",
12
+ "<|ja|>",
13
+ "<|pt|>",
14
+ "<|tr|>",
15
+ "<|pl|>",
16
+ "<|ca|>",
17
+ "<|nl|>",
18
+ "<|ar|>",
19
+ "<|sv|>",
20
+ "<|it|>",
21
+ "<|id|>",
22
+ "<|hi|>",
23
+ "<|fi|>",
24
+ "<|vi|>",
25
+ "<|he|>",
26
+ "<|uk|>",
27
+ "<|el|>",
28
+ "<|ms|>",
29
+ "<|cs|>",
30
+ "<|ro|>",
31
+ "<|da|>",
32
+ "<|hu|>",
33
+ "<|ta|>",
34
+ "<|no|>",
35
+ "<|th|>",
36
+ "<|ur|>",
37
+ "<|hr|>",
38
+ "<|bg|>",
39
+ "<|lt|>",
40
+ "<|la|>",
41
+ "<|mi|>",
42
+ "<|ml|>",
43
+ "<|cy|>",
44
+ "<|sk|>",
45
+ "<|te|>",
46
+ "<|fa|>",
47
+ "<|lv|>",
48
+ "<|bn|>",
49
+ "<|sr|>",
50
+ "<|az|>",
51
+ "<|sl|>",
52
+ "<|kn|>",
53
+ "<|et|>",
54
+ "<|mk|>",
55
+ "<|br|>",
56
+ "<|eu|>",
57
+ "<|is|>",
58
+ "<|hy|>",
59
+ "<|ne|>",
60
+ "<|mn|>",
61
+ "<|bs|>",
62
+ "<|kk|>",
63
+ "<|sq|>",
64
+ "<|sw|>",
65
+ "<|gl|>",
66
+ "<|mr|>",
67
+ "<|pa|>",
68
+ "<|si|>",
69
+ "<|km|>",
70
+ "<|sn|>",
71
+ "<|yo|>",
72
+ "<|so|>",
73
+ "<|af|>",
74
+ "<|oc|>",
75
+ "<|ka|>",
76
+ "<|be|>",
77
+ "<|tg|>",
78
+ "<|sd|>",
79
+ "<|gu|>",
80
+ "<|am|>",
81
+ "<|yi|>",
82
+ "<|lo|>",
83
+ "<|uz|>",
84
+ "<|fo|>",
85
+ "<|ht|>",
86
+ "<|ps|>",
87
+ "<|tk|>",
88
+ "<|nn|>",
89
+ "<|mt|>",
90
+ "<|sa|>",
91
+ "<|lb|>",
92
+ "<|my|>",
93
+ "<|bo|>",
94
+ "<|tl|>",
95
+ "<|mg|>",
96
+ "<|as|>",
97
+ "<|tt|>",
98
+ "<|haw|>",
99
+ "<|ln|>",
100
+ "<|ha|>",
101
+ "<|ba|>",
102
+ "<|jw|>",
103
+ "<|su|>",
104
+ "<|translate|>",
105
+ "<|transcribe|>",
106
+ "<|startoflm|>",
107
+ "<|startofprev|>",
108
+ "<|nocaptions|>",
109
+ "<|notimestamps|>"
110
+ ],
111
+ "bos_token": "<|endoftext|>",
112
+ "eos_token": "<|endoftext|>",
113
+ "pad_token": "<|endoftext|>",
114
+ "unk_token": "<|endoftext|>"
115
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,978 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "500": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "501": {
13
+ "content": "<|startoftranscript|>",
14
+ "lstrip": true,
15
+ "normalized": false,
16
+ "rstrip": true,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "502": {
21
+ "content": "<|en|>",
22
+ "lstrip": true,
23
+ "normalized": false,
24
+ "rstrip": true,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "503": {
29
+ "content": "<|zh|>",
30
+ "lstrip": true,
31
+ "normalized": false,
32
+ "rstrip": true,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "504": {
37
+ "content": "<|de|>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": true,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "505": {
45
+ "content": "<|es|>",
46
+ "lstrip": true,
47
+ "normalized": false,
48
+ "rstrip": true,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "506": {
53
+ "content": "<|ru|>",
54
+ "lstrip": true,
55
+ "normalized": false,
56
+ "rstrip": true,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "507": {
61
+ "content": "<|ko|>",
62
+ "lstrip": true,
63
+ "normalized": false,
64
+ "rstrip": true,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "508": {
69
+ "content": "<|fr|>",
70
+ "lstrip": true,
71
+ "normalized": false,
72
+ "rstrip": true,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "509": {
77
+ "content": "<|ja|>",
78
+ "lstrip": true,
79
+ "normalized": false,
80
+ "rstrip": true,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "510": {
85
+ "content": "<|pt|>",
86
+ "lstrip": true,
87
+ "normalized": false,
88
+ "rstrip": true,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "511": {
93
+ "content": "<|tr|>",
94
+ "lstrip": true,
95
+ "normalized": false,
96
+ "rstrip": true,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "512": {
101
+ "content": "<|pl|>",
102
+ "lstrip": true,
103
+ "normalized": false,
104
+ "rstrip": true,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "513": {
109
+ "content": "<|ca|>",
110
+ "lstrip": true,
111
+ "normalized": false,
112
+ "rstrip": true,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "514": {
117
+ "content": "<|nl|>",
118
+ "lstrip": true,
119
+ "normalized": false,
120
+ "rstrip": true,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "515": {
125
+ "content": "<|ar|>",
126
+ "lstrip": true,
127
+ "normalized": false,
128
+ "rstrip": true,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "516": {
133
+ "content": "<|sv|>",
134
+ "lstrip": true,
135
+ "normalized": false,
136
+ "rstrip": true,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "517": {
141
+ "content": "<|it|>",
142
+ "lstrip": true,
143
+ "normalized": false,
144
+ "rstrip": true,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "518": {
149
+ "content": "<|id|>",
150
+ "lstrip": true,
151
+ "normalized": false,
152
+ "rstrip": true,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "519": {
157
+ "content": "<|hi|>",
158
+ "lstrip": true,
159
+ "normalized": false,
160
+ "rstrip": true,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "520": {
165
+ "content": "<|fi|>",
166
+ "lstrip": true,
167
+ "normalized": false,
168
+ "rstrip": true,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "521": {
173
+ "content": "<|vi|>",
174
+ "lstrip": true,
175
+ "normalized": false,
176
+ "rstrip": true,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "522": {
181
+ "content": "<|he|>",
182
+ "lstrip": true,
183
+ "normalized": false,
184
+ "rstrip": true,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "523": {
189
+ "content": "<|uk|>",
190
+ "lstrip": true,
191
+ "normalized": false,
192
+ "rstrip": true,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "524": {
197
+ "content": "<|el|>",
198
+ "lstrip": true,
199
+ "normalized": false,
200
+ "rstrip": true,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "525": {
205
+ "content": "<|ms|>",
206
+ "lstrip": true,
207
+ "normalized": false,
208
+ "rstrip": true,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "526": {
213
+ "content": "<|cs|>",
214
+ "lstrip": true,
215
+ "normalized": false,
216
+ "rstrip": true,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "527": {
221
+ "content": "<|ro|>",
222
+ "lstrip": true,
223
+ "normalized": false,
224
+ "rstrip": true,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "528": {
229
+ "content": "<|da|>",
230
+ "lstrip": true,
231
+ "normalized": false,
232
+ "rstrip": true,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "529": {
237
+ "content": "<|hu|>",
238
+ "lstrip": true,
239
+ "normalized": false,
240
+ "rstrip": true,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "530": {
245
+ "content": "<|ta|>",
246
+ "lstrip": true,
247
+ "normalized": false,
248
+ "rstrip": true,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "531": {
253
+ "content": "<|no|>",
254
+ "lstrip": true,
255
+ "normalized": false,
256
+ "rstrip": true,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "532": {
261
+ "content": "<|th|>",
262
+ "lstrip": true,
263
+ "normalized": false,
264
+ "rstrip": true,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "533": {
269
+ "content": "<|ur|>",
270
+ "lstrip": true,
271
+ "normalized": false,
272
+ "rstrip": true,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "534": {
277
+ "content": "<|hr|>",
278
+ "lstrip": true,
279
+ "normalized": false,
280
+ "rstrip": true,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "535": {
285
+ "content": "<|bg|>",
286
+ "lstrip": true,
287
+ "normalized": false,
288
+ "rstrip": true,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "536": {
293
+ "content": "<|lt|>",
294
+ "lstrip": true,
295
+ "normalized": false,
296
+ "rstrip": true,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "537": {
301
+ "content": "<|la|>",
302
+ "lstrip": true,
303
+ "normalized": false,
304
+ "rstrip": true,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "538": {
309
+ "content": "<|mi|>",
310
+ "lstrip": true,
311
+ "normalized": false,
312
+ "rstrip": true,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "539": {
317
+ "content": "<|ml|>",
318
+ "lstrip": true,
319
+ "normalized": false,
320
+ "rstrip": true,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "540": {
325
+ "content": "<|cy|>",
326
+ "lstrip": true,
327
+ "normalized": false,
328
+ "rstrip": true,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "541": {
333
+ "content": "<|sk|>",
334
+ "lstrip": true,
335
+ "normalized": false,
336
+ "rstrip": true,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "542": {
341
+ "content": "<|te|>",
342
+ "lstrip": true,
343
+ "normalized": false,
344
+ "rstrip": true,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "543": {
349
+ "content": "<|fa|>",
350
+ "lstrip": true,
351
+ "normalized": false,
352
+ "rstrip": true,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "544": {
357
+ "content": "<|lv|>",
358
+ "lstrip": true,
359
+ "normalized": false,
360
+ "rstrip": true,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "545": {
365
+ "content": "<|bn|>",
366
+ "lstrip": true,
367
+ "normalized": false,
368
+ "rstrip": true,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "546": {
373
+ "content": "<|sr|>",
374
+ "lstrip": true,
375
+ "normalized": false,
376
+ "rstrip": true,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "547": {
381
+ "content": "<|az|>",
382
+ "lstrip": true,
383
+ "normalized": false,
384
+ "rstrip": true,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "548": {
389
+ "content": "<|sl|>",
390
+ "lstrip": true,
391
+ "normalized": false,
392
+ "rstrip": true,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "549": {
397
+ "content": "<|kn|>",
398
+ "lstrip": true,
399
+ "normalized": false,
400
+ "rstrip": true,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "550": {
405
+ "content": "<|et|>",
406
+ "lstrip": true,
407
+ "normalized": false,
408
+ "rstrip": true,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "551": {
413
+ "content": "<|mk|>",
414
+ "lstrip": true,
415
+ "normalized": false,
416
+ "rstrip": true,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "552": {
421
+ "content": "<|br|>",
422
+ "lstrip": true,
423
+ "normalized": false,
424
+ "rstrip": true,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "553": {
429
+ "content": "<|eu|>",
430
+ "lstrip": true,
431
+ "normalized": false,
432
+ "rstrip": true,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "554": {
437
+ "content": "<|is|>",
438
+ "lstrip": true,
439
+ "normalized": false,
440
+ "rstrip": true,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "555": {
445
+ "content": "<|hy|>",
446
+ "lstrip": true,
447
+ "normalized": false,
448
+ "rstrip": true,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "556": {
453
+ "content": "<|ne|>",
454
+ "lstrip": true,
455
+ "normalized": false,
456
+ "rstrip": true,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "557": {
461
+ "content": "<|mn|>",
462
+ "lstrip": true,
463
+ "normalized": false,
464
+ "rstrip": true,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "558": {
469
+ "content": "<|bs|>",
470
+ "lstrip": true,
471
+ "normalized": false,
472
+ "rstrip": true,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "559": {
477
+ "content": "<|kk|>",
478
+ "lstrip": true,
479
+ "normalized": false,
480
+ "rstrip": true,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "560": {
485
+ "content": "<|sq|>",
486
+ "lstrip": true,
487
+ "normalized": false,
488
+ "rstrip": true,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "561": {
493
+ "content": "<|sw|>",
494
+ "lstrip": true,
495
+ "normalized": false,
496
+ "rstrip": true,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "562": {
501
+ "content": "<|gl|>",
502
+ "lstrip": true,
503
+ "normalized": false,
504
+ "rstrip": true,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "563": {
509
+ "content": "<|mr|>",
510
+ "lstrip": true,
511
+ "normalized": false,
512
+ "rstrip": true,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "564": {
517
+ "content": "<|pa|>",
518
+ "lstrip": true,
519
+ "normalized": false,
520
+ "rstrip": true,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "565": {
525
+ "content": "<|si|>",
526
+ "lstrip": true,
527
+ "normalized": false,
528
+ "rstrip": true,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "566": {
533
+ "content": "<|km|>",
534
+ "lstrip": true,
535
+ "normalized": false,
536
+ "rstrip": true,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "567": {
541
+ "content": "<|sn|>",
542
+ "lstrip": true,
543
+ "normalized": false,
544
+ "rstrip": true,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "568": {
549
+ "content": "<|yo|>",
550
+ "lstrip": true,
551
+ "normalized": false,
552
+ "rstrip": true,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "569": {
557
+ "content": "<|so|>",
558
+ "lstrip": true,
559
+ "normalized": false,
560
+ "rstrip": true,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "570": {
565
+ "content": "<|af|>",
566
+ "lstrip": true,
567
+ "normalized": false,
568
+ "rstrip": true,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "571": {
573
+ "content": "<|oc|>",
574
+ "lstrip": true,
575
+ "normalized": false,
576
+ "rstrip": true,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "572": {
581
+ "content": "<|ka|>",
582
+ "lstrip": true,
583
+ "normalized": false,
584
+ "rstrip": true,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "573": {
589
+ "content": "<|be|>",
590
+ "lstrip": true,
591
+ "normalized": false,
592
+ "rstrip": true,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "574": {
597
+ "content": "<|tg|>",
598
+ "lstrip": true,
599
+ "normalized": false,
600
+ "rstrip": true,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "575": {
605
+ "content": "<|sd|>",
606
+ "lstrip": true,
607
+ "normalized": false,
608
+ "rstrip": true,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "576": {
613
+ "content": "<|gu|>",
614
+ "lstrip": true,
615
+ "normalized": false,
616
+ "rstrip": true,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "577": {
621
+ "content": "<|am|>",
622
+ "lstrip": true,
623
+ "normalized": false,
624
+ "rstrip": true,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "578": {
629
+ "content": "<|yi|>",
630
+ "lstrip": true,
631
+ "normalized": false,
632
+ "rstrip": true,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "579": {
637
+ "content": "<|lo|>",
638
+ "lstrip": true,
639
+ "normalized": false,
640
+ "rstrip": true,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "580": {
645
+ "content": "<|uz|>",
646
+ "lstrip": true,
647
+ "normalized": false,
648
+ "rstrip": true,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "581": {
653
+ "content": "<|fo|>",
654
+ "lstrip": true,
655
+ "normalized": false,
656
+ "rstrip": true,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "582": {
661
+ "content": "<|ht|>",
662
+ "lstrip": true,
663
+ "normalized": false,
664
+ "rstrip": true,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "583": {
669
+ "content": "<|ps|>",
670
+ "lstrip": true,
671
+ "normalized": false,
672
+ "rstrip": true,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "584": {
677
+ "content": "<|tk|>",
678
+ "lstrip": true,
679
+ "normalized": false,
680
+ "rstrip": true,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "585": {
685
+ "content": "<|nn|>",
686
+ "lstrip": true,
687
+ "normalized": false,
688
+ "rstrip": true,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "586": {
693
+ "content": "<|mt|>",
694
+ "lstrip": true,
695
+ "normalized": false,
696
+ "rstrip": true,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "587": {
701
+ "content": "<|sa|>",
702
+ "lstrip": true,
703
+ "normalized": false,
704
+ "rstrip": true,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "588": {
709
+ "content": "<|lb|>",
710
+ "lstrip": true,
711
+ "normalized": false,
712
+ "rstrip": true,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "589": {
717
+ "content": "<|my|>",
718
+ "lstrip": true,
719
+ "normalized": false,
720
+ "rstrip": true,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "590": {
725
+ "content": "<|bo|>",
726
+ "lstrip": true,
727
+ "normalized": false,
728
+ "rstrip": true,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "591": {
733
+ "content": "<|tl|>",
734
+ "lstrip": true,
735
+ "normalized": false,
736
+ "rstrip": true,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "592": {
741
+ "content": "<|mg|>",
742
+ "lstrip": true,
743
+ "normalized": false,
744
+ "rstrip": true,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "593": {
749
+ "content": "<|as|>",
750
+ "lstrip": true,
751
+ "normalized": false,
752
+ "rstrip": true,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "594": {
757
+ "content": "<|tt|>",
758
+ "lstrip": true,
759
+ "normalized": false,
760
+ "rstrip": true,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "595": {
765
+ "content": "<|haw|>",
766
+ "lstrip": true,
767
+ "normalized": false,
768
+ "rstrip": true,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "596": {
773
+ "content": "<|ln|>",
774
+ "lstrip": true,
775
+ "normalized": false,
776
+ "rstrip": true,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "597": {
781
+ "content": "<|ha|>",
782
+ "lstrip": true,
783
+ "normalized": false,
784
+ "rstrip": true,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "598": {
789
+ "content": "<|ba|>",
790
+ "lstrip": true,
791
+ "normalized": false,
792
+ "rstrip": true,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "599": {
797
+ "content": "<|jw|>",
798
+ "lstrip": true,
799
+ "normalized": false,
800
+ "rstrip": true,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "600": {
805
+ "content": "<|su|>",
806
+ "lstrip": true,
807
+ "normalized": false,
808
+ "rstrip": true,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "601": {
813
+ "content": "<|translate|>",
814
+ "lstrip": true,
815
+ "normalized": false,
816
+ "rstrip": true,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "602": {
821
+ "content": "<|transcribe|>",
822
+ "lstrip": true,
823
+ "normalized": false,
824
+ "rstrip": true,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "603": {
829
+ "content": "<|startoflm|>",
830
+ "lstrip": true,
831
+ "normalized": false,
832
+ "rstrip": true,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "604": {
837
+ "content": "<|startofprev|>",
838
+ "lstrip": true,
839
+ "normalized": false,
840
+ "rstrip": true,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "605": {
845
+ "content": "<|nocaptions|>",
846
+ "lstrip": true,
847
+ "normalized": false,
848
+ "rstrip": true,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "606": {
853
+ "content": "<|notimestamps|>",
854
+ "lstrip": true,
855
+ "normalized": false,
856
+ "rstrip": true,
857
+ "single_word": false,
858
+ "special": true
859
+ }
860
+ },
861
+ "additional_special_tokens": [
862
+ "<|endoftext|>",
863
+ "<|startoftranscript|>",
864
+ "<|en|>",
865
+ "<|zh|>",
866
+ "<|de|>",
867
+ "<|es|>",
868
+ "<|ru|>",
869
+ "<|ko|>",
870
+ "<|fr|>",
871
+ "<|ja|>",
872
+ "<|pt|>",
873
+ "<|tr|>",
874
+ "<|pl|>",
875
+ "<|ca|>",
876
+ "<|nl|>",
877
+ "<|ar|>",
878
+ "<|sv|>",
879
+ "<|it|>",
880
+ "<|id|>",
881
+ "<|hi|>",
882
+ "<|fi|>",
883
+ "<|vi|>",
884
+ "<|he|>",
885
+ "<|uk|>",
886
+ "<|el|>",
887
+ "<|ms|>",
888
+ "<|cs|>",
889
+ "<|ro|>",
890
+ "<|da|>",
891
+ "<|hu|>",
892
+ "<|ta|>",
893
+ "<|no|>",
894
+ "<|th|>",
895
+ "<|ur|>",
896
+ "<|hr|>",
897
+ "<|bg|>",
898
+ "<|lt|>",
899
+ "<|la|>",
900
+ "<|mi|>",
901
+ "<|ml|>",
902
+ "<|cy|>",
903
+ "<|sk|>",
904
+ "<|te|>",
905
+ "<|fa|>",
906
+ "<|lv|>",
907
+ "<|bn|>",
908
+ "<|sr|>",
909
+ "<|az|>",
910
+ "<|sl|>",
911
+ "<|kn|>",
912
+ "<|et|>",
913
+ "<|mk|>",
914
+ "<|br|>",
915
+ "<|eu|>",
916
+ "<|is|>",
917
+ "<|hy|>",
918
+ "<|ne|>",
919
+ "<|mn|>",
920
+ "<|bs|>",
921
+ "<|kk|>",
922
+ "<|sq|>",
923
+ "<|sw|>",
924
+ "<|gl|>",
925
+ "<|mr|>",
926
+ "<|pa|>",
927
+ "<|si|>",
928
+ "<|km|>",
929
+ "<|sn|>",
930
+ "<|yo|>",
931
+ "<|so|>",
932
+ "<|af|>",
933
+ "<|oc|>",
934
+ "<|ka|>",
935
+ "<|be|>",
936
+ "<|tg|>",
937
+ "<|sd|>",
938
+ "<|gu|>",
939
+ "<|am|>",
940
+ "<|yi|>",
941
+ "<|lo|>",
942
+ "<|uz|>",
943
+ "<|fo|>",
944
+ "<|ht|>",
945
+ "<|ps|>",
946
+ "<|tk|>",
947
+ "<|nn|>",
948
+ "<|mt|>",
949
+ "<|sa|>",
950
+ "<|lb|>",
951
+ "<|my|>",
952
+ "<|bo|>",
953
+ "<|tl|>",
954
+ "<|mg|>",
955
+ "<|as|>",
956
+ "<|tt|>",
957
+ "<|haw|>",
958
+ "<|ln|>",
959
+ "<|ha|>",
960
+ "<|ba|>",
961
+ "<|jw|>",
962
+ "<|su|>",
963
+ "<|translate|>",
964
+ "<|transcribe|>",
965
+ "<|startoflm|>",
966
+ "<|startofprev|>",
967
+ "<|nocaptions|>",
968
+ "<|notimestamps|>"
969
+ ],
970
+ "bos_token": "<|endoftext|>",
971
+ "clean_up_tokenization_spaces": true,
972
+ "eos_token": "<|endoftext|>",
973
+ "errors": "replace",
974
+ "model_max_length": 1000000000000000019884624838656,
975
+ "pad_token": "<|endoftext|>",
976
+ "tokenizer_class": "WhisperTokenizer",
977
+ "unk_token": "<|endoftext|>"
978
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b66e6a7501fb15138e3f8d424013fb5165b51f25f2a1b30472a54a832091810
3
+ size 4664
vocab.json ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "0": 0,
3
+ "1": 1,
4
+ "11": 57,
5
+ "13": 76,
6
+ "2": 2,
7
+ "21": 63,
8
+ "23": 168,
9
+ "24": 62,
10
+ "3": 3,
11
+ "4": 4,
12
+ "41": 110,
13
+ "42": 60,
14
+ "44": 123,
15
+ "442": 112,
16
+ "453": 58,
17
+ "5": 5,
18
+ "53": 56,
19
+ "533": 59,
20
+ "544": 124,
21
+ "55": 92,
22
+ "a": 6,
23
+ "ai": 69,
24
+ "akieu": 449,
25
+ "am": 447,
26
+ "an": 448,
27
+ "au": 77,
28
+ "aÅĭ": 93,
29
+ "aÊĶ": 200,
30
+ "b": 7,
31
+ "e": 8,
32
+ "eÅĭ": 118,
33
+ "eÊĶ": 116,
34
+ "h": 9,
35
+ "ha": 311,
36
+ "hai": 191,
37
+ "he": 176,
38
+ "hi": 204,
39
+ "hia": 406,
40
+ "ho": 252,
41
+ "hou": 278,
42
+ "hu": 73,
43
+ "hua": 105,
44
+ "huai": 129,
45
+ "hue": 132,
46
+ "hui": 147,
47
+ "huoi": 134,
48
+ "huɤɯ": 133,
49
+ "hy": 142,
50
+ "hya": 439,
51
+ "hyÉĴ": 405,
52
+ "hyÉĶ": 440,
53
+ "hÉĴ": 226,
54
+ "hÉĶ": 265,
55
+ "hÉĶu": 135,
56
+ "i": 10,
57
+ "ia": 83,
58
+ "iau": 136,
59
+ "ie": 95,
60
+ "ieu": 102,
61
+ "ieÅĭ": 238,
62
+ "ieÊĶ": 433,
63
+ "in": 163,
64
+ "iu": 97,
65
+ "iũ": 407,
66
+ "iÅĭ": 84,
67
+ "iÊĶ": 119,
68
+ "k": 11,
69
+ "kai": 192,
70
+ "kaÊĶ": 313,
71
+ "ke": 107,
72
+ "kh": 87,
73
+ "kha": 484,
74
+ "khe": 152,
75
+ "khi": 249,
76
+ "khieu": 427,
77
+ "khou": 392,
78
+ "khue": 451,
79
+ "khui": 312,
80
+ "khuɤɯ": 452,
81
+ "khy": 237,
82
+ "khÉĴ": 223,
83
+ "khÉĶ": 303,
84
+ "khÉĶu": 175,
85
+ "ki": 205,
86
+ "kiau": 409,
87
+ "kieu": 229,
88
+ "kin": 363,
89
+ "kiÅĭ": 343,
90
+ "kiÉĽu": 443,
91
+ "ko": 239,
92
+ "kou": 450,
93
+ "ku": 108,
94
+ "kua": 143,
95
+ "kuai": 335,
96
+ "kue": 305,
97
+ "kuoi": 307,
98
+ "kuɤɯ": 306,
99
+ "ky": 130,
100
+ "kø": 362,
101
+ "kɤɯ": 408,
102
+ "kÉĴ": 177,
103
+ "kÉĶ": 227,
104
+ "kÉĶu": 228,
105
+ "l": 12,
106
+ "le": 240,
107
+ "li": 145,
108
+ "liu": 456,
109
+ "liÊĶ": 365,
110
+ "lo": 253,
111
+ "lou": 457,
112
+ "lua": 454,
113
+ "lui": 254,
114
+ "ly": 293,
115
+ "lø": 410,
116
+ "lɤɯ": 455,
117
+ "lÉĴ": 364,
118
+ "lÉĶ": 453,
119
+ "lÉĶu": 241,
120
+ "m": 13,
121
+ "ma": 215,
122
+ "min": 460,
123
+ "miÅĭ": 314,
124
+ "mui": 459,
125
+ "mÉĴ": 366,
126
+ "mÉĶ": 458,
127
+ "n": 14,
128
+ "ni": 279,
129
+ "niÅĭ": 461,
130
+ "nÉĶu": 367,
131
+ "o": 15,
132
+ "oi": 100,
133
+ "on": 462,
134
+ "ou": 98,
135
+ "oÅĭ": 180,
136
+ "p": 16,
137
+ "pa": 255,
138
+ "pai": 131,
139
+ "pau": 463,
140
+ "pe": 120,
141
+ "peÊĶ": 465,
142
+ "ph": 113,
143
+ "phai": 336,
144
+ "phaÅĭ": 337,
145
+ "phou": 403,
146
+ "phua": 315,
147
+ "phuai": 369,
148
+ "phue": 370,
149
+ "phuoi": 372,
150
+ "phuɤɯ": 371,
151
+ "phÉĶu": 199,
152
+ "pi": 201,
153
+ "piÅĭ": 242,
154
+ "po": 256,
155
+ "pou": 368,
156
+ "pu": 91,
157
+ "pua": 266,
158
+ "puai": 197,
159
+ "pue": 189,
160
+ "puoi": 198,
161
+ "puɤɯ": 190,
162
+ "pɤɯ": 411,
163
+ "pÉĴ": 206,
164
+ "pÉĶ": 257,
165
+ "pÉĶu": 193,
166
+ "pÉĽ": 464,
167
+ "s": 17,
168
+ "t": 18,
169
+ "ta": 280,
170
+ "tai": 317,
171
+ "te": 153,
172
+ "teÅĭ": 467,
173
+ "th": 103,
174
+ "thai": 292,
175
+ "thau": 498,
176
+ "thaÅĭ": 499,
177
+ "thi": 291,
178
+ "thiu": 334,
179
+ "thiÅĭ": 402,
180
+ "thua": 267,
181
+ "thui": 319,
182
+ "thy": 401,
183
+ "thÉĶu": 276,
184
+ "ti": 169,
185
+ "tiu": 345,
186
+ "tiÅĭ": 466,
187
+ "to": 316,
188
+ "tou": 294,
189
+ "ts": 66,
190
+ "tsai": 322,
191
+ "tse": 270,
192
+ "tsh": 81,
193
+ "tsha": 353,
194
+ "tshai": 236,
195
+ "tshau": 425,
196
+ "tshaÅĭ": 483,
197
+ "tsheÅĭ": 426,
198
+ "tshi": 235,
199
+ "tshia": 301,
200
+ "tshiu": 261,
201
+ "tshui": 323,
202
+ "tshy": 213,
203
+ "tshø": 331,
204
+ "tshÉĴ": 390,
205
+ "tshÉĴÊĶ": 391,
206
+ "tshÉĶ": 389,
207
+ "tsi": 271,
208
+ "tsia": 194,
209
+ "tsiau": 472,
210
+ "tsieu": 297,
211
+ "tso": 230,
212
+ "tsou": 471,
213
+ "tsui": 210,
214
+ "tsy": 203,
215
+ "tsɤɯ": 416,
216
+ "tsÉĴ": 470,
217
+ "tsÉĶ": 469,
218
+ "tsÉĶu": 259,
219
+ "tua": 140,
220
+ "tui": 318,
221
+ "ty": 139,
222
+ "tø": 295,
223
+ "tÉĴ": 344,
224
+ "tÉĶ": 373,
225
+ "tÉĶu": 144,
226
+ "u": 19,
227
+ "ua": 78,
228
+ "uai": 202,
229
+ "ue": 216,
230
+ "ui": 94,
231
+ "uoi": 269,
232
+ "uɤɯ": 268,
233
+ "y": 20,
234
+ "£": 21,
235
+ "¤": 22,
236
+ "¦": 23,
237
+ "©": 24,
238
+ "¬": 25,
239
+ "¯": 26,
240
+ "²": 27,
241
+ "¸": 28,
242
+ "¼": 29,
243
+ "¾": 30,
244
+ "Ã": 31,
245
+ "ã": 141,
246
+ "ø": 101,
247
+ "øn": 496,
248
+ "øÅĭ": 434,
249
+ "øÊĶ": 497,
250
+ "Ä": 32,
251
+ "Ä©": 243,
252
+ "Å": 33,
253
+ "Å©": 281,
254
+ "Åĭ": 65,
255
+ "ÅĭÉĶu": 296,
256
+ "Åĵ": 374,
257
+ "Ç": 34,
258
+ "Ǿ": 61,
259
+ "Ǿai": 320,
260
+ "Ǿau": 245,
261
+ "ǾaÅĭ": 258,
262
+ "Ǿe": 146,
263
+ "Ǿi": 244,
264
+ "Ǿia": 156,
265
+ "Ǿiu": 246,
266
+ "ǾiÅĭ": 415,
267
+ "Ǿo": 413,
268
+ "Ǿou": 321,
269
+ "Ǿu": 414,
270
+ "Ǿua": 209,
271
+ "Ǿy": 376,
272
+ "ǾÉĴ": 220,
273
+ "ǾÉĶ": 219,
274
+ "ǾÉĶu": 170,
275
+ "É": 35,
276
+ "ɤ": 82,
277
+ "ɤÅĭ": 222,
278
+ "ɤɯ": 86,
279
+ "ɤÊĶ": 302,
280
+ "ɬ": 67,
281
+ "ɬai": 298,
282
+ "ɬaÅĭ": 346,
283
+ "ɬe": 181,
284
+ "ɬi": 182,
285
+ "ɬia": 125,
286
+ "ɬiau": 324,
287
+ "ɬieu": 282,
288
+ "ɬiu": 417,
289
+ "ɬiÊĶ": 473,
290
+ "ɬo": 221,
291
+ "ɬu": 247,
292
+ "ɬua": 184,
293
+ "ɬy": 183,
294
+ "ɬø": 185,
295
+ "ɬÉĶu": 377,
296
+ "ɬÉĶÊĶ": 418,
297
+ "ɯ": 85,
298
+ "ÉĴ": 74,
299
+ "ÉĴÊĶ": 186,
300
+ "ÉĶ": 64,
301
+ "ÉĶu": 72,
302
+ "ÉĶÅĭ": 117,
303
+ "ÉĶÊĶ": 122,
304
+ "ÉĽ": 99,
305
+ "ÉĽn": 400,
306
+ "ÉĽu": 173,
307
+ "ÉĽÅĭ": 264,
308
+ "Ê": 36,
309
+ "ÊĶ": 75,
310
+ "Ì": 37,
311
+ "Ìĥ": 106,
312
+ "Î": 38,
313
+ "β": 217,
314
+ "ã": 39,
315
+ "ãĢ": 158,
316
+ "ï": 40,
317
+ "ï¼": 207,
318
+ "Ċ": 41,
319
+ "č": 42,
320
+ "čĊ": 54,
321
+ "Ġ": 43,
322
+ "Ġh": 109,
323
+ "Ġha": 250,
324
+ "ĠhiÅ©": 435,
325
+ "Ġhu": 412,
326
+ "Ġhua": 468,
327
+ "ĠhÉĴ": 251,
328
+ "ĠhÉĶ": 308,
329
+ "Ġk": 71,
330
+ "Ġka": 178,
331
+ "Ġkai": 161,
332
+ "Ġkau": 274,
333
+ "ĠkaÅĭ": 382,
334
+ "ĠkeÊĶ": 328,
335
+ "Ġkh": 121,
336
+ "Ġkhai": 309,
337
+ "Ġkhi": 339,
338
+ "Ġkhu": 381,
339
+ "Ġkhua": 476,
340
+ "Ġki": 327,
341
+ "Ġkia": 421,
342
+ "ĠkiÅĭ": 475,
343
+ "Ġko": 273,
344
+ "ĠkoÊĶ": 446,
345
+ "Ġku": 155,
346
+ "Ġkua": 212,
347
+ "Ġkuai": 441,
348
+ "Ġkuɤɯ": 360,
349
+ "Ġky": 284,
350
+ "ĠkÉĴ": 172,
351
+ "ĠkÉĶ": 166,
352
+ "ĠkÉĶu": 285,
353
+ "ĠkÉĶÅĭ": 383,
354
+ "Ġl": 70,
355
+ "Ġlai": 171,
356
+ "Ġlau": 111,
357
+ "ĠlaÅĭ": 299,
358
+ "Ġle": 164,
359
+ "ĠleÅĭ": 420,
360
+ "ĠleÊĶ": 233,
361
+ "Ġli": 104,
362
+ "Ġlia": 380,
363
+ "Ġliu": 300,
364
+ "ĠliÊĶ": 225,
365
+ "Ġlo": 326,
366
+ "Ġlua": 474,
367
+ "Ġlui": 165,
368
+ "Ġly": 349,
369
+ "ĠlÉĴ": 379,
370
+ "ĠlÉĽ": 195,
371
+ "ĠlÉĽÊĶ": 361,
372
+ "Ġm": 88,
373
+ "Ġma": 126,
374
+ "Ġmai": 262,
375
+ "Ġme": 485,
376
+ "Ġmi": 275,
377
+ "Ġmia": 429,
378
+ "ĠmiÅĭ": 393,
379
+ "Ġmua": 289,
380
+ "Ġmuai": 430,
381
+ "Ġmue": 486,
382
+ "ĠmÉĴ": 332,
383
+ "ĠmÉĶ": 428,
384
+ "Ġn": 79,
385
+ "Ġnai": 329,
386
+ "Ġnau": 157,
387
+ "ĠnaÅĭ": 385,
388
+ "Ġne": 350,
389
+ "Ġni": 150,
390
+ "Ġnieu": 387,
391
+ "ĠniÅĭ": 128,
392
+ "Ġnu": 351,
393
+ "Ġnua": 478,
394
+ "Ġnui": 286,
395
+ "Ġny": 422,
396
+ "Ġnø": 386,
397
+ "ĠnÉĴ": 384,
398
+ "ĠnÉĶ": 477,
399
+ "ĠnÉĶÅĭ": 479,
400
+ "Ġp": 96,
401
+ "Ġpai": 304,
402
+ "Ġph": 148,
403
+ "Ġphi": 342,
404
+ "ĠpiÊĶ": 495,
405
+ "Ġpo": 224,
406
+ "Ġpu": 375,
407
+ "Ġpɤɯ": 356,
408
+ "ĠpÉĶÊĶ": 399,
409
+ "Ġt": 89,
410
+ "Ġtai": 214,
411
+ "Ġtau": 396,
412
+ "Ġte": 196,
413
+ "ĠteÅĭ": 290,
414
+ "Ġth": 487,
415
+ "Ġthe": 333,
416
+ "Ġti": 394,
417
+ "Ġts": 90,
418
+ "Ġtse": 354,
419
+ "ĠtseÊĶ": 398,
420
+ "Ġtsh": 137,
421
+ "Ġtshy": 341,
422
+ "Ġtsi": 188,
423
+ "Ġtsia": 263,
424
+ "Ġtsieu": 432,
425
+ "ĠtsiÅĭ": 492,
426
+ "ĠtsiÊĶ": 493,
427
+ "Ġtsui": 355,
428
+ "Ġtsy": 179,
429
+ "ĠtsÉĴ": 491,
430
+ "ĠtsÉĴÊĶ": 494,
431
+ "ĠtsÉĶ": 397,
432
+ "Ġtua": 489,
433
+ "Ġtui": 490,
434
+ "Ġty": 431,
435
+ "ĠtÉĴ": 395,
436
+ "ĠtÉĶ": 488,
437
+ "ĠÅĭ": 127,
438
+ "ĠÅĭiÅĭ": 359,
439
+ "ĠÅĭua": 340,
440
+ "ĠÅĭÉĶu": 404,
441
+ "ĠǾ": 68,
442
+ "ĠǾa": 160,
443
+ "ĠǾau": 138,
444
+ "ĠǾaÅĭ": 211,
445
+ "ĠǾe": 260,
446
+ "ĠǾeÊĶ": 419,
447
+ "ĠǾi": 114,
448
+ "ĠǾia": 325,
449
+ "ĠǾiau": 437,
450
+ "ĠǾiu": 231,
451
+ "ĠǾiÅĭ": 272,
452
+ "ĠǾiÊĶ": 436,
453
+ "ĠǾo": 378,
454
+ "ĠǾou": 348,
455
+ "ĠǾu": 115,
456
+ "ĠǾua": 149,
457
+ "ĠǾuai": 338,
458
+ "ĠǾue": 357,
459
+ "ĠǾuoi": 358,
460
+ "ĠǾuã": 438,
461
+ "ĠǾy": 174,
462
+ "ĠǾyÅĭ": 442,
463
+ "ĠǾÉĴ": 154,
464
+ "ĠǾÉĴÅĭ": 310,
465
+ "ĠǾÉĶ": 347,
466
+ "ĠǾÉĶu": 248,
467
+ "ĠǾÉĶÅĭ": 283,
468
+ "ĠÇ¾ÉĽ": 232,
469
+ "ĠÇ¾ÉĽÊĶ": 445,
470
+ "Ġɬ": 80,
471
+ "Ġɬa": 330,
472
+ "Ġɬe": 151,
473
+ "Ġɬi": 187,
474
+ "Ġɬia": 234,
475
+ "Ġɬieu": 352,
476
+ "ĠɬiÊĶ": 288,
477
+ "Ġɬo": 480,
478
+ "Ġɬu": 423,
479
+ "Ġɬui": 481,
480
+ "Ġɬy": 167,
481
+ "ĠɬÉĴ": 424,
482
+ "ĠɬÉĶ": 388,
483
+ "ĠɬÉĶu": 287,
484
+ "ĠɬÉĶÊĶ": 482,
485
+ "Ġβ": 218,
486
+ "ĠãĢ": 159,
487
+ "ĠãĢĤ": 162,
488
+ "Ġï¼": 208,
489
+ "Ġï¼Į": 444,
490
+ "Ġï¼Ł": 277,
491
+ "ĠčĊ": 55,
492
+ "Ģ": 44,
493
+ "Ĥ": 45,
494
+ "ĥ": 46,
495
+ "ĭ": 47,
496
+ "Į": 48,
497
+ "Ĵ": 49,
498
+ "ĵ": 50,
499
+ "Ķ": 51,
500
+ "Ľ": 52,
501
+ "Ł": 53
502
+ }