ericaRC committed on
Commit
a64ae63
·
verified ·
1 Parent(s): 4043da7

Fix tokenizer_config: move lang codes to additional_special_tokens for transformers<5 compat

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +16 -15
tokenizer_config.json CHANGED
@@ -3,7 +3,20 @@
3
  "bos_token": "<s>",
4
  "cls_token": "<s>",
5
  "eos_token": "</s>",
6
- "extra_special_tokens": [
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  "ace_Arab",
8
  "ace_Latn",
9
  "acm_Arab",
@@ -206,17 +219,5 @@
206
  "zho_Hans",
207
  "zho_Hant",
208
  "zul_Latn"
209
- ],
210
- "is_local": false,
211
- "legacy_behaviour": false,
212
- "local_files_only": false,
213
- "mask_token": "<mask>",
214
- "model_max_length": 1024,
215
- "pad_token": "<pad>",
216
- "sep_token": "</s>",
217
- "sp_model_kwargs": {},
218
- "src_lang": "eng_Latn",
219
- "tgt_lang": null,
220
- "tokenizer_class": "NllbTokenizer",
221
- "unk_token": "<unk>"
222
- }
 
3
  "bos_token": "<s>",
4
  "cls_token": "<s>",
5
  "eos_token": "</s>",
6
+ "extra_special_tokens": {},
7
+ "is_local": false,
8
+ "legacy_behaviour": false,
9
+ "local_files_only": false,
10
+ "mask_token": "<mask>",
11
+ "model_max_length": 1024,
12
+ "pad_token": "<pad>",
13
+ "sep_token": "</s>",
14
+ "sp_model_kwargs": {},
15
+ "src_lang": "eng_Latn",
16
+ "tgt_lang": null,
17
+ "tokenizer_class": "NllbTokenizer",
18
+ "unk_token": "<unk>",
19
+ "additional_special_tokens": [
20
  "ace_Arab",
21
  "ace_Latn",
22
  "acm_Arab",
 
219
  "zho_Hans",
220
  "zho_Hant",
221
  "zul_Latn"
222
+ ]
223
+ }