tokenizer_config.json · vpelloin/MEDIA_NLU-flaubert_oral

MEDIA_NLU-flaubert_oral_ft / tokenizer_config.json

Upload tokenizer

17c4388 over 1 year ago

616 Bytes

	{
	"additional_special_tokens": [
	"<special0>",
	"<special1>",
	"<special2>",
	"<special3>",
	"<special4>",
	"<special5>",
	"<special6>",
	"<special7>",
	"<special8>",
	"<special9>"
	],
	"bos_token": "<s>",
	"clean_up_tokenization_spaces": true,
	"cls_token": "</s>",
	"do_lower_case": true,
	"id2lang": null,
	"keep_accents": true,
	"lang2id": null,
	"mask_token": "<special1>",
	"model_max_length": 1000000000000000019884624838656,
	"pad_token": "<pad>",
	"sep_token": "</s>",
	"tokenizer_class": "FlaubertTokenizer",
	"tokenizer_file": null,
	"unk_token": "<unk>"
	}