pathcosmos committed on
Commit
8fb78b1
·
verified ·
1 Parent(s): c3a0907

fix: Update tokenizer_config.json with correct 3B ORPO byte-fallback tokenizer (vocab=64256)

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +7 -4
tokenizer_config.json CHANGED
@@ -1,11 +1,14 @@
1
  {
2
- "model_type": "llama",
3
- "tokenizer_class": "PreTrainedTokenizerFast",
4
  "bos_token": "<s>",
 
5
  "eos_token": "</s>",
6
- "unk_token": "<unk>",
 
 
7
  "pad_token": "<pad>",
8
- "clean_up_tokenization_spaces": false,
 
9
  "added_tokens_decoder": {
10
  "64000": {
11
  "content": "<0x00>",
 
1
  {
2
+ "backend": "tokenizers",
 
3
  "bos_token": "<s>",
4
+ "clean_up_tokenization_spaces": false,
5
  "eos_token": "</s>",
6
+ "is_local": true,
7
+ "model_max_length": 1000000000000000019884624838656,
8
+ "model_type": "llama",
9
  "pad_token": "<pad>",
10
+ "tokenizer_class": "TokenizersBackend",
11
+ "unk_token": "<unk>",
12
  "added_tokens_decoder": {
13
  "64000": {
14
  "content": "<0x00>",