basujindal committed on
Commit
b7feb49
1 Parent(s): 56d6481

Update tokenizer_config.json

Browse files

Configuring the bos, eos, pad, and unk tokens allows the tokenizer to load successfully; without them, loading fails with a recursion error: "RecursionError: maximum recursion depth exceeded while getting the str of an object." The same fix was applied here: https://huggingface.co/medalpaca/medalpaca-13b/discussions/7

Files changed (1) hide show
  1. tokenizer_config.json +4 -3
tokenizer_config.json CHANGED
@@ -1,9 +1,10 @@
1
  {
2
- "bos_token": "",
3
- "eos_token": "",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
  "special_tokens_map_file": "../llama-7b-hf/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
- "unk_token": ""
 
9
  }
 
1
  {
2
+ "bos_token": "</s>",
3
+ "eos_token": "</s>",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
  "special_tokens_map_file": "../llama-7b-hf/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
+ "unk_token": "</s>",
9
+ "pad_token": "[PAD]"
10
  }