usoni1 committed on
Commit
16592fa
1 Parent(s): cc7773c

Update tokenizer_config.json

Browse files

Need to add bos_token, eos_token and unk_token explicitly. Otherwise, it causes maximum recursion depth error. See https://github.com/EleutherAI/lm-evaluation-harness/issues/442

Files changed (1) hide show
  1. tokenizer_config.json +3 -3
tokenizer_config.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "bos_token": "",
3
- "eos_token": "",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
  "special_tokens_map_file": "/home/ubuntu/.cache/huggingface/hub/models--decapoda-research--llama-7b-hf/snapshots/5f98eefcc80e437ef68d457ad7bf167c2c6a1348/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
- "unk_token": ""
9
  }
 
1
  {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
  "model_max_length": 512,
5
  "padding_side": "right",
6
  "special_tokens_map_file": "/home/ubuntu/.cache/huggingface/hub/models--decapoda-research--llama-7b-hf/snapshots/5f98eefcc80e437ef68d457ad7bf167c2c6a1348/special_tokens_map.json",
7
  "tokenizer_class": "LlamaTokenizer",
8
+ "unk_token": "<unk>"
9
  }