conan1024hao committed on
Commit
bbf4b6d
1 Parent(s): 9b0b4ac

support sentencepiece tokenizer

Browse files
Files changed (2)
  1. tokenizer_config.json +1 -1
  2. vocab.txt +0 -0
tokenizer_config.json CHANGED
@@ -20,6 +20,6 @@
20
  "special_tokens_map_file": null,
21
  "tokenizer_class": "BertJapaneseTokenizer",
22
  "word_tokenizer_type": "jumanpp",
23
- "subword_tokenizer_type": "wordpiece",
24
  "jumanpp_kwargs": {}
25
  }
 
20
  "special_tokens_map_file": null,
21
  "tokenizer_class": "BertJapaneseTokenizer",
22
  "word_tokenizer_type": "jumanpp",
23
+ "subword_tokenizer_type": "sentencepiece",
24
  "jumanpp_kwargs": {}
25
  }
vocab.txt DELETED
The diff for this file is too large to render. See raw diff