jmedroberta-base-manbyo-wordpiece / tokenizer_config.json
kaisugi's picture
initial commit
4b0b386
{
"tokenizer_class": "BertJapaneseTokenizer",
"word_tokenizer_type": "mecab",
"subword_tokenizer_type": "wordpiece",
"mecab_kwargs": {
"mecab_dic": "ipadic",
"mecab_option": "-u /usr/local/lib/mecab/dic/userdic/MANBYO_201907_Dic-utf8.dic",
"normalize_text": false
}
}