kajyuuen committed on
Commit
7fd2c42
1 Parent(s): fbeeaf3

default unidic_lite

Browse files
Files changed (1) hide show
  1. distilbert_japanese_tokenizer.py +2 -2
distilbert_japanese_tokenizer.py CHANGED
@@ -440,7 +440,7 @@ class MecabTokenizer:
440
  do_lower_case=False,
441
  never_split=None,
442
  normalize_text=True,
443
- mecab_dic: Optional[str] = "unidic",
444
  mecab_option: Optional[str] = None,
445
  ):
446
  """
@@ -454,7 +454,7 @@ class MecabTokenizer:
454
  [`PreTrainedTokenizer.tokenize`]) List of tokens not to split.
455
  **normalize_text**: (*optional*) boolean (default True)
456
  Whether to apply unicode normalization to text before tokenization.
457
- **mecab_dic**: (*optional*) string (default "unidic")
458
  Name of dictionary to be used for MeCab initialization. If you are using a system-installed dictionary,
459
  set this option to `None` and modify *mecab_option*.
460
  **mecab_option**: (*optional*) string
 
440
  do_lower_case=False,
441
  never_split=None,
442
  normalize_text=True,
443
+ mecab_dic: Optional[str] = "unidic_lite",
444
  mecab_option: Optional[str] = None,
445
  ):
446
  """
 
454
  [`PreTrainedTokenizer.tokenize`]) List of tokens not to split.
455
  **normalize_text**: (*optional*) boolean (default True)
456
  Whether to apply unicode normalization to text before tokenization.
457
+ **mecab_dic**: (*optional*) string (default "unidic_lite")
458
  Name of dictionary to be used for MeCab initialization. If you are using a system-installed dictionary,
459
  set this option to `None` and modify *mecab_option*.
460
  **mecab_option**: (*optional*) string