Allow only unidic_lite

#2
by kajyuuen - opened
Files changed (2) hide show
  1. README.md +8 -0
  2. distilbert_japanese_tokenizer.py +0 -16
README.md CHANGED
@@ -28,6 +28,14 @@ sentence = "LINE株式会社で[MASK]の研究・開発をしている。"
28
  print(model(**tokenizer(sentence, return_tensors="pt")))
29
  ```
30
 
 
 
 
 
 
 
 
 
31
  ## Model architecture
32
 
33
  The model architecture is the DistilBERT base model; 6 layers, 768 dimensions of hidden states, 12 attention heads, 66M parameters.
 
28
  print(model(**tokenizer(sentence, return_tensors="pt")))
29
  ```
30
 
31
+ ### Requirements
32
+
33
+ ```txt
34
+ fugashi
35
+ sentencepiece
36
+ unidic-lite
37
+ ```
38
+
39
  ## Model architecture
40
 
41
  The model architecture is the DistilBERT base model; 6 layers, 768 dimensions of hidden states, 12 attention heads, 66M parameters.
distilbert_japanese_tokenizer.py CHANGED
@@ -485,22 +485,6 @@ class MecabTokenizer:
485
  )
486
 
487
  dic_dir = unidic_lite.DICDIR
488
- elif mecab_dic == "unidic":
489
- try:
490
- import unidic
491
- except ModuleNotFoundError as error:
492
- raise error.__class__(
493
- "The unidic dictionary is not installed. "
494
- "See https://github.com/polm/unidic-py for installation."
495
- )
496
-
497
- dic_dir = unidic.DICDIR
498
- if not os.path.isdir(dic_dir):
499
- raise RuntimeError(
500
- "The unidic dictionary itself is not found. "
501
- "See https://github.com/polm/unidic-py for installation."
502
- )
503
-
504
  else:
505
  raise ValueError("Invalid mecab_dic is specified.")
506
 
 
485
  )
486
 
487
  dic_dir = unidic_lite.DICDIR
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
488
  else:
489
  raise ValueError("Invalid mecab_dic is specified.")
490