nesv042 committed on
Commit
911013a
1 Parent(s): 3d4a29a

Adding rhoknp package reference

Browse files
Files changed (1) hide show
  1. tokenization_deberta_v2_jumanpp.py +2 -1
tokenization_deberta_v2_jumanpp.py CHANGED
@@ -24,12 +24,13 @@ class JumanppTokenizer:
24
  "You need to install rhoknp to use JumanppPreTokenizer. "
25
  "See https://github.com/ku-nlp/rhoknp for installation."
26
  )
 
27
  self.jumanpp = rhoknp.Jumanpp()
28
 
29
  def tokenize(self, text: str) -> str:
30
  morphemes = self.jumanpp.apply_to_sentence(text).morphemes
31
  if not morphemes:
32
- doc = rhoknp.Document.from_raw_text(text)
33
  morphemes = self.jumanpp.apply_to_document(doc).morphemes
34
  return " ".join([morpheme.surf for morpheme in morphemes])
35
 
 
24
  "You need to install rhoknp to use JumanppPreTokenizer. "
25
  "See https://github.com/ku-nlp/rhoknp for installation."
26
  )
27
+ self.rhoknp = rhoknp
28
  self.jumanpp = rhoknp.Jumanpp()
29
 
30
  def tokenize(self, text: str) -> str:
31
  morphemes = self.jumanpp.apply_to_sentence(text).morphemes
32
  if not morphemes:
33
+ doc = self.rhoknp.Document.from_raw_text(text)
34
  morphemes = self.jumanpp.apply_to_document(doc).morphemes
35
  return " ".join([morpheme.surf for morpheme in morphemes])
36