nesv042 committed on
Commit
072cc83
1 Parent(s): 71e8994

Fix jumanpp.apply_to_sentence returning an empty list

Browse files
Files changed (1) hide show
  1. tokenization_deberta_v2_jumanpp.py +6 -1
tokenization_deberta_v2_jumanpp.py CHANGED
@@ -27,4 +27,9 @@ class JumanppTokenizer:
27
  self.jumanpp = rhoknp.Jumanpp()
28
 
29
  def tokenize(self, text: str) -> str:
30
- return " ".join([morpheme.surf for morpheme in self.jumanpp.apply_to_sentence(text).morphemes])
 
 
 
 
 
 
27
  self.jumanpp = rhoknp.Jumanpp()
28
 
29
  def tokenize(self, text: str) -> str:
30
+ morphemes = self.jumanpp.apply_to_sentence(text).morphemes
31
+ if not morphemes:
32
+ doc = rhoknp.Document.from_raw_text(text)
33
+ morphemes = self.jumanpp.apply_to_document(doc).morphemes
34
+ return " ".join([morpheme.surf for morpheme in morphemes])
35
+