KoichiYasuoka committed on
Commit
03cac51
1 Parent(s): c354835
Files changed (1) hide show
  1. ud.py +2 -3
ud.py CHANGED
@@ -1,10 +1,11 @@
 
1
  from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
2
  from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
3
  try:
4
  from transformers.utils import cached_file
5
  except:
6
  from transformers.file_utils import cached_path,hf_bucket_url
7
- cached_file=lambda x,y:cached_path(hf_bucket_url(x,y))
8
 
9
  class UniversalDependenciesPipeline(TokenClassificationPipeline):
10
  def _forward(self,model_inputs):
@@ -76,7 +77,6 @@ class MecabPreTokenizer(MecabTokenizer):
76
 
77
  class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
78
  def __init__(self,**kwargs):
79
- import os
80
  from tokenizers.pre_tokenizers import PreTokenizer,Metaspace,Sequence
81
  super().__init__(**kwargs)
82
  d,r="/var/lib/mecab/dic/juman-utf8","/etc/mecabrc"
@@ -93,7 +93,6 @@ class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
93
  self.custom_pre_tokenizer=Sequence([PreTokenizer.custom(MecabPreTokenizer(mecab_dic=None,mecab_option="-d "+d+" -r "+r)),Metaspace()])
94
  self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
95
  def save_pretrained(self,save_directory,**kwargs):
96
- import os
97
  import shutil
98
  from tokenizers.pre_tokenizers import Metaspace
99
  self._auto_map={"AutoTokenizer":[None,"ud.JumanDebertaV2TokenizerFast"]}
 
1
+ import os
2
  from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
3
  from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
4
  try:
5
  from transformers.utils import cached_file
6
  except:
7
  from transformers.file_utils import cached_path,hf_bucket_url
8
+ cached_file=lambda x,y:os.path.join(x,y) if os.path.isdir(x) else cached_path(hf_bucket_url(x,y))
9
 
10
  class UniversalDependenciesPipeline(TokenClassificationPipeline):
11
  def _forward(self,model_inputs):
 
77
 
78
  class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
79
  def __init__(self,**kwargs):
 
80
  from tokenizers.pre_tokenizers import PreTokenizer,Metaspace,Sequence
81
  super().__init__(**kwargs)
82
  d,r="/var/lib/mecab/dic/juman-utf8","/etc/mecabrc"
 
93
  self.custom_pre_tokenizer=Sequence([PreTokenizer.custom(MecabPreTokenizer(mecab_dic=None,mecab_option="-d "+d+" -r "+r)),Metaspace()])
94
  self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
95
  def save_pretrained(self,save_directory,**kwargs):
 
96
  import shutil
97
  from tokenizers.pre_tokenizers import Metaspace
98
  self._auto_map={"AutoTokenizer":[None,"ud.JumanDebertaV2TokenizerFast"]}