KoichiYasuoka committed on
Commit
c354835
1 Parent(s): 7106399
Files changed (1) hide show
  1. ud.py +7 -6
ud.py CHANGED
@@ -1,5 +1,10 @@
1
  from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
2
  from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
 
 
 
 
 
3
 
4
  class UniversalDependenciesPipeline(TokenClassificationPipeline):
5
  def _forward(self,model_inputs):
@@ -78,14 +83,9 @@ class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
78
  if not (os.path.isdir(d) and os.path.isfile(r)):
79
  import zipfile
80
  import tempfile
81
- try:
82
- from transformers.utils import cached_file
83
- except:
84
- from transformers.file_utils import cached_path,hf_bucket_url
85
- cached_file=lambda x,y:cached_path(hf_bucket_url(x,y))
86
  self.dicdir=tempfile.TemporaryDirectory()
87
  d=self.dicdir.name
88
- with zipfile.ZipFile(cached_file("KoichiYasuoka/deberta-base-japanese-juman-ud-goeswith","mecab-jumandic-utf8.zip")) as z:
89
  z.extractall(d)
90
  r=os.path.join(d,"mecabrc")
91
  with open(r,"w",encoding="utf-8") as w:
@@ -101,3 +101,4 @@ class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
101
  super().save_pretrained(save_directory,**kwargs)
102
  self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
103
  shutil.copy(os.path.abspath(__file__),os.path.join(save_directory,"ud.py"))
 
 
1
  from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
2
  from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
3
+ try:
4
+ from transformers.utils import cached_file
5
+ except:
6
+ from transformers.file_utils import cached_path,hf_bucket_url
7
+ cached_file=lambda x,y:cached_path(hf_bucket_url(x,y))
8
 
9
  class UniversalDependenciesPipeline(TokenClassificationPipeline):
10
  def _forward(self,model_inputs):
 
83
  if not (os.path.isdir(d) and os.path.isfile(r)):
84
  import zipfile
85
  import tempfile
 
 
 
 
 
86
  self.dicdir=tempfile.TemporaryDirectory()
87
  d=self.dicdir.name
88
+ with zipfile.ZipFile(cached_file(self.name_or_path,"mecab-jumandic-utf8.zip")) as z:
89
  z.extractall(d)
90
  r=os.path.join(d,"mecabrc")
91
  with open(r,"w",encoding="utf-8") as w:
 
101
  super().save_pretrained(save_directory,**kwargs)
102
  self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
103
  shutil.copy(os.path.abspath(__file__),os.path.join(save_directory,"ud.py"))
104
+ shutil.copy(cached_file(self.name_or_path,"mecab-jumandic-utf8.zip"),os.path.join(save_directory,"mecab-jumandic-utf8.zip"))