Commit
•
c354835
1
Parent(s):
7106399
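bug fix: move the cached_file import fallback to module level so it is available outside the dictionary-extraction branch, and copy mecab-jumandic-utf8.zip into save_directory alongside ud.py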
Browse files
ud.py
CHANGED
@@ -1,5 +1,10 @@
|
|
1 |
from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
|
2 |
from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
class UniversalDependenciesPipeline(TokenClassificationPipeline):
|
5 |
def _forward(self,model_inputs):
|
@@ -78,14 +83,9 @@ class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
|
|
78 |
if not (os.path.isdir(d) and os.path.isfile(r)):
|
79 |
import zipfile
|
80 |
import tempfile
|
81 |
-
try:
|
82 |
-
from transformers.utils import cached_file
|
83 |
-
except:
|
84 |
-
from transformers.file_utils import cached_path,hf_bucket_url
|
85 |
-
cached_file=lambda x,y:cached_path(hf_bucket_url(x,y))
|
86 |
self.dicdir=tempfile.TemporaryDirectory()
|
87 |
d=self.dicdir.name
|
88 |
-
with zipfile.ZipFile(cached_file(
|
89 |
z.extractall(d)
|
90 |
r=os.path.join(d,"mecabrc")
|
91 |
with open(r,"w",encoding="utf-8") as w:
|
@@ -101,3 +101,4 @@ class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
|
|
101 |
super().save_pretrained(save_directory,**kwargs)
|
102 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
103 |
shutil.copy(os.path.abspath(__file__),os.path.join(save_directory,"ud.py"))
|
|
|
|
1 |
from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
|
2 |
from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
|
3 |
+
try:
|
4 |
+
from transformers.utils import cached_file
|
5 |
+
except:
|
6 |
+
from transformers.file_utils import cached_path,hf_bucket_url
|
7 |
+
cached_file=lambda x,y:cached_path(hf_bucket_url(x,y))
|
8 |
|
9 |
class UniversalDependenciesPipeline(TokenClassificationPipeline):
|
10 |
def _forward(self,model_inputs):
|
|
|
83 |
if not (os.path.isdir(d) and os.path.isfile(r)):
|
84 |
import zipfile
|
85 |
import tempfile
|
|
|
|
|
|
|
|
|
|
|
86 |
self.dicdir=tempfile.TemporaryDirectory()
|
87 |
d=self.dicdir.name
|
88 |
+
with zipfile.ZipFile(cached_file(self.name_or_path,"mecab-jumandic-utf8.zip")) as z:
|
89 |
z.extractall(d)
|
90 |
r=os.path.join(d,"mecabrc")
|
91 |
with open(r,"w",encoding="utf-8") as w:
|
|
|
101 |
super().save_pretrained(save_directory,**kwargs)
|
102 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
103 |
shutil.copy(os.path.abspath(__file__),os.path.join(save_directory,"ud.py"))
|
104 |
+
shutil.copy(cached_file(self.name_or_path,"mecab-jumandic-utf8.zip"),os.path.join(save_directory,"mecab-jumandic-utf8.zip"))
|