Commit
•
03cac51
1
Parent(s):
c354835
bug fix
Browse files
ud.py
CHANGED
@@ -1,10 +1,11 @@
|
|
|
|
1 |
from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
|
2 |
from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
|
3 |
try:
|
4 |
from transformers.utils import cached_file
|
5 |
except:
|
6 |
from transformers.file_utils import cached_path,hf_bucket_url
|
7 |
-
cached_file=lambda x,y:cached_path(hf_bucket_url(x,y))
|
8 |
|
9 |
class UniversalDependenciesPipeline(TokenClassificationPipeline):
|
10 |
def _forward(self,model_inputs):
|
@@ -76,7 +77,6 @@ class MecabPreTokenizer(MecabTokenizer):
|
|
76 |
|
77 |
class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
|
78 |
def __init__(self,**kwargs):
|
79 |
-
import os
|
80 |
from tokenizers.pre_tokenizers import PreTokenizer,Metaspace,Sequence
|
81 |
super().__init__(**kwargs)
|
82 |
d,r="/var/lib/mecab/dic/juman-utf8","/etc/mecabrc"
|
@@ -93,7 +93,6 @@ class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
|
|
93 |
self.custom_pre_tokenizer=Sequence([PreTokenizer.custom(MecabPreTokenizer(mecab_dic=None,mecab_option="-d "+d+" -r "+r)),Metaspace()])
|
94 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
95 |
def save_pretrained(self,save_directory,**kwargs):
|
96 |
-
import os
|
97 |
import shutil
|
98 |
from tokenizers.pre_tokenizers import Metaspace
|
99 |
self._auto_map={"AutoTokenizer":[None,"ud.JumanDebertaV2TokenizerFast"]}
|
|
|
1 |
+
import os
|
2 |
from transformers import TokenClassificationPipeline,DebertaV2TokenizerFast
|
3 |
from transformers.models.bert_japanese.tokenization_bert_japanese import MecabTokenizer
|
4 |
try:
|
5 |
from transformers.utils import cached_file
|
6 |
except:
|
7 |
from transformers.file_utils import cached_path,hf_bucket_url
|
8 |
+
cached_file=lambda x,y:os.path.join(x,y) if os.path.isdir(x) else cached_path(hf_bucket_url(x,y))
|
9 |
|
10 |
class UniversalDependenciesPipeline(TokenClassificationPipeline):
|
11 |
def _forward(self,model_inputs):
|
|
|
77 |
|
78 |
class JumanDebertaV2TokenizerFast(DebertaV2TokenizerFast):
|
79 |
def __init__(self,**kwargs):
|
|
|
80 |
from tokenizers.pre_tokenizers import PreTokenizer,Metaspace,Sequence
|
81 |
super().__init__(**kwargs)
|
82 |
d,r="/var/lib/mecab/dic/juman-utf8","/etc/mecabrc"
|
|
|
93 |
self.custom_pre_tokenizer=Sequence([PreTokenizer.custom(MecabPreTokenizer(mecab_dic=None,mecab_option="-d "+d+" -r "+r)),Metaspace()])
|
94 |
self._tokenizer.pre_tokenizer=self.custom_pre_tokenizer
|
95 |
def save_pretrained(self,save_directory,**kwargs):
|
|
|
96 |
import shutil
|
97 |
from tokenizers.pre_tokenizers import Metaspace
|
98 |
self._auto_map={"AutoTokenizer":[None,"ud.JumanDebertaV2TokenizerFast"]}
|