Heinrich Dinkel committed on
Commit ·
79b1301
1
Parent(s): a09cac7
updated modeling
Browse files- modeling_glap.py +35 -9
modeling_glap.py
CHANGED
|
@@ -19,6 +19,11 @@ from einops import rearrange
|
|
| 19 |
from einops.layers.torch import Rearrange
|
| 20 |
from transformers import PreTrainedModel
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
from .configuration_glap import GlapConfig
|
| 23 |
|
| 24 |
|
|
@@ -815,17 +820,38 @@ class GlapModel(PreTrainedModel):
|
|
| 815 |
|
| 816 |
def _get_tokenizer(self) -> NllbTokenizer:
|
| 817 |
if self.tokenizer is None:
|
| 818 |
-
|
| 819 |
-
|
| 820 |
-
|
|
|
|
| 821 |
model_dir = Path(self.config._name_or_path)
|
| 822 |
-
|
| 823 |
-
|
| 824 |
-
|
| 825 |
-
|
| 826 |
-
|
| 827 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 828 |
)
|
|
|
|
| 829 |
self.tokenizer = NllbTokenizer(tokenizer_path)
|
| 830 |
return self.tokenizer
|
| 831 |
|
|
|
|
| 19 |
from einops.layers.torch import Rearrange
|
| 20 |
from transformers import PreTrainedModel
|
| 21 |
|
| 22 |
+
try:
|
| 23 |
+
from huggingface_hub import hf_hub_download
|
| 24 |
+
except ImportError:
|
| 25 |
+
hf_hub_download = None # type: ignore[assignment,misc]
|
| 26 |
+
|
| 27 |
from .configuration_glap import GlapConfig
|
| 28 |
|
| 29 |
|
|
|
|
| 820 |
|
| 821 |
def _get_tokenizer(self) -> NllbTokenizer:
    """Lazily locate and construct the NLLB sentencepiece tokenizer.

    Search order:
      1. the model directory from ``config._name_or_path`` (local checkout),
      2. the directory containing this file (HF modules cache / local install),
      3. a download from the HuggingFace Hub (if ``huggingface_hub`` is importable).

    Returns:
        The cached ``NllbTokenizer`` instance (built at most once).

    Raises:
        FileNotFoundError: if the sentencepiece model cannot be found in any
            location; a failed Hub download is chained as the cause.
    """
    if self.tokenizer is None:
        tokenizer_filename = "sentencepiece.source.256000.model"
        tokenizer_path: Optional[Path | str] = None
        hub_error: Optional[Exception] = None

        # 1. Check config._name_or_path (local directory)
        model_dir = Path(self.config._name_or_path)
        candidate = model_dir / tokenizer_filename
        if candidate.exists():
            tokenizer_path = candidate

        # 2. Check next to this file (modules cache / local install)
        if tokenizer_path is None:
            candidate = Path(__file__).parent / tokenizer_filename
            if candidate.exists():
                tokenizer_path = candidate

        # 3. Download from HuggingFace Hub (best effort — hf_hub_download is
        # None when huggingface_hub is not installed).
        if tokenizer_path is None and hf_hub_download is not None:
            try:
                tokenizer_path = hf_hub_download(
                    repo_id=self.config._name_or_path,
                    filename=tokenizer_filename,
                )
            except Exception as exc:
                # Keep the failure so the final error explains *why* the
                # Hub fallback did not produce the file, instead of
                # silently discarding it.
                hub_error = exc

        if tokenizer_path is None:
            raise FileNotFoundError(
                f"Could not find {tokenizer_filename}. "
                f"Searched {self.config._name_or_path} and "
                f"{Path(__file__).parent}, and the HuggingFace Hub "
                f"fallback did not succeed."
            ) from hub_error

        self.tokenizer = NllbTokenizer(tokenizer_path)
    return self.tokenizer