AttributeError: 'CharTokenizer' object has no attribute 'vocab'

#1
by caveman1 - opened

When running this in Colab (and not only there), it looks like the package versions have moved ahead of the model's custom code... (a likely cause and a couple of possible workarounds are sketched below, after the traceback).

AttributeError Traceback (most recent call last)
in <cell line: 4>()
2
3 model_name = "IlyaGusev/ru-word-stress-transformer"
----> 4 tokenizer = AutoTokenizer.from_pretrained(
5 model_name,
6 trust_remote_code=True,

/usr/local/lib/python3.10/dist-packages/transformers/models/auto/tokenization_auto.py in from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
753 if os.path.isdir(pretrained_model_name_or_path):
754 tokenizer_class.register_for_auto_class()
--> 755 return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
756 elif config_tokenizer_class is not None:
757 tokenizer_class = None

/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py in from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, *init_inputs, **kwargs)
2022 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
2023
-> 2024 return cls._from_pretrained(
2025 resolved_vocab_files,
2026 pretrained_model_name_or_path,

/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils_base.py in _from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, token, cache_dir, local_files_only, _commit_hash, _is_local, *init_inputs, **kwargs)
2254 # Instantiate the tokenizer.
2255 try:
-> 2256 tokenizer = cls(*init_inputs, **init_kwargs)
2257 except OSError:
2258 raise OSError(

~/.cache/huggingface/modules/transformers_modules/IlyaGusev/ru-word-stress-transformer/bae83ddbb1ac2aa9295d24bbd111eaa7caf18cf5/char_tokenizer.py in __init__(self, vocab_file, pad_token, unk_token, bos_token, eos_token, do_lower_case, *args, **kwargs)
31 **kwargs
32 ):
---> 33 super().__init__(
34 pad_token=pad_token,
35 unk_token=unk_token,

/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils.py in __init__(self, **kwargs)
365 # 4. If some of the special tokens are not part of the vocab, we add them, at the end.
366 # the order of addition is the same as self.SPECIAL_TOKENS_ATTRIBUTES following tokenizers
--> 367 self._add_tokens(
368 [token for token in self.all_special_tokens_extended if token not in self._added_tokens_encoder],
369 special_tokens=True,

/usr/local/lib/python3.10/dist-packages/transformers/tokenization_utils.py in _add_tokens(self, new_tokens, special_tokens)
465 return added_tokens
466 # TODO this is fairly slow to improve!
--> 467 current_vocab = self.get_vocab().copy()
468 new_idx = len(current_vocab) # only call this once, len gives the last index + 1
469 for token in new_tokens:

~/.cache/huggingface/modules/transformers_modules/IlyaGusev/ru-word-stress-transformer/bae83ddbb1ac2aa9295d24bbd111eaa7caf18cf5/char_tokenizer.py in get_vocab(self)
70
71 def get_vocab(self):
---> 72 return self.vocab
73
74 def _convert_token_to_id(self, token):

AttributeError: 'CharTokenizer' object has no attribute 'vocab'
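For what it's worth, the traceback chain points at an API change rather than a bug in transformers itself: starting around transformers 4.34, `PreTrainedTokenizer.__init__` registers special tokens during construction, which calls `self.get_vocab()` before the subclass's `__init__` has had a chance to set `self.vocab`. A quick workaround, assuming the repo's char_tokenizer.py was written against the older API, is to pin transformers to a pre-4.34 release before loading (the exact pin below is my assumption, not a version recommended by the model card):

```python
# Assumed workaround: pin transformers to a release that predates the
# tokenizer refactor. 4.33.3 is a guess at a compatible version, not a
# pin verified by the model author.
!pip install "transformers==4.33.3"

from transformers import AutoTokenizer

model_name = "IlyaGusev/ru-word-stress-transformer"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
```

In Colab you may need to restart the runtime after the downgrade so the older transformers actually gets imported.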

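The more durable fix would be inside the repo's char_tokenizer.py itself: build the vocabulary before calling `super().__init__()`, so `get_vocab()` already works when the base class adds special tokens. A minimal sketch of that reordering, assuming the vocab file holds one token per line with the line index as its id (the loading logic and the default token strings here are illustrative, the real file may differ):

```python
from transformers import PreTrainedTokenizer


class CharTokenizer(PreTrainedTokenizer):
    def __init__(self, vocab_file=None, pad_token="[pad]", unk_token="[unk]",
                 bos_token="[bos]", eos_token="[eos]", do_lower_case=False,
                 *args, **kwargs):
        self.do_lower_case = do_lower_case
        # Build the vocab FIRST: since ~4.34, PreTrainedTokenizer.__init__
        # calls self.get_vocab() while registering special tokens, so
        # self.vocab must exist before super().__init__() runs.
        self.vocab = {}
        if vocab_file is not None:
            with open(vocab_file, encoding="utf-8") as f:
                # Assumed format: one token per line, id = line index.
                self.vocab = {line.rstrip("\n"): i for i, line in enumerate(f)}
        self.ids_to_tokens = {i: t for t, i in self.vocab.items()}
        super().__init__(
            *args,
            pad_token=pad_token,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            do_lower_case=do_lower_case,
            **kwargs,
        )

    def get_vocab(self):
        return self.vocab

    # _convert_token_to_id, _convert_id_to_token, vocab_size, etc. would
    # stay as they are in the original file.
```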