Cannot load model or tokenizer

#33
by IJSDerbashi - opened

Good day!

Last Friday, I loaded the model without problems and ran some tests. Today (Monday), I am not able to run the same code.
Additionally, I ran the code in the README and the suggested Colab cell verbatim. Every time, I get the same error, whether I load the tokenizer after the model or the model after the tokenizer.

The error is:

tokenizer_config.json: 100%
 1.16M/1.16M [00:00<00:00, 8.50MB/s]
tokenizer.model:   0%
 0.00/4.69M [00:28<?, ?B/s]

---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py in from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
   2059                 try:
-> 2060                     resolved_vocab_files[file_id] = cached_file(
   2061                         pretrained_model_name_or_path,

10 frames

/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py in cached_file(path_or_repo_id, filename, **kwargs)
    321     """
--> 322     file = cached_files(path_or_repo_id=path_or_repo_id, filenames=[filename], **kwargs)
    323     file = file[0] if file is not None else file

/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py in cached_files(path_or_repo_id, filenames, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
    566         elif not isinstance(e, EntryNotFoundError):
--> 567             raise e
    568 

/usr/local/lib/python3.12/dist-packages/transformers/utils/hub.py in cached_files(path_or_repo_id, filenames, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)
    478             # This is slightly better for only 1 file
--> 479             hf_hub_download(
    480                 path_or_repo_id,

/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_validators.py in _inner_fn(*args, **kwargs)
    113 
--> 114         return fn(*args, **kwargs)
    115 

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in hf_hub_download(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)
   1009     else:
-> 1010         return _hf_hub_download_to_cache_dir(
   1011             # Destination

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in _hf_hub_download_to_cache_dir(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)
   1170     with WeakFileLock(lock_path):
-> 1171         _download_to_tmp_and_move(
   1172             incomplete_path=Path(blob_path + ".incomplete"),

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in _download_to_tmp_and_move(incomplete_path, destination_path, url_to_download, proxies, headers, expected_size, filename, force_download, etag, xet_file_data)
   1722             logger.debug("Xet Storage is enabled for this repo. Downloading file from Xet Storage..")
-> 1723             xet_get(
   1724                 incomplete_path=incomplete_path,

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in xet_get(incomplete_path, xet_file_data, headers, expected_size, displayed_filename, _tqdm_bar)
    628 
--> 629         download_files(
    630             xet_download_info,

RuntimeError: Data processing error: CAS service error : Reqwest Error: HTTP status server error (500 Internal Server Error), domain: https://cas-server.xethub.hf.co/reconstructions/a81fa217b67ef4a1992b48a47651c27a2a19df419eafd1aad9c0bbd5ff49bde3


During handling of the above exception, another exception occurred:

OSError                                   Traceback (most recent call last)

/tmp/ipython-input-1108580225.py in <cell line: 0>()
      2 from transformers import AutoTokenizer, AutoModelForCausalLM
      3 
----> 4 tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-1b-it")
      5 model = AutoModelForCausalLM.from_pretrained("google/gemma-3-1b-it")
      6 messages = [

/usr/local/lib/python3.12/dist-packages/transformers/models/auto/tokenization_auto.py in from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs)
   1138                     f"Tokenizer class {tokenizer_class_candidate} does not exist or is not currently imported."
   1139                 )
-> 1140             return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
   1141 
   1142         # Otherwise we have to be creative.

/usr/local/lib/python3.12/dist-packages/transformers/tokenization_utils_base.py in from_pretrained(cls, pretrained_model_name_or_path, cache_dir, force_download, local_files_only, token, revision, trust_remote_code, *init_inputs, **kwargs)
   2078                 except Exception:
   2079                     # For any other exception, we throw a generic error.
-> 2080                     raise OSError(
   2081                         f"Can't load tokenizer for '{pretrained_model_name_or_path}'. If you were trying to load it from "
   2082                         "'https://huggingface.co/models', make sure you don't have a local directory with the same name. "

OSError: Can't load tokenizer for 'google/gemma-3-1b-it'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'google/gemma-3-1b-it' is the correct path to a directory containing all relevant files for a GemmaTokenizerFast tokenizer.

This is a fresh Colab runtime so there are no directories with that name.

Additionally, I tried to download the model manually using:

from huggingface_hub import snapshot_download

model_id = "google/gemma-3-1b-it"

local_dir = "/models/gemma3_1b"
snapshot_download(
    model_id,
    local_dir=local_dir,
    max_workers=8,
)

And I get back the following:

Fetching 10 files:  50%
 5/10 [01:15<00:03,  1.66it/s]
generation_config.json: 100%
 215/215 [00:00<00:00, 3.49kB/s]
.gitattributes: 100%
 1.68k/1.68k [00:00<00:00, 34.4kB/s]
README.md: 100%
 24.3k/24.3k [00:00<00:00, 302kB/s]
added_tokens.json: 100%
 35.0/35.0 [00:00<00:00, 379B/s]
config.json: 100%
 899/899 [00:00<00:00, 12.1kB/s]
special_tokens_map.json: 100%
 662/662 [00:00<00:00, 20.4kB/s]
tokenizer.json:   0%
 0.00/33.4M [01:02<?, ?B/s]
model.safetensors:   0%
 0.00/2.00G [01:14<?, ?B/s]
tokenizer_config.json: 100%
 1.16M/1.16M [00:00<00:00, 2.58MB/s]
tokenizer.model:   0%
 0.00/4.69M [00:26<?, ?B/s]

---------------------------------------------------------------------------

RuntimeError                              Traceback (most recent call last)

/tmp/ipython-input-1573216716.py in <cell line: 0>()
      4 
      5 local_dir = "/models/gemma3_1b"
----> 6 snapshot_download(
      7     model_id,
      8     local_dir=local_dir,

16 frames

/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_validators.py in _inner_fn(*args, **kwargs)
    112             kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
    113 
--> 114         return fn(*args, **kwargs)
    115 
    116     return _inner_fn  # type: ignore

/usr/local/lib/python3.12/dist-packages/huggingface_hub/_snapshot_download.py in snapshot_download(repo_id, repo_type, revision, cache_dir, local_dir, library_name, library_version, user_agent, proxies, etag_timeout, force_download, token, local_files_only, allow_patterns, ignore_patterns, max_workers, tqdm_class, headers, endpoint, local_dir_use_symlinks, resume_download)
    330             _inner_hf_hub_download(file)
    331     else:
--> 332         thread_map(
    333             _inner_hf_hub_download,
    334             filtered_repo_files,

/usr/local/lib/python3.12/dist-packages/tqdm/contrib/concurrent.py in thread_map(fn, *iterables, **tqdm_kwargs)
     67     """
     68     from concurrent.futures import ThreadPoolExecutor
---> 69     return _executor_map(ThreadPoolExecutor, fn, *iterables, **tqdm_kwargs)
     70 
     71 

/usr/local/lib/python3.12/dist-packages/tqdm/contrib/concurrent.py in _executor_map(PoolExecutor, fn, *iterables, **tqdm_kwargs)
     49         with PoolExecutor(max_workers=max_workers, initializer=tqdm_class.set_lock,
     50                           initargs=(lk,)) as ex:
---> 51             return list(tqdm_class(ex.map(fn, *iterables, chunksize=chunksize), **kwargs))
     52 
     53 

/usr/local/lib/python3.12/dist-packages/tqdm/notebook.py in __iter__(self)
    248         try:
    249             it = super().__iter__()
--> 250             for obj in it:
    251                 # return super(tqdm...) will not catch exception
    252                 yield obj

/usr/local/lib/python3.12/dist-packages/tqdm/std.py in __iter__(self)
   1179 
   1180         try:
-> 1181             for obj in iterable:
   1182                 yield obj
   1183                 # Update and possibly print the progressbar.

/usr/lib/python3.12/concurrent/futures/_base.py in result_iterator()
    617                     # Careful not to keep a reference to the popped future
    618                     if timeout is None:
--> 619                         yield _result_or_cancel(fs.pop())
    620                     else:
    621                         yield _result_or_cancel(fs.pop(), end_time - time.monotonic())

/usr/lib/python3.12/concurrent/futures/_base.py in _result_or_cancel(***failed resolving arguments***)
    315     try:
    316         try:
--> 317             return fut.result(timeout)
    318         finally:
    319             fut.cancel()

/usr/lib/python3.12/concurrent/futures/_base.py in result(self, timeout)
    454                     raise CancelledError()
    455                 elif self._state == FINISHED:
--> 456                     return self.__get_result()
    457                 else:
    458                     raise TimeoutError()

/usr/lib/python3.12/concurrent/futures/_base.py in __get_result(self)
    399         if self._exception:
    400             try:
--> 401                 raise self._exception
    402             finally:
    403                 # Break a reference cycle with the exception in self._exception

/usr/lib/python3.12/concurrent/futures/thread.py in run(self)
     57 
     58         try:
---> 59             result = self.fn(*self.args, **self.kwargs)
     60         except BaseException as exc:
     61             self.future.set_exception(exc)

/usr/local/lib/python3.12/dist-packages/huggingface_hub/_snapshot_download.py in _inner_hf_hub_download(repo_file)
    304     # have the file locally.
    305     def _inner_hf_hub_download(repo_file: str):
--> 306         return hf_hub_download(
    307             repo_id,
    308             filename=repo_file,

/usr/local/lib/python3.12/dist-packages/huggingface_hub/utils/_validators.py in _inner_fn(*args, **kwargs)
    112             kwargs = smoothly_deprecate_use_auth_token(fn_name=fn.__name__, has_token=has_token, kwargs=kwargs)
    113 
--> 114         return fn(*args, **kwargs)
    115 
    116     return _inner_fn  # type: ignore

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in hf_hub_download(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)
    988             )
    989 
--> 990         return _hf_hub_download_to_local_dir(
    991             # Destination
    992             local_dir=local_dir,

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in _hf_hub_download_to_local_dir(local_dir, repo_id, repo_type, filename, revision, endpoint, etag_timeout, headers, proxies, token, cache_dir, force_download, local_files_only)
   1298     with WeakFileLock(paths.lock_path):
   1299         paths.file_path.unlink(missing_ok=True)  # delete outdated file first
-> 1300         _download_to_tmp_and_move(
   1301             incomplete_path=paths.incomplete_path(etag),
   1302             destination_path=paths.file_path,

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in _download_to_tmp_and_move(incomplete_path, destination_path, url_to_download, proxies, headers, expected_size, filename, force_download, etag, xet_file_data)
   1721         if xet_file_data is not None and is_xet_available():
   1722             logger.debug("Xet Storage is enabled for this repo. Downloading file from Xet Storage..")
-> 1723             xet_get(
   1724                 incomplete_path=incomplete_path,
   1725                 xet_file_data=xet_file_data,

/usr/local/lib/python3.12/dist-packages/huggingface_hub/file_download.py in xet_get(incomplete_path, xet_file_data, headers, expected_size, displayed_filename, _tqdm_bar)
    627             progress.update(progress_bytes)
    628 
--> 629         download_files(
    630             xet_download_info,
    631             endpoint=connection_info.endpoint,

RuntimeError: Data processing error: CAS service error : Reqwest Error: HTTP status server error (500 Internal Server Error), domain: https://cas-server.xethub.hf.co/reconstructions/7473bad0373d89455623d1039c446aa729eb9d5a5b3a7e6316e188cee4e50a40

Can you assist with that?

UPDATE: This seems to have been caused by an AWS outage affecting data transfer from HF.
In my case, it was fixed about 4 hours after the AWS outage was resolved.

IJSDerbashi changed discussion status to closed

Sign up or log in to comment