Automatic Speech Recognition
Transformers
PyTorch
speech-encoder-decoder
speech
xls_r
xls_r_translation
Inference Endpoints

Update import of processor in README.md

#3
opened by sanchit-gandhi (HF staff)

Running the step-by-step example throws a TypeError when loading the Speech2Text2Processor:

import torch
from transformers import Speech2Text2Processor, SpeechEncoderDecoderModel
from datasets import load_dataset

model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
processor = Speech2Text2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
Full stack trace:
----> 1 processor = Speech2Text2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")

File ~/transformers/src/transformers/processing_utils.py:186, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    156 @classmethod
    157 def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
    158     r"""
    159     Instantiate a processor associated with a pretrained model.
    160 
   (...)
    184             [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
    185     """
--> 186     args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
    187     return cls(*args)

File ~/transformers/src/transformers/processing_utils.py:230, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
    227     else:
    228         attribute_class = getattr(transformers_module, class_name)
--> 230     args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
    231 return args

File ~/transformers/src/transformers/tokenization_utils_base.py:1805, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
   1802     else:
   1803         logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1805 return cls._from_pretrained(
   1806     resolved_vocab_files,
   1807     pretrained_model_name_or_path,
   1808     init_configuration,
   1809     *init_inputs,
   1810     use_auth_token=use_auth_token,
   1811     cache_dir=cache_dir,
   1812     **kwargs,
   1813 )

File ~/transformers/src/transformers/tokenization_utils_base.py:1950, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, *init_inputs, **kwargs)
   1948 # Instantiate tokenizer.
   1949 try:
-> 1950     tokenizer = cls(*init_inputs, **init_kwargs)
   1951 except OSError:
   1952     raise OSError(
   1953         "Unable to load vocabulary from file. "
   1954         "Please check that the provided vocabulary is accessible and not corrupted."
   1955     )

File ~/transformers/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py:124, in Speech2Text2Tokenizer.__init__(self, vocab_file, bos_token, pad_token, eos_token, unk_token, do_lower_case, merges_file, **kwargs)
    113 super().__init__(
    114     unk_token=unk_token,
    115     bos_token=bos_token,
   (...)
    119     **kwargs,
    120 )
    122 self.do_lower_case = do_lower_case
--> 124 with open(vocab_file, encoding="utf-8") as vocab_handle:
    125     self.encoder = json.load(vocab_handle)
    126 self.decoder = {v: k for k, v in self.encoder.items()}

TypeError: expected str, bytes or os.PathLike object, not NoneType

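The traceback shows that Speech2Text2Tokenizer ends up being called with vocab_file=None, which suggests the checkpoint does not ship the vocab.json that the Speech2Text2 tokenizer class looks for. A quick, non-authoritative way to check which preprocessing files the repo actually contains is to list its contents with huggingface_hub (the exact file names in the output are not asserted here):

from huggingface_hub import list_repo_files

# Inspect the files shipped with the checkpoint. If no vocab.json usable by
# Speech2Text2Tokenizer is present, its from_pretrained call resolves
# vocab_file to None, which matches the TypeError above.
for filename in list_repo_files("facebook/wav2vec2-xls-r-300m-21-to-en"):
    print(filename)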
This is remedied by loading the correct processor class (Wav2Vec2Processor):

import torch
from transformers import Wav2Vec2Processor, SpeechEncoderDecoderModel
from datasets import load_dataset

model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
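For completeness, a minimal end-to-end sketch with the corrected processor class is shown below. The dataset, column names, and sampling-rate handling are illustrative assumptions, not part of the original example: the model translates speech in one of its 21 source languages to English, so any 16 kHz sample in a supported language can be substituted for the dummy split used here.

import torch
from datasets import load_dataset, Audio
from transformers import Wav2Vec2Processor, SpeechEncoderDecoderModel

model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")

# Illustrative audio source only: a small public dummy split, resampled to 16 kHz.
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
ds = ds.cast_column("audio", Audio(sampling_rate=16_000))
sample = ds[0]["audio"]

# Feature-extract the raw waveform, then translate with generate().
inputs = processor(sample["array"], sampling_rate=16_000, return_tensors="pt")
with torch.no_grad():
    generated_ids = model.generate(inputs.input_values)

translation = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
print(translation)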
Ready to merge
This branch is ready to be merged automatically.
