Automatic Speech Recognition
Transformers
PyTorch
speech-encoder-decoder
speech
xls_r
xls_r_translation
Inference Endpoints
sanchit-gandhi HF staff committed on
Commit
c21534a
1 Parent(s): 4df5c4f

Update import of processor in README.md

Browse files

Running the step-by-step example throws a TypeError when loading the `Speech2Text2Processor`:
```python
import torch
from transformers import Speech2Text2Processor, SpeechEncoderDecoderModel
from datasets import load_dataset

model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
processor = Speech2Text2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")

```

<details>

<summary> Full stack trace </summary>

```python
----> 1 processor = Speech2Text2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")

File ~/transformers/src/transformers/processing_utils.py:186, in ProcessorMixin.from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
156 @classmethod
157 def from_pretrained(cls, pretrained_model_name_or_path, **kwargs):
158 r"""
159 Instantiate a processor associated with a pretrained model.
160
(...)
184 [`~tokenization_utils_base.PreTrainedTokenizer.from_pretrained`].
185 """
--> 186 args = cls._get_arguments_from_pretrained(pretrained_model_name_or_path, **kwargs)
187 return cls(*args)

File ~/transformers/src/transformers/processing_utils.py:230, in ProcessorMixin._get_arguments_from_pretrained(cls, pretrained_model_name_or_path, **kwargs)
227 else:
228 attribute_class = getattr(transformers_module, class_name)
--> 230 args.append(attribute_class.from_pretrained(pretrained_model_name_or_path, **kwargs))
231 return args

File ~/transformers/src/transformers/tokenization_utils_base.py:1805, in PreTrainedTokenizerBase.from_pretrained(cls, pretrained_model_name_or_path, *init_inputs, **kwargs)
1802 else:
1803 logger.info(f"loading file {file_path} from cache at {resolved_vocab_files[file_id]}")
-> 1805 return cls._from_pretrained(
1806 resolved_vocab_files,
1807 pretrained_model_name_or_path,
1808 init_configuration,
1809 *init_inputs,
1810 use_auth_token=use_auth_token,
1811 cache_dir=cache_dir,
1812 **kwargs,
1813 )

File ~/transformers/src/transformers/tokenization_utils_base.py:1950, in PreTrainedTokenizerBase._from_pretrained(cls, resolved_vocab_files, pretrained_model_name_or_path, init_configuration, use_auth_token, cache_dir, *init_inputs, **kwargs)
1948 # Instantiate tokenizer.
1949 try:
-> 1950 tokenizer = cls(*init_inputs, **init_kwargs)
1951 except OSError:
1952 raise OSError(
1953 "Unable to load vocabulary from file. "
1954 "Please check that the provided vocabulary is accessible and not corrupted."
1955 )

File ~/transformers/src/transformers/models/speech_to_text_2/tokenization_speech_to_text_2.py:124, in Speech2Text2Tokenizer.__init__(self, vocab_file, bos_token, pad_token, eos_token, unk_token, do_lower_case, merges_file, **kwargs)
113 super().__init__(
114 unk_token=unk_token,
115 bos_token=bos_token,
(...)
119 **kwargs,
120 )
122 self.do_lower_case = do_lower_case
--> 124 with open(vocab_file, encoding="utf-8") as vocab_handle:
125 self.encoder = json.load(vocab_handle)
126 self.decoder = {v: k for k, v in self.encoder.items()}

TypeError: expected str, bytes or os.PathLike object, not NoneType
```

</details>

This is remedied by loading the correct processor class (`Wav2Vec2Processor`):

```python
import torch
from transformers import Wav2Vec2Processor, SpeechEncoderDecoderModel
from datasets import load_dataset

model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
```

Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -115,11 +115,11 @@ or step-by-step as follows:
115
 
116
  ```python
117
  import torch
118
- from transformers import Speech2Text2Processor, SpeechEncoderDecoderModel
119
  from datasets import load_dataset
120
 
121
  model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
122
- processor = Speech2Text2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
123
 
124
  ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
125
 
 
115
 
116
  ```python
117
  import torch
118
+ from transformers import Wav2Vec2Processor, SpeechEncoderDecoderModel
119
  from datasets import load_dataset
120
 
121
  model = SpeechEncoderDecoderModel.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
122
+ processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-xls-r-300m-21-to-en")
123
 
124
  ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
125