update processor and readme

Browse files

Files changed (5) hide show

README.md +8 -6
config.json +3 -2
preprocessor_config.json +3 -3
pytorch_model.bin +2 -2
tokenizer_config.json +1 -1

README.md CHANGED Viewed

@@ -24,7 +24,7 @@ autoregressive cross-entropy loss and generates the transcripts autoregressively
 ## Intended uses & limitations
 This model can be used for end-to-end speech recognition (ASR).
-See the [model hub](https://huggingface.co/models?filter=speech_to_text_transformer) to look for other S2T checkpoints.
 ### How to use
@@ -35,16 +35,18 @@ transcripts by passing the speech features to the model.
 *Note: The `Speech2TextProcessor` object uses [torchaudio](https://github.com/pytorch/audio)  to extract the
 filter bank features. Make sure to install the `torchaudio` package before running this example.*
-To install `torchaudio` run `pip install torchaudio`
 ```python
 import torch
-from transformers import Speech2TextProcessor, Speech2TextTransformerForConditionalGeneration
 from datasets import load_dataset
 import soundfile as sf
-model = Speech2TextTransformerForConditionalGeneration.from_pretrained("facebook/s2t-medium-librispeech-asr")
 processor = Speech2TextProcessor.from_pretrained("facebook/s2t-medium-librispeech-asr")
 def map_to_array(batch):
@@ -76,13 +78,13 @@ The following script shows how to evaluate this model on the [LibriSpeech](https
 ```python
 from datasets import load_dataset, load_metric
-from transformers import Speech2TextTransformerForConditionalGeneration, Speech2TextProcessor
 import soundfile as sf
 librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")  # change to "other" for other test dataset
 wer = load_metric("wer")
-model = Speech2TextTransformerForConditionalGeneration.from_pretrained("facebook/s2t-medium-librispeech-asr").to("cuda")
 processor = Speech2TextProcessor.from_pretrained("facebook/s2t-medium-librispeech-asr", do_upper_case=True)
 def map_to_array(batch):

 ## Intended uses & limitations
 This model can be used for end-to-end speech recognition (ASR).
+See the [model hub](https://huggingface.co/models?filter=speech_to_text) to look for other S2T checkpoints.
 ### How to use
 *Note: The `Speech2TextProcessor` object uses [torchaudio](https://github.com/pytorch/audio)  to extract the
 filter bank features. Make sure to install the `torchaudio` package before running this example.*
+You can either install the required packages as extra speech dependencies with
+`pip install "transformers[speech,sentencepiece]"` or install them separately
+with `pip install torchaudio sentencepiece`.
 ```python
 import torch
+from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
 from datasets import load_dataset
 import soundfile as sf
+model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-medium-librispeech-asr")
 processor = Speech2TextProcessor.from_pretrained("facebook/s2t-medium-librispeech-asr")
 def map_to_array(batch):
 ```python
 from datasets import load_dataset, load_metric
+from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
 import soundfile as sf
 librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")  # change to "other" for other test dataset
 wer = load_metric("wer")
+model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-medium-librispeech-asr").to("cuda")
 processor = Speech2TextProcessor.from_pretrained("facebook/s2t-medium-librispeech-asr", do_upper_case=True)
 def map_to_array(batch):

config.json CHANGED Viewed

@@ -1,8 +1,9 @@
 {
   "activation_dropout": 0.15,
   "activation_function": "relu",
   "architectures": [
-    "Speech2TextTransformerForConditionalGeneration"
   ],
   "attention_dropout": 0.15,
   "bos_token_id": 0,
@@ -33,7 +34,7 @@
   "max_length": 200,
   "max_source_positions": 6000,
   "max_target_positions": 1024,
-  "model_type": "speech_to_text_transformer",
   "num_beams": 5,
   "num_conv_layers": 2,
   "num_hidden_layers": 12,

 {
+  "_name_or_path": "hf_models_fb/s2t-medium-librispeech-asr/",
   "activation_dropout": 0.15,
   "activation_function": "relu",
   "architectures": [
+    "Speech2TextForConditionalGeneration"
   ],
   "attention_dropout": 0.15,
   "bos_token_id": 0,
   "max_length": 200,
   "max_source_positions": 6000,
   "max_target_positions": 1024,
+  "model_type": "speech_to_text",
   "num_beams": 5,
   "num_conv_layers": 2,
   "num_hidden_layers": 12,

preprocessor_config.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "do_normalize": true,
   "feature_size": 80,
-  "norm_means": true,
-  "norm_vars": true,
   "num_mel_bins": 80,
   "padding_side": "right",
   "padding_value": 0.0,

 {
+  "do_ceptral_normalize": true,
   "feature_size": 80,
+  "normalize_means": true,
+  "normalize_vars": true,
   "num_mel_bins": 80,
   "padding_side": "right",
   "padding_value": 0.0,

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8b016d9dde06a7d3f73855d19ef597b61055a5ef6d9ec0d12132c7f4077e2aea
-size 284968270

 version https://git-lfs.github.com/spec/v1
+oid sha256:57cc7b2911f0849f3d63bedffb8837e68ce407a2b2b7843c54ec0dae7037813f
+size 284953916

tokenizer_config.json CHANGED Viewed

@@ -1 +1 @@

- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "do_upper_case": false, "do_lower_case": true, "tgt_lang": null, "lang_codes": null, "special_tokens_map_file": "/home/suraj/.cache/huggingface/transformers/f39f1499e9c4d2b3e803e3cad8a31c4cf3e626e1c69197d4cd6921e5c07007f9.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd", "tokenizer_file": null, "name_or_path": "hf_models_fb/s2t-small-librispeech-asr/"}

+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>", "do_upper_case": false, "do_lower_case": true, "tgt_lang": null, "lang_codes": null, "special_tokens_map_file": "/home/suraj/.cache/huggingface/transformers/f39f1499e9c4d2b3e803e3cad8a31c4cf3e626e1c69197d4cd6921e5c07007f9.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd", "tokenizer_file": null, "name_or_path": "hf_models_fb/s2t-medium-librispeech-asr/"}