patrickvonplaten committed on
Commit
29d8ab9
1 Parent(s): 11d2712

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -15
README.md CHANGED
@@ -69,25 +69,18 @@ with `pip install torchaudio sentencepiece`.
69
  import torch
70
  from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
71
  from datasets import load_dataset
72
- import soundfile as sf
73
 
74
  model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
75
  processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
76
 
77
- def map_to_array(batch):
78
- speech, _ = sf.read(batch["file"])
79
- batch["speech"] = speech
80
- return batch
81
-
82
  ds = load_dataset(
83
  "patrickvonplaten/librispeech_asr_dummy",
84
  "clean",
85
  split="validation"
86
  )
87
- ds = ds.map(map_to_array)
88
 
89
  input_features = processor(
90
- ds["speech"][0],
91
  sampling_rate=16_000,
92
  return_tensors="pt"
93
  ).input_features # Batch size 1
@@ -104,7 +97,6 @@ The following script shows how to evaluate this model on the [LibriSpeech](https
104
  ```python
105
  from datasets import load_dataset, load_metric
106
  from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
107
- import soundfile as sf
108
 
109
  librispeech_eval = load_dataset("librispeech_asr", "clean", split="test") # change to "other" for other test dataset
110
  wer = load_metric("wer")
@@ -112,15 +104,10 @@ wer = load_metric("wer")
112
  model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr").to("cuda")
113
  processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr", do_upper_case=True)
114
 
115
- def map_to_array(batch):
116
- speech, _ = sf.read(batch["file"])
117
- batch["speech"] = speech
118
- return batch
119
-
120
  librispeech_eval = librispeech_eval.map(map_to_array)
121
 
122
  def map_to_pred(batch):
123
- features = processor(batch["speech"], sampling_rate=16000, padding=True, return_tensors="pt")
124
  input_features = features.input_features.to("cuda")
125
  attention_mask = features.attention_mask.to("cuda")
126
 
 
69
  import torch
70
  from transformers import Speech2TextProcessor, Speech2TextForConditionalGeneration
71
  from datasets import load_dataset
 
72
 
73
  model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr")
74
  processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr")
75
 
 
 
 
 
 
76
  ds = load_dataset(
77
  "patrickvonplaten/librispeech_asr_dummy",
78
  "clean",
79
  split="validation"
80
  )
 
81
 
82
  input_features = processor(
83
+ ds["audio"]["array"][0],
84
  sampling_rate=16_000,
85
  return_tensors="pt"
86
  ).input_features # Batch size 1
 
97
  ```python
98
  from datasets import load_dataset, load_metric
99
  from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
 
100
 
101
  librispeech_eval = load_dataset("librispeech_asr", "clean", split="test") # change to "other" for other test dataset
102
  wer = load_metric("wer")
 
104
  model = Speech2TextForConditionalGeneration.from_pretrained("facebook/s2t-small-librispeech-asr").to("cuda")
105
  processor = Speech2TextProcessor.from_pretrained("facebook/s2t-small-librispeech-asr", do_upper_case=True)
106
 
 
 
 
 
 
107
  librispeech_eval = librispeech_eval.map(map_to_array)
108
 
109
  def map_to_pred(batch):
110
+ features = processor(batch["audio"]["array"], sampling_rate=16000, padding=True, return_tensors="pt")
111
  input_features = features.input_features.to("cuda")
112
  attention_mask = features.attention_mask.to("cuda")
113