patrickvonplaten committed on
Commit
9901e0b
1 Parent(s): 8558f54

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -1
README.md CHANGED
@@ -28,6 +28,7 @@ The only change from the existing ASR pipeline will be:
28
 
29
  ```diff
30
  import torch
 
31
  -from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
32
  +from transformers import Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM
33
  from datasets import load_dataset
@@ -36,11 +37,13 @@ ds = load_dataset("common_voice", "es", split="test", streaming=True)
36
 
37
  sample = next(iter(ds))
38
 
 
 
39
  model = Wav2Vec2ForCTC.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
40
  -processor = Wav2Vec2Processor.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
41
  +processor = Wav2Vec2ProcessorWithLM.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
42
 
43
- input_values = processor(sample["audio"]["array"], return_tensors="pt").input_values
44
 
45
  with torch.no_grad():
46
  logits = model(input_values).logits
 
28
 
29
  ```diff
30
  import torch
31
+ import torchaudio.functional as F
32
  -from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
33
  +from transformers import Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM
34
  from datasets import load_dataset
 
37
 
38
  sample = next(iter(ds))
39
 
40
+ resampled_audio = F.resample(torch.tensor(sample["audio"]["array"]), 48_000, 16_000).numpy()
41
+
42
  model = Wav2Vec2ForCTC.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
43
  -processor = Wav2Vec2Processor.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
44
  +processor = Wav2Vec2ProcessorWithLM.from_pretrained("patrickvonplaten/wav2vec2-large-xlsr-53-spanish-with-lm")
45
 
46
+ input_values = processor(resampled_audio, return_tensors="pt").input_values
47
 
48
  with torch.no_grad():
49
  logits = model(input_values).logits