patrickvonplaten committed on
Commit
7b6c44f
1 Parent(s): 858530d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +1 -8
README.md CHANGED
@@ -50,20 +50,13 @@ The model can be used for automatic-speech-recognition as follows:
50
  import torch
51
  from transformers import Wav2Vec2Processor, HubertForCTC
52
  from datasets import load_dataset
53
- import soundfile as sf
54
 
55
  processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-xlarge-ls960-ft")
56
  model = HubertForCTC.from_pretrained("facebook/hubert-xlarge-ls960-ft")
57
-
58
- def map_to_array(batch):
59
- speech, _ = sf.read(batch["file"])
60
- batch["speech"] = speech
61
- return batch
62
 
63
  ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
64
- ds = ds.map(map_to_array)
65
 
66
- input_values = processor(ds["speech"][0], return_tensors="pt").input_values # Batch size 1
67
  logits = model(input_values).logits
68
  predicted_ids = torch.argmax(logits, dim=-1)
69
  transcription = processor.decode(predicted_ids[0])
 
50
  import torch
51
  from transformers import Wav2Vec2Processor, HubertForCTC
52
  from datasets import load_dataset
 
53
 
54
  processor = Wav2Vec2Processor.from_pretrained("facebook/hubert-xlarge-ls960-ft")
55
  model = HubertForCTC.from_pretrained("facebook/hubert-xlarge-ls960-ft")
 
 
 
 
 
56
 
57
  ds = load_dataset("patrickvonplaten/librispeech_asr_dummy", "clean", split="validation")
 
58
 
59
+ input_values = processor(ds[0]["audio"]["array"], return_tensors="pt").input_values # Batch size 1
60
  logits = model(input_values).logits
61
  predicted_ids = torch.argmax(logits, dim=-1)
62
  transcription = processor.decode(predicted_ids[0])