andreagasparini committed on
Commit
d779cee
1 Parent(s): 750319d

Adds comment explanation of when to resample

Browse files
Files changed (1) hide show
  1. README.md +2 -0
README.md CHANGED
@@ -102,6 +102,8 @@ model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
102
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
103
 
104
  def map_to_pred(batch):
 
 
105
  inputs = processor(batch["audio"]["array"], return_tensors="pt", padding="longest", sampling_rate=batch["audio"]["sampling_rate"])
106
  input_values = inputs.input_values.to("cuda")
107
  attention_mask = inputs.attention_mask.to("cuda")
102
  processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
103
 
104
  def map_to_pred(batch):
105
+ # LibriSpeech sampling rate (16kHz) is equal to Wav2Vec2 processor sampling rate -> pass audio directly to processor
106
+ # Note that in case the sampling rates do not match you would have to resample!
107
  inputs = processor(batch["audio"]["array"], return_tensors="pt", padding="longest", sampling_rate=batch["audio"]["sampling_rate"])
108
  input_values = inputs.input_values.to("cuda")
109
  attention_mask = inputs.attention_mask.to("cuda")