othrif
/

wav2vec_test

Automatic Speech Recognition Transformers PyTorch Arabic wav2vec2 audio speech Inference Endpoints

Model card Files Files and versions Community

patrickvonplaten committed on Mar 23, 2021

Commit

ca734d4

•

1 Parent(s): e31c31b

Update README.md

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -17,7 +17,7 @@ model-index:
     dataset:
       name: arabicspeech.org MGB-3
       type: arabicspeech.org MGB-3
-      args: {lang_id}
     metrics:
        - name: Test WER
          type: wer
@@ -40,13 +40,13 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-	speech_array, sampling_rate = torchaudio.load(batch["path"])
-	batch["speech"] = resampler(speech_array).squeeze().numpy()
-	return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
 with torch.no_grad():
-	logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
 predicted_ids = torch.argmax(logits, dim=-1)
 print("Prediction:", processor.batch_decode(predicted_ids))
 print("Reference:", test_dataset["sentence"][:2])

     dataset:
       name: arabicspeech.org MGB-3
       type: arabicspeech.org MGB-3
+      args: ar
     metrics:
        - name: Test WER
          type: wer
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
+    speech_array, sampling_rate = torchaudio.load(batch["path"])
+    batch["speech"] = resampler(speech_array).squeeze().numpy()
+    return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
 with torch.no_grad():
+    logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
 predicted_ids = torch.argmax(logits, dim=-1)
 print("Prediction:", processor.batch_decode(predicted_ids))
 print("Reference:", test_dataset["sentence"][:2])