cahya committed on
Commit
4a5c974
1 Parent(s): 3c61c77

fixed the resampler to use the correct sampling rate, which improved

Browse files

the WER by more than 2 percentage points (from 20.67% to 18.32%)

Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -23,7 +23,7 @@ model-index:
23
  metrics:
24
  - name: Test WER
25
  type: wer
26
- value: 20.67
27
  ---
28
 
29
  # Wav2Vec2-Large-XLSR-Indonesian
@@ -46,12 +46,12 @@ test_dataset = load_dataset("common_voice", "id", split="test[:2%]")
46
  processor = Wav2Vec2Processor.from_pretrained("indonesian-nlp/wav2vec2-large-xlsr-indonesian")
47
  model = Wav2Vec2ForCTC.from_pretrained("indonesian-nlp/wav2vec2-large-xlsr-indonesian")
48
 
49
- resampler = torchaudio.transforms.Resample(48_000, 16_000)
50
 
51
  # Preprocessing the datasets.
52
  # We need to read the audio files as arrays
53
  def speech_file_to_array_fn(batch):
54
  speech_array, sampling_rate = torchaudio.load(batch["path"])
 
55
  batch["speech"] = resampler(speech_array).squeeze().numpy()
56
  return batch
57
 
@@ -117,7 +117,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
117
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
118
  ```
119
 
120
- **Test Result**: 20.67 %
121
 
122
  ## Training
123
 
23
  metrics:
24
  - name: Test WER
25
  type: wer
26
+ value: 18.32
27
  ---
28
 
29
  # Wav2Vec2-Large-XLSR-Indonesian
46
  processor = Wav2Vec2Processor.from_pretrained("indonesian-nlp/wav2vec2-large-xlsr-indonesian")
47
  model = Wav2Vec2ForCTC.from_pretrained("indonesian-nlp/wav2vec2-large-xlsr-indonesian")
48
 
 
49
 
50
  # Preprocessing the datasets.
51
  # We need to read the audio files as arrays
52
  def speech_file_to_array_fn(batch):
53
  speech_array, sampling_rate = torchaudio.load(batch["path"])
54
+ resampler = torchaudio.transforms.Resample(sampling_rate, 16_000)
55
  batch["speech"] = resampler(speech_array).squeeze().numpy()
56
  return batch
57
 
117
  print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
118
  ```
119
 
120
+ **Test Result**: 18.32 %
121
 
122
  ## Training
123