patrickvonplaten committed on
Commit
fb0e041
•
1 Parent(s): dc7d85c

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +6 -6
README.md CHANGED
@@ -17,7 +17,7 @@ model-index:
17
  dataset:
18
  name: Common Voice ky
19
  type: common_voice
20
- args: {lang_id}
21
  metrics:
22
  - name: Test WER
23
  type: wer
@@ -49,15 +49,15 @@ resampler = torchaudio.transforms.Resample(48_000, 16_000)
49
  # Preprocessing the datasets.
50
  # We need to read the audio files as arrays
51
  def speech_file_to_array_fn(batch):
52
- \\tspeech_array, sampling_rate = torchaudio.load(batch["path"])
53
- \\tbatch["speech"] = resampler(speech_array).squeeze().numpy()
54
- \\treturn batch
55
 
56
  test_dataset = test_dataset.map(speech_file_to_array_fn)
57
  inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
58
 
59
  with torch.no_grad():
60
- \\tlogits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
61
 
62
  predicted_ids = torch.argmax(logits, dim=-1)
63
 
@@ -85,7 +85,7 @@ processor = Wav2Vec2Processor.from_pretrained("iarfmoose/wav2vec2-large-xlsr-kyr
85
  model = Wav2Vec2ForCTC.from_pretrained("iarfmoose/wav2vec2-large-xlsr-kyrgyz")
86
  model.to("cuda")
87
 
88
- chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\β€œ\\\\%\\\\β€˜\\\\”\\\\οΏ½\\\\–\\\\β€”\\\\Β¬\\\\β…›]'
89
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
90
 
91
  def speech_file_to_array_fn(batch):
17
  dataset:
18
  name: Common Voice ky
19
  type: common_voice
20
+ args: ky
21
  metrics:
22
  - name: Test WER
23
  type: wer
49
  # Preprocessing the datasets.
50
  # We need to read the audio files as arrays
51
  def speech_file_to_array_fn(batch):
52
+ \\\\tspeech_array, sampling_rate = torchaudio.load(batch["path"])
53
+ \\\\tbatch["speech"] = resampler(speech_array).squeeze().numpy()
54
+ \\\\treturn batch
55
 
56
  test_dataset = test_dataset.map(speech_file_to_array_fn)
57
  inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
58
 
59
  with torch.no_grad():
60
+ \\\\tlogits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
61
 
62
  predicted_ids = torch.argmax(logits, dim=-1)
63
 
85
  model = Wav2Vec2ForCTC.from_pretrained("iarfmoose/wav2vec2-large-xlsr-kyrgyz")
86
  model.to("cuda")
87
 
88
+ chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\β€œ\\\\\\\\%\\\\\\\\β€˜\\\\\\\\”\\\\\\\\οΏ½\\\\\\\\–\\\\\\\\β€”\\\\\\\\Β¬\\\\\\\\β…›]'
89
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
90
 
91
  def speech_file_to_array_fn(batch):