jimregan committed
Commit 0ec2efd • 1 Parent(s): 1e90d22

actually fix

Files changed (1)
  1. README.md +26 -27
README.md CHANGED
@@ -69,45 +69,44 @@ model.to("cuda")
 # So, tolower() for Irish is a bit complicated: tAthar -> t-athair
 # toupper() is non-deterministic :)
 def is_upper_vowel(letter):
-if letter in ['A', 'E', 'I', 'O', 'U', 'Á', 'É', 'Í', 'Ó', 'Ú']:
-return True
-else:
-return False
+    if letter in ['A', 'E', 'I', 'O', 'U', 'Á', 'É', 'Í', 'Ó', 'Ú']:
+        return True
+    else:
+        return False
 def irish_lower(word):
-if len(word) > 1 and word[0] in ['n', 't'] and is_upper_vowel(word[1]):
-return word[0] + '-' + word[1:].lower()
-else:
-return word.lower()
+    if len(word) > 1 and word[0] in ['n', 't'] and is_upper_vowel(word[1]):
+        return word[0] + '-' + word[1:].lower()
+    else:
+        return word.lower()
 def irish_lower_sentence(sentence):
-return " ".join([irish_lower(w) for w in sentence.split(" ")])
+    return " ".join([irish_lower(w) for w in sentence.split(" ")])
 chars_to_ignore_regex = '[,\?\.\!\;\:\"\“\%\‘\”\(\)\*]'
-def remove_special_characters(batch):
-tmp = re.sub('’ ', ' ', batch["sentence"])
-tmp = re.sub("’$", '', tmp)
-tmp = re.sub('’', '\'', tmp)
-tmp = re.sub(chars_to_ignore_regex, '', tmp)
-batch["sentence"] = irish_lower_sentence(tmp) + ' '
-return batch
+def remove_special_characters(sentence):
+    tmp = re.sub('’ ', ' ', sentence)
+    tmp = re.sub("’$", '', tmp)
+    tmp = re.sub('’', '\'', tmp)
+    tmp = re.sub(chars_to_ignore_regex, '', tmp)
+    sentence = irish_lower_sentence(tmp) + ' '
+    return sentence
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-batch["sentence"] = remove_special_characters(batch)
-speech_array, sampling_rate = torchaudio.load(batch["path"])
-batch["speech"] = resampler(speech_array).squeeze().numpy()
-return batch
+    batch["sentence"] = remove_special_characters(batch["sentence"])
+    speech_array, sampling_rate = torchaudio.load(batch["path"])
+    batch["speech"] = resampler(speech_array).squeeze().numpy()
+    return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def evaluate(batch):
-inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
-with torch.no_grad():
-logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
-pred_ids = torch.argmax(logits, dim=-1)
-batch["pred_strings"] = processor.batch_decode(pred_ids)
-return batch
+    inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
+    pred_ids = torch.argmax(logits, dim=-1)
+    batch["pred_strings"] = processor.batch_decode(pred_ids)
+    return batch
 result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
 **Test Result**: 49.3 %
-```
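For context on what "actually fix" changes: before this commit, `speech_file_to_array_fn` passed the whole `batch` dict into `remove_special_characters`, which then handed it to `re.sub` (which expects a string) and returned the dict instead of the cleaned text. The sketch below is not part of the README; it condenses the updated helpers slightly and runs them on a made-up sentence (the sample text and the printed results in the comments are assumptions) to show the string-in/string-out behaviour and the n-/t- prefix hyphenation the comments describe.

```python
import re

# Condensed from the updated README snippet above.
def is_upper_vowel(letter):
    return letter in ['A', 'E', 'I', 'O', 'U', 'Á', 'É', 'Í', 'Ó', 'Ú']

def irish_lower(word):
    # An initial n/t before a capital vowel stays lowercase and gains a hyphen.
    if len(word) > 1 and word[0] in ['n', 't'] and is_upper_vowel(word[1]):
        return word[0] + '-' + word[1:].lower()
    return word.lower()

def irish_lower_sentence(sentence):
    return " ".join([irish_lower(w) for w in sentence.split(" ")])

chars_to_ignore_regex = '[,\?\.\!\;\:\"\“\%\‘\”\(\)\*]'

def remove_special_characters(sentence):
    # Takes and returns a plain string, which is the point of the fix:
    # the old version received the whole batch dict and fed it to re.sub.
    tmp = re.sub('’ ', ' ', sentence)
    tmp = re.sub("’$", '', tmp)
    tmp = re.sub('’', '\'', tmp)
    tmp = re.sub(chars_to_ignore_regex, '', tmp)
    return irish_lower_sentence(tmp) + ' '

# Hypothetical example sentence, not taken from the dataset:
print(irish_lower("tUisce"))                                       # t-uisce
print(remove_special_characters("Tá an tUisce fuar, a Sheáin!"))   # tá an t-uisce fuar a sheáin 
```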