lighteternal committed on
Commit
637fe0c
1 Parent(s): aa103c4

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +8 -4
README.md CHANGED
@@ -28,6 +28,7 @@ model-index:
28
  ---
29
 
30
  # Greek (el) version of the XLSR-Wav2Vec2 automatic speech recognition (ASR) model
 
31
 
32
 
33
  * language: el
@@ -68,7 +69,7 @@ import numpy as np
68
  from datasets import load_dataset, load_metric
69
  import torch
70
 
71
- chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�]'
72
 
73
  def remove_special_characters(batch):
74
  batch["text"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
@@ -139,7 +140,7 @@ print("Prediction:")
139
  print(processor.decode(pred_ids[0]))
140
  # πού θέλεις να πάμε ρώτησε φοβισμένα ο βασιλιάς
141
 
142
- print("\\
143
  Reference:")
144
  print(common_voice_test_transcription["sentence"][example].lower())
145
  # πού θέλεις να πάμε; ρώτησε φοβισμένα ο βασιλιάς.
@@ -165,7 +166,7 @@ processor = Wav2Vec2Processor.from_pretrained("lighteternal/wav2vec2-large-xlsr-
165
  model = Wav2Vec2ForCTC.from_pretrained("lighteternal/wav2vec2-large-xlsr-53-greek")
166
  model.to("cuda")
167
 
168
- chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�]'
169
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
170
 
171
  # Preprocessing the datasets.
@@ -213,7 +214,10 @@ Instructions and code to replicate the process are provided in the Fine_Tune_XLS
213
 
214
 
215
 
 
 
 
 
216
 
217
- ### Acknowledgment
218
  Based on the tutorial by Patrick von Platen: https://huggingface.co/blog/fine-tune-xlsr-wav2vec2
219
  Original colab notebook here: https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLSR_Wav2Vec2_on_Turkish_ASR_with_%F0%9F%A4%97_Transformers.ipynb#scrollTo=V7YOT2mnUiea
28
  ---
29
 
30
  # Greek (el) version of the XLSR-Wav2Vec2 automatic speech recognition (ASR) model
31
+ ### By the Hellenic Army Academy and the Technical University of Crete
32
 
33
 
34
  * language: el
69
  from datasets import load_dataset, load_metric
70
  import torch
71
 
72
+ chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“\\\\\\\\%\\\\\\\\‘\\\\\\\\”\\\\\\\\�]'
73
 
74
  def remove_special_characters(batch):
75
  batch["text"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
140
  print(processor.decode(pred_ids[0]))
141
  # πού θέλεις να πάμε ρώτησε φοβισμένα ο βασιλιάς
142
 
143
+ print("\\\\
144
  Reference:")
145
  print(common_voice_test_transcription["sentence"][example].lower())
146
  # πού θέλεις να πάμε; ρώτησε φοβισμένα ο βασιλιάς.
166
  model = Wav2Vec2ForCTC.from_pretrained("lighteternal/wav2vec2-large-xlsr-53-greek")
167
  model.to("cuda")
168
 
169
+ chars_to_ignore_regex = '[\\\\\\\\,\\\\\\\\?\\\\\\\\.\\\\\\\\!\\\\\\\\-\\\\\\\\;\\\\\\\\:\\\\\\\\"\\\\\\\\“\\\\\\\\%\\\\\\\\‘\\\\\\\\”\\\\\\\\�]'
170
  resampler = torchaudio.transforms.Resample(48_000, 16_000)
171
 
172
  # Preprocessing the datasets.
214
 
215
 
216
 
217
+ ### Acknowledgement
218
+
219
+ The research work was supported by the Hellenic Foundation for Research and Innovation (HFRI) under the HFRI PhD Fellowship grant (Fellowship Number: 50, 2nd call).
220
+
221
 
 
222
  Based on the tutorial by Patrick von Platen: https://huggingface.co/blog/fine-tune-xlsr-wav2vec2
223
  Original colab notebook here: https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLSR_Wav2Vec2_on_Turkish_ASR_with_%F0%9F%A4%97_Transformers.ipynb#scrollTo=V7YOT2mnUiea