lighteternal
/

wav2vec2-large-xlsr-53-greek

@@ -28,6 +28,7 @@ model-index:
 ---
 # Greek (el) version of the XLSR-Wav2Vec2 automatic speech recognition (ASR) model
 * language: el
@@ -68,7 +69,7 @@ import numpy as np
 from datasets import load_dataset, load_metric
 import torch
-chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�]'
 def remove_special_characters(batch):
     batch["text"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
@@ -139,7 +140,7 @@ print("Prediction:")
 print(processor.decode(pred_ids[0]))
 # πού θέλεις να πάμε ρώτησε φοβισμένα ο βασιλιάς
-print("\\
 Reference:")
 print(common_voice_test_transcription["sentence"][example].lower())
 # πού θέλεις να πάμε; ρώτησε φοβισμένα ο βασιλιάς.
@@ -165,7 +166,7 @@ processor = Wav2Vec2Processor.from_pretrained("lighteternal/wav2vec2-large-xlsr-
 model = Wav2Vec2ForCTC.from_pretrained("lighteternal/wav2vec2-large-xlsr-53-greek")
 model.to("cuda")
-chars_to_ignore_regex = '[\\\\,\\\\?\\\\.\\\\!\\\\-\\\\;\\\\:\\\\"\\\\“\\\\%\\\\‘\\\\”\\\\�]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
@@ -213,7 +214,10 @@ Instructions and code to replicate the process are provided in the Fine_Tune_XLS
-### Acknowledgment
 Based on the tutorial of Patrick von Platen: https://huggingface.co/blog/fine-tune-xlsr-wav2vec2
 Original colab notebook here: https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLSR_Wav2Vec2_on_Turkish_ASR_with_%F0%9F%A4%97_Transformers.ipynb#scrollTo=V7YOT2mnUiea

 ---
 # Greek (el) version of the XLSR-Wav2Vec2 automatic speech recognition (ASR) model
+### By the Hellenic Army Academy and the Technical University of Crete
 * language: el
 from datasets import load_dataset, load_metric
 import torch
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�]'
 def remove_special_characters(batch):
     batch["text"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower() + " "
 print(processor.decode(pred_ids[0]))
 # πού θέλεις να πάμε ρώτησε φοβισμένα ο βασιλιάς
+print("\nReference:")
 print(common_voice_test_transcription["sentence"][example].lower())
 # πού θέλεις να πάμε; ρώτησε φοβισμένα ο βασιλιάς.
 model = Wav2Vec2ForCTC.from_pretrained("lighteternal/wav2vec2-large-xlsr-53-greek")
 model.to("cuda")
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“\%\‘\”\�]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
+### Acknowledgement
+This research work was supported by the Hellenic Foundation for Research and Innovation (HFRI) under the HFRI PhD Fellowship grant (Fellowship Number: 50, 2nd call).
 Based on the tutorial of Patrick von Platen: https://huggingface.co/blog/fine-tune-xlsr-wav2vec2
 Original colab notebook here: https://colab.research.google.com/github/patrickvonplaten/notebooks/blob/master/Fine_Tune_XLSR_Wav2Vec2_on_Turkish_ASR_with_%F0%9F%A4%97_Transformers.ipynb#scrollTo=V7YOT2mnUiea