skylord
/

greek_lsr_1

@@ -1,5 +1,5 @@
 ---
-language: {el}
 datasets:
 - common_voice
 metrics:
@@ -11,24 +11,24 @@ tags:
 - xlsr-fine-tuning-week
 license: apache-2.0
 model-index:
-- name: {Greek XLSR Wav2Vec2 Large 53}
 results:
 - task:
     name: Speech Recognition
     type: automatic-speech-recognition
     dataset:
-      name: Common Voice {el}
       type: common_voice
-      args: {el}
     metrics:
        - name: Test WER
          type: wer
-         value: {1.00}
 ---
-# Wav2Vec2-Large-XLSR-53-{Greek}
-Fine-tuned [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on {Greek} using the [Common Voice](https://huggingface.co/datasets/common_voice), ... and ... dataset{s}. #TODO: replace {language} with your language, *e.g.* French and eventually add more datasets that were used and eventually remove common voice if model was not trained on common voice
 When using this model, make sure that your speech input is sampled at 16kHz.
 ## Usage
@@ -40,20 +40,26 @@ import torch
 import torchaudio
 from datasets import load_dataset
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
-test_dataset = load_dataset("common_voice", "{el}", split="test[:2%]") #TODO: replace {lang_id} in your language code here. Make sure the code is one of the *ISO codes* of [this](https://huggingface.co/languages) site.
-processor = Wav2Vec2Processor.from_pretrained("{skylord/greek_lsr_1}") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
-model = Wav2Vec2ForCTC.from_pretrained("{skylord/greek_lsr_1}") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-	speech_array, sampling_rate = torchaudio.load(batch["path"])
-	batch["speech"] = resampler(speech_array).squeeze().numpy()
-	return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
 with torch.no_grad():
-	logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
 predicted_ids = torch.argmax(logits, dim=-1)
 print("Prediction:", processor.batch_decode(predicted_ids))
 print("Reference:", test_dataset["sentence"][:2])
@@ -62,7 +68,7 @@ print("Reference:", test_dataset["sentence"][:2])
 ## Evaluation
-The model can be evaluated as follows on the {Greek} test data of Common Voice.  # TODO: replace #TODO: replace language with your {language}, *e.g.* French
 ```python
@@ -71,35 +77,44 @@ import torchaudio
 from datasets import load_dataset, load_metric
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import re
-test_dataset = load_dataset("common_voice", "{el}", split="test") #TODO: replace {lang_id} in your language code here. Make sure the code is one of the *ISO codes* of [this](https://huggingface.co/languages) site.
 wer = load_metric("wer")
-processor = Wav2Vec2Processor.from_pretrained("{skylord/greek_lsr_1}") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
-model = Wav2Vec2ForCTC.from_pretrained("{skylord/greek_lsr_1}") #TODO: replace {model_id} with your model id. The model id consists of {your_username}/{your_modelname}, *e.g.* `elgeish/wav2vec2-large-xlsr-53-arabic`
 model.to("cuda")
-chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“]'  # TODO: adapt this list to include all special characters you removed from the data
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
-	batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
-	speech_array, sampling_rate = torchaudio.load(batch["path"])
-	batch["speech"] = resampler(speech_array).squeeze().numpy()
-	return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 # Running inference on the preprocessed batches.
 # We take the argmax of the logits and decode it into predicted strings
 def evaluate(batch):
-	inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
-	with torch.no_grad():
-		logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
-	pred_ids = torch.argmax(logits, dim=-1)
-	batch["pred_strings"] = processor.batch_decode(pred_ids)
-	return batch
 result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
-**Test Result**: 1.00 %  # TODO: write output of print here. IMPORTANT: Please remember to also replace {wer_result_on_test} at the top of with this value here. tags.
 ## Training

 ---
+language: el
 datasets:
 - common_voice
 metrics:
 - xlsr-fine-tuning-week
 license: apache-2.0
 model-index:
+- name: Greek XLSR Wav2Vec2 Large 53
 results:
 - task:
     name: Speech Recognition
     type: automatic-speech-recognition
     dataset:
+      name: Common Voice el
       type: common_voice
+      args: el
     metrics:
        - name: Test WER
          type: wer
+         value: 56.253154
 ---
+# Wav2Vec2-Large-XLSR-53-Greek
+Fine-tuned [facebook/wav2vec2-large-xlsr-53](https://huggingface.co/facebook/wav2vec2-large-xlsr-53) on Greek using the [Common Voice](https://huggingface.co/datasets/common_voice) dataset.
 When using this model, make sure that your speech input is sampled at 16kHz.
 ## Usage
 import torchaudio
 from datasets import load_dataset
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
+test_dataset = load_dataset("common_voice", "el", split="test[:2%]")
+processor = Wav2Vec2Processor.from_pretrained("skylord/greek_lsr_1")
+model = Wav2Vec2ForCTC.from_pretrained("skylord/greek_lsr_1")
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
+  speech_array, sampling_rate = torchaudio.load(batch["path"])
+  batch["speech"] = resampler(speech_array).squeeze().numpy()
+  return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 inputs = processor(test_dataset["speech"][:2], sampling_rate=16_000, return_tensors="pt", padding=True)
 with torch.no_grad():
+  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
 predicted_ids = torch.argmax(logits, dim=-1)
 print("Prediction:", processor.batch_decode(predicted_ids))
 print("Reference:", test_dataset["sentence"][:2])
 ## Evaluation
+The model can be evaluated as follows on the Greek test data of Common Voice.
 ```python
 from datasets import load_dataset, load_metric
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
 import re
+test_dataset = load_dataset("common_voice", "el", split="test")
 wer = load_metric("wer")
+processor = Wav2Vec2Processor.from_pretrained("skylord/greek_lsr_1")
+model = Wav2Vec2ForCTC.from_pretrained("skylord/greek_lsr_1")
 model.to("cuda")
+chars_to_ignore_regex = '[\,\?\.\!\-\;\:\"\“]'
 resampler = torchaudio.transforms.Resample(48_000, 16_000)
 # Preprocessing the datasets.
 # We need to read the audio files as arrays
 def speech_file_to_array_fn(batch):
+  batch["sentence"] = re.sub(chars_to_ignore_regex, '', batch["sentence"]).lower()
+  speech_array, sampling_rate = torchaudio.load(batch["path"])
+  batch["speech"] = resampler(speech_array).squeeze().numpy()
+  return batch
 test_dataset = test_dataset.map(speech_file_to_array_fn)
 # Running inference on the preprocessed batches.
 # We take the argmax of the logits and decode it into predicted strings
 def evaluate(batch):
+  inputs = processor(batch["speech"], sampling_rate=16_000, return_tensors="pt", padding=True)
+  with torch.no_grad():
+    logits = model(inputs.input_values.to("cuda"), attention_mask=inputs.attention_mask.to("cuda")).logits
+  pred_ids = torch.argmax(logits, dim=-1)
+  batch["pred_strings"] = processor.batch_decode(pred_ids)
+  return batch
 result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
+**Test Result**: 56.253154 %
 ## Training

README.md CHANGED Viewed

@@ -23,7 +23,7 @@ results:
     metrics:
        - name: Test WER
          type: wer
-         value: ???
 ---
 # Wav2Vec2-Large-XLSR-53-Greek
@@ -114,7 +114,7 @@ result = test_dataset.map(evaluate, batched=True, batch_size=8)
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
-**Test Result**: ??? %
 ## Training

     metrics:
        - name: Test WER
          type: wer
+         value: 56.253154
 ---
 # Wav2Vec2-Large-XLSR-53-Greek
 print("WER: {:2f}".format(100 * wer.compute(predictions=result["pred_strings"], references=result["sentence"])))
 ```
+**Test Result**: 56.253154 %
 ## Training