marinone94
/

xls-r-300m-sv-robust

Automatic Speech Recognition

mozilla-foundation/common_voice_9_0

Generated from Trainer

Inference Endpoints

Model card Files Files and versions Community

marinone94 committed on Feb 4, 2022

Commit

41abd72

•

1 Parent(s): ef3cdad

add eval scripts

Files changed (3) hide show

eval.py +12 -1
run_eval_cv.sh +7 -0
run_eval_real_world.sh +7 -0

eval.py CHANGED Viewed

@@ -51,7 +51,18 @@ def normalize_text(text: str) -> str:
     chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
-    text = re.sub(chars_to_ignore_regex, "", text.lower())
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!

     chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
+    text = re.sub(chars_to_ignore_regex, "", text.lower()) \
+        .replace("\\\\punkt", "") \
+        .replace("\\\\komma", "") \
+        .replace("è", "e") \
+        .replace("é", "e") \
+        .replace("î", "i") \
+        .replace("ü", "u") \
+        .replace("ÿ", "y") \
+        .replace("ô", "o") \
+        .replace("\\", "") \
+        .replace("/", "") \
+        .replace("|", "")
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!

run_eval_cv.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+python eval.py \
+    --model_id marinone94/xls-r-300m-sv-robust \
+    --dataset mozilla-foundation/common_voice_8_0 \
+    --config sv-SE \
+    --split test \
+    --chunk_length_s 10 \
+    --stride_length_s 1

run_eval_real_world.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+python eval.py \
+    --model_id marinone94/xls-r-300m-sv-robust \
+    --dataset speech-recognition-community-v2/dev_data \
+    --config sv \
+    --split validation \
+    --chunk_length_s 10 \
+    --stride_length_s 1