Plim
/

xls-r-1b-cv_8-fr

Automatic Speech Recognition

mozilla-foundation/common_voice_8_0

Generated from Trainer

robust-speech-event

hf-asr-leaderboard

Inference Endpoints

Model card Files Files and versions Community

Plim committed on Feb 8, 2022

Commit

3aea4ea

·

1 Parent(s): 0c54d4b

repush model but with git lfs tracking

Files changed (2) hide show

eval.py +3 -7
pytorch_model.bin +3 -0

eval.py CHANGED Viewed

@@ -48,20 +48,16 @@ def log_results(result: Dataset, args: Dict[str, str]):
 def normalize_text(text: str) -> str:
     """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
-    chars_to_ignore_regex = '[^a-zàâäçéèêëîïôöùûüÿ\'’ ]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
-    text = re.sub(chars_to_ignore_regex, "", text.lower()).replace('’', "'")
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
     token_sequences_to_ignore = ["\n\n", "\n", "   ", "  "]
     for t in token_sequences_to_ignore:
         text = " ".join(text.split(t))
-    return text
 def main(args):
     # load dataset

 def normalize_text(text: str) -> str:
     """DO ADAPT FOR YOUR USE CASE. this function normalizes the target text."""
     # In addition, we can normalize the target text, e.g. removing new lines characters etc...
     # note that order is important here!
     token_sequences_to_ignore = ["\n\n", "\n", "   ", "  "]
     for t in token_sequences_to_ignore:
         text = " ".join(text.split(t))
+    chars_to_ignore_regex = '[^a-zàâäçéèêëîïôöùûüÿ\'’ ]'  # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
+    text = re.sub(chars_to_ignore_regex, "", text.lower()).replace('’', "'")
+    return text
 def main(args):
     # load dataset

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a7ac9a4075231a9b1f2ef054fe1161fdf7235b6c7bd018f7505d44da3332960
+size 3850548401