marinone94 committed on
Commit
41abd72
1 Parent(s): ef3cdad

add eval scripts

Browse files
Files changed (3) hide show
  1. eval.py +12 -1
  2. run_eval_cv.sh +7 -0
  3. run_eval_real_world.sh +7 -0
eval.py CHANGED
@@ -51,7 +51,18 @@ def normalize_text(text: str) -> str:
51
 
52
  chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]' # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
53
 
54
- text = re.sub(chars_to_ignore_regex, "", text.lower())
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # In addition, we can normalize the target text, e.g. removing new lines characters etc...
57
  # note that order is important here!
 
51
 
52
  chars_to_ignore_regex = '[,?.!\-\;\:"“%‘”�—’…–]' # noqa: W605 IMPORTANT: this should correspond to the chars that were ignored during training
53
 
54
+ text = re.sub(chars_to_ignore_regex, "", text.lower()) \
55
+ .replace("\\\\punkt", "") \
56
+ .replace("\\\\komma", "") \
57
+ .replace("è", "e") \
58
+ .replace("é", "e") \
59
+ .replace("î", "i") \
60
+ .replace("ü", "u") \
61
+ .replace("ÿ", "y") \
62
+ .replace("ô", "o") \
63
+ .replace("\\", "") \
64
+ .replace("/", "") \
65
+ .replace("|", "")
66
 
67
  # In addition, we can normalize the target text, e.g. removing new lines characters etc...
68
  # note that order is important here!
run_eval_cv.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python eval.py \
2
+ --model_id marinone94/xls-r-300m-sv-robust \
3
+ --dataset mozilla-foundation/common_voice_8_0 \
4
+ --config sv-SE \
5
+ --split test \
6
+ --chunk_length_s 10 \
7
+ --stride_length_s 1
run_eval_real_world.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ python eval.py \
2
+ --model_id marinone94/xls-r-300m-sv-robust \
3
+ --dataset speech-recognition-community-v2/dev_data \
4
+ --config sv \
5
+ --split validation \
6
+ --chunk_length_s 10 \
7
+ --stride_length_s 1