versae committed on
Commit
3519ed9
1 Parent(s): 23fd420

Adding new scores

Browse files
NbAiLab_NPSC_16K_mp3_test_eval_results.txt CHANGED
@@ -1,2 +1,2 @@
1
- WER: 0.11892896728078826
2
- CER: 0.03728901318225468
 
1
+ WER: 0.09829976810645934
2
+ CER: 0.031738041959668346
eval.py CHANGED
@@ -65,6 +65,7 @@ def normalize_text(text: str) -> str:
65
  text = re.sub('<ee>', 'eee', text)
66
  text = re.sub('<qq>', 'qqq', text)
67
  text = re.sub('<mm>', 'mmm', text)
 
68
  text = re.sub('[<>]', '', text)
69
 
70
  # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
@@ -80,6 +81,7 @@ def normalize_text(text: str) -> str:
80
  def main(args):
81
  # load dataset
82
  dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
 
83
 
84
  # for testing: only process the first two examples as a test
85
  # dataset = dataset.select(range(10))
 
65
  text = re.sub('<ee>', 'eee', text)
66
  text = re.sub('<qq>', 'qqq', text)
67
  text = re.sub('<mm>', 'mmm', text)
68
+ text = re.sub('<inaudible>', 'xxx', text)
69
  text = re.sub('[<>]', '', text)
70
 
71
  # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
 
81
  def main(args):
82
  # load dataset
83
  dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True)
84
+ #dataset = load_dataset(args.dataset, args.config, split=args.split, use_auth_token=True).filter(lambda entry: re.search("nb-nn", entry["sentence_language_code"], flags=re.IGNORECASE))
85
 
86
  # for testing: only process the first two examples as a test
87
  # dataset = dataset.select(range(10))
log_NbAiLab_NPSC_16K_mp3_test_predictions.txt CHANGED
The diff for this file is too large to render. See raw diff
 
log_NbAiLab_NPSC_16K_mp3_test_targets.txt CHANGED
The diff for this file is too large to render. See raw diff